@ishlabs/cli 0.16.0 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask.js +2 -0
- package/dist/commands/profile.js +10 -2
- package/dist/commands/study-run.js +35 -16
- package/dist/connect.js +8 -6
- package/dist/index.js +1 -1
- package/dist/lib/command-helpers.d.ts +2 -0
- package/dist/lib/command-helpers.js +19 -3
- package/dist/lib/docs.js +25 -15
- package/dist/lib/skill-content.js +190 -484
- package/package.json +5 -2
package/dist/commands/ask.js
CHANGED
package/dist/commands/profile.js
CHANGED
|
@@ -30,7 +30,9 @@ Concept pages: ish docs get-page concepts/profile
|
|
|
30
30
|
.command("list")
|
|
31
31
|
.description("List profiles (defaults to simulatable AI profiles)")
|
|
32
32
|
.option("--workspace <id>", "Filter by workspace ID")
|
|
33
|
-
.option("--search <query>", "
|
|
33
|
+
.option("--search <query>", "Substring match against profile name")
|
|
34
|
+
.option("--bio <text>", "Substring match against profile bio")
|
|
35
|
+
.option("--occupation <text>", "Substring match against profile occupation (repeatable)", collect, [])
|
|
34
36
|
.option("--type <type>", "Profile type: ai, human, all (default: ai)", "ai")
|
|
35
37
|
.option("--gender <gender>", "Filter by gender (repeatable)", collect, [])
|
|
36
38
|
.option("--country <country>", "Filter by country code, e.g. US (repeatable)", collect, [])
|
|
@@ -42,10 +44,12 @@ Concept pages: ish docs get-page concepts/profile
|
|
|
42
44
|
Examples:
|
|
43
45
|
$ ish profile list
|
|
44
46
|
$ ish profile list --search "engineer" --country US
|
|
47
|
+
$ ish profile list --bio "voice-first user"
|
|
48
|
+
$ ish profile list --occupation founder --occupation designer
|
|
45
49
|
$ ish profile list --gender female --gender male --country US --country GB
|
|
46
50
|
$ ish profile list --type all --json
|
|
47
51
|
|
|
48
|
-
# Pagination
|
|
52
|
+
# Pagination: default --limit is 50, iterate with --offset.
|
|
49
53
|
$ ish profile list --limit 100
|
|
50
54
|
$ ish profile list --limit 100 --offset 100 # next page
|
|
51
55
|
# When more results exist, a stderr hint surfaces the next --offset / --limit.`)
|
|
@@ -58,6 +62,10 @@ Examples:
|
|
|
58
62
|
};
|
|
59
63
|
if (opts.search)
|
|
60
64
|
params.search = opts.search;
|
|
65
|
+
if (opts.bio)
|
|
66
|
+
params.bio = opts.bio;
|
|
67
|
+
if (opts.occupation.length > 0)
|
|
68
|
+
params.occupation = opts.occupation;
|
|
61
69
|
if (opts.type !== "all")
|
|
62
70
|
params.type = opts.type;
|
|
63
71
|
if (opts.gender.length > 0)
|
|
@@ -13,8 +13,14 @@ import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
|
|
|
13
13
|
import { output, formatSimulationPoll } from "../lib/output.js";
|
|
14
14
|
import { streamStudyEvents } from "../lib/study-events.js";
|
|
15
15
|
import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
// NOTE: local-sim modules are loaded via dynamic import at the `--local`
|
|
17
|
+
// branch below, NOT statically here. `local-sim/install.ts` deep-imports
|
|
18
|
+
// `playwright-core/lib/server/registry/index`, which is not exposed by
|
|
19
|
+
// playwright-core's `exports` map — Node refuses to resolve it during
|
|
20
|
+
// module load (ERR_PACKAGE_PATH_NOT_EXPORTED), so a static import here
|
|
21
|
+
// would crash *every* `ish` invocation on the npm-installed CLI, not
|
|
22
|
+
// just `study run --local`. The bun-compiled binary bundles the deep
|
|
23
|
+
// path so it doesn't hit Node's resolver; only the npm path is sensitive.
|
|
18
24
|
import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
|
|
19
25
|
function parseMaxInteractions(value) {
|
|
20
26
|
const n = parseInt(value, 10);
|
|
@@ -260,9 +266,10 @@ Note: --workspace and --study are optional if you have set active context
|
|
|
260
266
|
first with \`ish iteration create\`.
|
|
261
267
|
|
|
262
268
|
Audience: pass nothing to reuse the iteration's existing testers. Pass
|
|
263
|
-
--profile to use specific profiles, or filter flags (--
|
|
264
|
-
--min-age, --max-age, --search, --visibility)
|
|
265
|
-
to seed a fresh audience from the workspace
|
|
269
|
+
--profile to use specific profiles, or filter flags (--bio, --country,
|
|
270
|
+
--gender, --min-age, --max-age, --occupation, --search, --visibility)
|
|
271
|
+
with --sample <N> or --all to seed a fresh audience from the workspace
|
|
272
|
+
pool.
|
|
266
273
|
|
|
267
274
|
Examples:
|
|
268
275
|
# Run the latest iteration, reusing its testers:
|
|
@@ -453,15 +460,22 @@ Examples:
|
|
|
453
460
|
throw new Error(`Iteration "${iterationLabel}" has no testers and no audience flags were given. ` +
|
|
454
461
|
"Pass --profile <ids>, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility) with --sample <N> or --all.");
|
|
455
462
|
}
|
|
456
|
-
// Step 3: Resolve simulation config
|
|
457
|
-
//
|
|
458
|
-
//
|
|
459
|
-
//
|
|
460
|
-
//
|
|
463
|
+
// Step 3: Resolve simulation config. Always pre-flight every profile
|
|
464
|
+
// when no --config override is given: missing simulation_config_id
|
|
465
|
+
// is fatal across all modalities (media + chat batch dispatch use it
|
|
466
|
+
// per-item; interactive + pair dispatch fail server-side on the
|
|
467
|
+
// first sim start) and creating tester rows before discovering it
|
|
468
|
+
// leaves phantom DRAFT rows in the iteration. Pair mode reads
|
|
469
|
+
// pairConfig.audience_a and audience_b; non-pair uses profileIds. profileConfigMap
|
|
470
|
+
// is consumed by the media branch; other branches just need the
|
|
471
|
+
// validation side effect.
|
|
461
472
|
const resolvedConfigOverride = opts.config ? resolveId(opts.config) : undefined;
|
|
462
473
|
const profileConfigMap = new Map();
|
|
463
|
-
if (
|
|
464
|
-
|
|
474
|
+
if (!resolvedConfigOverride) {
|
|
475
|
+
const idsToCheck = isPair && pairConfig
|
|
476
|
+
? [...new Set([...pairConfig.audience_a, ...pairConfig.audience_b])]
|
|
477
|
+
: profileIds;
|
|
478
|
+
for (const pid of idsToCheck) {
|
|
465
479
|
const profile = await client.get(`/tester-profiles/${pid}`);
|
|
466
480
|
if (profile.simulation_config_id) {
|
|
467
481
|
profileConfigMap.set(pid, profile.simulation_config_id);
|
|
@@ -604,6 +618,7 @@ Examples:
|
|
|
604
618
|
log("");
|
|
605
619
|
}
|
|
606
620
|
if (opts.local) {
|
|
621
|
+
const { ensureBrowser } = await import("../lib/local-sim/install.js");
|
|
607
622
|
await ensureBrowser({ quiet: globals.quiet, skipPrompt: globals.json });
|
|
608
623
|
}
|
|
609
624
|
// Step 5: Either reuse the iteration's testers or batch-create new ones
|
|
@@ -710,6 +725,7 @@ Examples:
|
|
|
710
725
|
for (const t of createdTesters) {
|
|
711
726
|
testerNameMap.set(t.id, t.tester_profile?.name ?? "Unknown");
|
|
712
727
|
}
|
|
728
|
+
const { runLocalSimulations } = await import("../lib/local-sim/loop.js");
|
|
713
729
|
await runLocalSimulations(client, {
|
|
714
730
|
workspaceId: resolvedWorkspace,
|
|
715
731
|
studyId: resolvedStudy,
|
|
@@ -790,18 +806,21 @@ Examples:
|
|
|
790
806
|
// Fall back to the first audience_a profile's
|
|
791
807
|
// simulation_config_id. Pair dispatch takes a single config
|
|
792
808
|
// for the whole batch, so we don't need the per-profile map
|
|
793
|
-
// the external_chatbot path builds.
|
|
809
|
+
// the external_chatbot path builds. Step 3 already populated
|
|
810
|
+
// profileConfigMap with every audience profile's config when
|
|
811
|
+
// --config was not passed, so reuse that.
|
|
794
812
|
const fallbackProfileId = pairConfig.audience_a[0];
|
|
795
813
|
if (!fallbackProfileId) {
|
|
796
814
|
throw new Error("Pair-mode dispatch requires --config <id>: the iteration has no audience profile to draw a default config_id from.");
|
|
797
815
|
}
|
|
798
|
-
|
|
799
|
-
if (!
|
|
816
|
+
pairConfigId = profileConfigMap.get(fallbackProfileId);
|
|
817
|
+
if (!pairConfigId) {
|
|
818
|
+
// Defensive: Step 3 should have either populated the map or
|
|
819
|
+
// thrown. If we land here something upstream changed.
|
|
800
820
|
throw new Error(`Pair-mode dispatch requires a config_id. Profile ${fallbackProfileId} has no simulation config assigned and --config was not passed.\n` +
|
|
801
821
|
"Use --config <id> to specify one, or assign a config to the profile.\n" +
|
|
802
822
|
"List configs with: ish config list");
|
|
803
823
|
}
|
|
804
|
-
pairConfigId = fallbackProfile.simulation_config_id;
|
|
805
824
|
}
|
|
806
825
|
const simResult = await dispatchAttempt(() => client.post("/simulation/chat/pair/start/batch", {
|
|
807
826
|
product_id: resolvedWorkspace,
|
package/dist/connect.js
CHANGED
|
@@ -284,12 +284,14 @@ async function resolveToken(tokenArg, apiUrl, tokenFileArg) {
|
|
|
284
284
|
// --- Branding ---
|
|
285
285
|
function printBanner() {
|
|
286
286
|
console.log(`
|
|
287
|
-
${c.orange}${c.bold}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
287
|
+
${c.orange}${c.bold} /██ /██
|
|
288
|
+
|__/ | ██
|
|
289
|
+
/██ /███████| ███████
|
|
290
|
+
| ██ /██_____/| ██__ ██
|
|
291
|
+
| ██| ██████ | ██ \\ ██
|
|
292
|
+
| ██ \\____ ██| ██ | ██
|
|
293
|
+
| ██ /███████/| ██ | ██
|
|
294
|
+
|__/|_______/ |__/ |__/${c.reset}
|
|
293
295
|
|
|
294
296
|
Connected
|
|
295
297
|
`);
|
package/dist/index.js
CHANGED
|
@@ -30,7 +30,7 @@ import pkg from "../package.json" with { type: "json" };
|
|
|
30
30
|
const { version } = pkg;
|
|
31
31
|
program
|
|
32
32
|
.name("ish")
|
|
33
|
-
.description("
|
|
33
|
+
.description("ish CLI — run studies and asks against AI tester audiences")
|
|
34
34
|
.version(version)
|
|
35
35
|
.addHelpText("after", AGENT_HELP_FOOTER);
|
|
36
36
|
// Unified error envelope for Commander-level failures (unknown command,
|
|
@@ -33,6 +33,10 @@ function describeFilters(flags) {
|
|
|
33
33
|
const parts = [];
|
|
34
34
|
if (flags.search)
|
|
35
35
|
parts.push(`--search "${flags.search}"`);
|
|
36
|
+
if (flags.bio)
|
|
37
|
+
parts.push(`--bio "${flags.bio}"`);
|
|
38
|
+
if (flags.occupation?.length)
|
|
39
|
+
parts.push(...flags.occupation.map((o) => `--occupation ${o}`));
|
|
36
40
|
if (flags.gender?.length)
|
|
37
41
|
parts.push(...flags.gender.map((g) => `--gender ${g}`));
|
|
38
42
|
if (flags.country?.length)
|
|
@@ -75,6 +79,10 @@ async function suggestCountries(client, workspace, flags, opts) {
|
|
|
75
79
|
if (keepOtherFilters) {
|
|
76
80
|
if (flags.search)
|
|
77
81
|
broader.search = flags.search;
|
|
82
|
+
if (flags.bio)
|
|
83
|
+
broader.bio = flags.bio;
|
|
84
|
+
if (flags.occupation && flags.occupation.length > 0)
|
|
85
|
+
broader.occupation = flags.occupation;
|
|
78
86
|
if (flags.gender && flags.gender.length > 0)
|
|
79
87
|
broader.gender = flags.gender;
|
|
80
88
|
if (flags.minAge)
|
|
@@ -118,6 +126,8 @@ async function suggestCountries(client, workspace, flags, opts) {
|
|
|
118
126
|
}
|
|
119
127
|
function hasFilterFlag(flags) {
|
|
120
128
|
return Boolean(flags.search
|
|
129
|
+
|| flags.bio
|
|
130
|
+
|| (flags.occupation && flags.occupation.length > 0)
|
|
121
131
|
|| (flags.gender && flags.gender.length > 0)
|
|
122
132
|
|| (flags.country && flags.country.length > 0)
|
|
123
133
|
|| flags.minAge
|
|
@@ -153,7 +163,7 @@ export async function resolveAudienceProfileIds(client, workspace, flags, opts =
|
|
|
153
163
|
const filtersUsed = hasFilterFlag(flags);
|
|
154
164
|
if (explicit.length > 0) {
|
|
155
165
|
if (sampleN !== undefined || flags.all || filtersUsed) {
|
|
156
|
-
throw new Error(`Use either explicit --profile flags or --sample/${allFlagName}/filter flags (--country, --gender, --min-age, --max-age, --search, --visibility), not both.`);
|
|
166
|
+
throw new Error(`Use either explicit --profile flags or --sample/${allFlagName}/filter flags (--bio, --country, --gender, --min-age, --max-age, --occupation, --search, --visibility), not both.`);
|
|
157
167
|
}
|
|
158
168
|
return explicit;
|
|
159
169
|
}
|
|
@@ -161,7 +171,7 @@ export async function resolveAudienceProfileIds(client, workspace, flags, opts =
|
|
|
161
171
|
throw new Error(`Use either --sample <N> or ${allFlagName}, not both. --sample picks a random subset; ${allFlagName} returns every match.`);
|
|
162
172
|
}
|
|
163
173
|
if (sampleN === undefined && !flags.all && !filtersUsed) {
|
|
164
|
-
throw new Error(`Pick an audience: pass --profile <id> (repeatable), --sample <N>, ${allFlagName}, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility).`);
|
|
174
|
+
throw new Error(`Pick an audience: pass --profile <id> (repeatable), --sample <N>, ${allFlagName}, or filter flags (--bio, --country, --gender, --min-age, --max-age, --occupation, --search, --visibility).`);
|
|
165
175
|
}
|
|
166
176
|
const params = {
|
|
167
177
|
product_id: workspace,
|
|
@@ -171,6 +181,10 @@ export async function resolveAudienceProfileIds(client, workspace, flags, opts =
|
|
|
171
181
|
};
|
|
172
182
|
if (flags.search)
|
|
173
183
|
params.search = flags.search;
|
|
184
|
+
if (flags.bio)
|
|
185
|
+
params.bio = flags.bio;
|
|
186
|
+
if (flags.occupation && flags.occupation.length > 0)
|
|
187
|
+
params.occupation = flags.occupation;
|
|
174
188
|
if (flags.gender && flags.gender.length > 0)
|
|
175
189
|
params.gender = flags.gender;
|
|
176
190
|
if (flags.country && flags.country.length > 0)
|
|
@@ -240,7 +254,9 @@ export function addAudienceFilterFlags(cmd, opts = {}) {
|
|
|
240
254
|
.option("--profile <ids>", "Tester profile IDs/aliases (comma-separated or repeatable)", collectIds, [])
|
|
241
255
|
.option("--sample <N>", "Randomly sample N profiles from the matching pool")
|
|
242
256
|
.option(allFlag, allDesc)
|
|
243
|
-
.option("--search <text>", "
|
|
257
|
+
.option("--search <text>", "Substring match against profile name")
|
|
258
|
+
.option("--bio <text>", "Substring match against profile bio")
|
|
259
|
+
.option("--occupation <text>", "Substring match against profile occupation (repeatable)", collectRepeatable, [])
|
|
244
260
|
.option("--gender <gender>", "Filter by gender (repeatable)", collectRepeatable, [])
|
|
245
261
|
.option("--country <code>", "Filter by 2-letter country code (repeatable)", collectRepeatable, [])
|
|
246
262
|
.option("--min-age <n>", "Minimum age (inclusive)")
|
package/dist/lib/docs.js
CHANGED
|
@@ -1267,7 +1267,9 @@ flags. Two ways to select:
|
|
|
1267
1267
|
- \`--gender female\` (repeatable)
|
|
1268
1268
|
- \`--min-age 25\`
|
|
1269
1269
|
- \`--max-age 50\`
|
|
1270
|
-
- \`--search "
|
|
1270
|
+
- \`--search "Anna"\` (substring match against profile name)
|
|
1271
|
+
- \`--bio "voice-first user"\` (substring match against profile bio)
|
|
1272
|
+
- \`--occupation founder\` (substring match against profile occupation; repeatable, OR semantics)
|
|
1271
1273
|
- \`--visibility workspace|shared|platform\` (filter by where the
|
|
1272
1274
|
profile lives: your workspace, the community-published pool, or
|
|
1273
1275
|
the admin-curated platform pool; old values \`private\` /
|
|
@@ -1302,20 +1304,22 @@ Two adjacent footguns surface most often on first-time audience
|
|
|
1302
1304
|
construction. Both are documented here because they cost a round-trip
|
|
1303
1305
|
to discover by experiment.
|
|
1304
1306
|
|
|
1305
|
-
###
|
|
1306
|
-
|
|
1307
|
-
\`audience_build\`
|
|
1308
|
-
|
|
1309
|
-
taxonomy match.
|
|
1310
|
-
retail store managers, bank branch managers
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
- **Whole-token alternation**:
|
|
1315
|
-
"software engineering manager"
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1307
|
+
### \`--occupation\` is a loose substring match
|
|
1308
|
+
|
|
1309
|
+
\`audience_build\` and the \`--occupation\` flag treat the value as a
|
|
1310
|
+
**loose, case-insensitive substring filter**, not a whole-token or
|
|
1311
|
+
taxonomy match. \`--occupation manager\` will match hotel managers,
|
|
1312
|
+
retail store managers, bank branch managers: anything containing the
|
|
1313
|
+
literal string "manager". Three patterns that recover the specificity
|
|
1314
|
+
you usually want:
|
|
1315
|
+
|
|
1316
|
+
- **Whole-token alternation**: \`--occupation "engineering manager"
|
|
1317
|
+
--occupation "software engineering manager" --occupation "vp
|
|
1318
|
+
engineering" --occupation "tech lead"\`: exhaustive enumeration of
|
|
1319
|
+
the role surface beats one short token. Multiple \`--occupation\`
|
|
1320
|
+
flags OR together server-side.
|
|
1321
|
+
- **Pair with other filters**: \`--occupation manager --min-age 28
|
|
1322
|
+
--country US --country SE\` narrows even a loose substring
|
|
1319
1323
|
meaningfully.
|
|
1320
1324
|
- **Preview before dispatch**: \`audience_build\` returns a
|
|
1321
1325
|
\`match_preview\` summary on the response — a 1-line histogram of
|
|
@@ -1358,6 +1362,12 @@ ish study run --profile tp-795,tp-af2
|
|
|
1358
1362
|
# Sample 3 Swedish profiles aged 35-50:
|
|
1359
1363
|
ish study run --country SE --min-age 35 --max-age 50 --sample 3
|
|
1360
1364
|
|
|
1365
|
+
# Every female founder or designer:
|
|
1366
|
+
ish study run --gender female --occupation founder --occupation designer --all
|
|
1367
|
+
|
|
1368
|
+
# Bio substring (e.g. accessibility cohort):
|
|
1369
|
+
ish study run --bio "screen reader" --all
|
|
1370
|
+
|
|
1361
1371
|
# Every female profile in the workspace:
|
|
1362
1372
|
ish study run --gender female --all
|
|
1363
1373
|
|
|
@@ -24,506 +24,205 @@ const VERSION = pkg.version;
|
|
|
24
24
|
* "ish". Hard cap is 1024 chars. Front-load the use case.
|
|
25
25
|
*/
|
|
26
26
|
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a tester profile, " +
|
|
27
|
-
"a simulation run, an \"ask\", an audience,
|
|
28
|
-
"or wants to rehearse a conversation
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
27
|
+
"a simulation run, an \"ask\", an audience, a chatbot probe, wants to " +
|
|
28
|
+
"dispatch tests against AI testers, or wants to rehearse a conversation " +
|
|
29
|
+
"between two AI personas (e.g. sales rep vs. skeptical buyer). Covers both " +
|
|
30
|
+
"the `ish` CLI (via Bash) and the hosted ish MCP server " +
|
|
31
|
+
"(`mcp__claude_ai_ish__*` on claude.ai) — same operations, pick whichever " +
|
|
32
|
+
"your environment has. Read this skill first to orient on the mental model, " +
|
|
33
|
+
"then trust `ish docs` (CLI) or the MCP tool descriptions for argument details.";
|
|
34
34
|
const SKILL_BODY = `# ish
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
reactions against AI tester audiences. The CLI is the agent surface;
|
|
38
|
-
this skill teaches you how to use it without re-reading its docs every
|
|
39
|
-
time.
|
|
36
|
+
ish runs user-research simulations: simulated people experience your draft (page, copy, ad, pitch, chatbot, video, document) and report what they noticed, where they stalled, what they would do next. Use before shipping, when you need a fast reaction round, or to rehearse a conversation between two AI personas.
|
|
40
37
|
|
|
41
|
-
## When to invoke
|
|
38
|
+
## When to invoke
|
|
42
39
|
|
|
43
|
-
The user mentioned
|
|
44
|
-
a tester source, a simulation run, an iteration, an "ask", an audience,
|
|
45
|
-
or wants to dispatch tests against AI testers. Also invoke if the user
|
|
46
|
-
asks to "run a study", "generate testers", "compare variants", "test a
|
|
47
|
-
prototype with users", or similar.
|
|
40
|
+
The user mentioned \`ish\`, a study, an "ask", a tester profile, an audience, a simulation, "rehearse", "compare variants", "test before shipping", "probe a chatbot".
|
|
48
41
|
|
|
49
|
-
##
|
|
42
|
+
## Drivers
|
|
50
43
|
|
|
51
|
-
|
|
44
|
+
ish has two surfaces; pick whichever your environment has:
|
|
52
45
|
|
|
53
|
-
|
|
54
|
-
ish docs overview
|
|
55
|
-
\`\`\`
|
|
46
|
+
- **MCP** — \`mcp__claude_ai_ish__*\` on claude.ai. Tool descriptions are authoritative for argument schemas.
|
|
47
|
+
- **CLI** — the \`ish\` binary. \`ish --help\` per command; \`ish docs overview\` / \`ish docs list\` / \`ish docs search\` / \`ish docs get-page <slug>\` for concept docs.
|
|
56
48
|
|
|
57
|
-
|
|
58
|
-
→ results) and lists every concept page available offline. The model is
|
|
59
|
-
non-obvious — *do not* skip this step the first time the user asks for
|
|
60
|
-
anything ish-related in a session.
|
|
49
|
+
Both wrap the same operations. If neither is present, tell the user: \`npm i -g @ishlabs/cli\`, or enable the ish connector on claude.ai. Don't try to drive ish without a driver.
|
|
61
50
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
ish docs get-page concepts/run-verbs # study run vs ask run
|
|
68
|
-
ish docs search "<keyword>" # ranked hits with snippets
|
|
69
|
-
\`\`\`
|
|
51
|
+
**When both are available, pick by op:**
|
|
52
|
+
- Streaming results to a watching user → **CLI** with \`--wait\` (per-tester output as testers complete).
|
|
53
|
+
- Structured one-shot reads or run dispatch → **MCP** (JSON in, JSON out, no shell).
|
|
54
|
+
- Idempotent setup (e.g. cold-start workspace) → **CLI** has \`--ensure\`; MCP doesn't.
|
|
55
|
+
- Local file uploads (images, video, docs) → **CLI** only — MCP doesn't accept binaries.
|
|
70
56
|
|
|
71
|
-
|
|
72
|
-
skill file. **Trust \`ish docs\` over anything in this skill if they
|
|
73
|
-
conflict.**
|
|
57
|
+
**Naming convention in this skill**: shapes below use MCP tool names (\`ask_run\`, \`study_create\`, \`chat_endpoint_init\`, …). The CLI equivalents are the same names kebab-cased under a noun group (\`ish ask run\`, \`ish study create\`, \`ish chat endpoint init\`, …). When in doubt: \`ish --help\` or \`ish <noun> --help\`.
|
|
74
58
|
|
|
75
|
-
##
|
|
59
|
+
## Mental model
|
|
76
60
|
|
|
77
61
|
\`\`\`
|
|
78
62
|
Workspace (= product)
|
|
79
|
-
├── Tester
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
│
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
│ ├── questionnaire questions the tester answers
|
|
87
|
-
│ └── Iterations (i-…) one configured run; carries the URL or media
|
|
88
|
-
│ └── Testers (t-…) instance of a profile in this iteration
|
|
89
|
-
└── Ask (a-…) lightweight reaction artifact
|
|
90
|
-
└── Rounds unit of execution; audience fixed at ask creation
|
|
63
|
+
├── Tester Profile (tp-…) reusable AI persona
|
|
64
|
+
├── Study (s-…) persistent artifact for testing a real surface
|
|
65
|
+
│ └── Iteration (i-…) one configured run; carries the URL or media
|
|
66
|
+
├── Ask (a-…) lightweight artifact for reactions to text/image variants
|
|
67
|
+
│ └── Round unit of execution; audience fixed at ask creation
|
|
68
|
+
└── Chat Endpoint workspace-level definition of an external chatbot
|
|
69
|
+
(referenced by study modality: chat, mode: external_chatbot)
|
|
91
70
|
\`\`\`
|
|
92
71
|
|
|
93
|
-
|
|
94
|
-
- \`ish study run\` — dispatches simulations on the latest iteration of a study.
|
|
95
|
-
- \`ish ask run\` — appends a round to an ask (or \`--new\` to create one).
|
|
96
|
-
|
|
97
|
-
Use **study** when the tester must *do* something on a real surface;
|
|
98
|
-
use **ask** for quick reactions to text/image variants.
|
|
99
|
-
|
|
100
|
-
**Cold-start caveat — "create a fresh workspace" is conditional on
|
|
101
|
-
quota headroom.** \`workspace_create\` returns
|
|
102
|
-
\`error_code: usage_limit_reached\` the instant the account is at
|
|
103
|
-
\`maxProducts\` (FREE caps at 1). Always inspect with \`workspace_get\`
|
|
104
|
-
first and check the \`has_headroom\` flag per row, or use
|
|
105
|
-
\`ish workspace create --name <name> --ensure\` — idempotent: returns
|
|
106
|
-
the existing workspace by name when one exists, otherwise creates. See
|
|
107
|
-
\`ish docs get-page guides/cold-start\` before producing a
|
|
108
|
-
workspace_create call on a session you haven't already probed.
|
|
109
|
-
|
|
110
|
-
## High-frequency commands
|
|
111
|
-
|
|
112
|
-
\`\`\`bash
|
|
113
|
-
# First command on a cold start — confirms login + active context:
|
|
114
|
-
ish status # or: ish whoami
|
|
115
|
-
# → user, active workspace/study/ask, token validity, API url
|
|
116
|
-
|
|
117
|
-
# Auth & active selection (saved to ~/.ish/config.json)
|
|
118
|
-
ish login
|
|
119
|
-
ish workspace use w-6ec
|
|
120
|
-
ish study use s-b2c
|
|
121
|
-
ish ask use a-6ec
|
|
122
|
-
|
|
123
|
-
# Idempotent workspace create — returns existing if name matches.
|
|
124
|
-
# Use this on cold-start instead of a blind workspace_create that may
|
|
125
|
-
# hit usage_limit_reached. See \`ish docs get-page guides/cold-start\`.
|
|
126
|
-
ish workspace create --name "Acme — onboarding" --ensure
|
|
127
|
-
|
|
128
|
-
# Inspect
|
|
129
|
-
ish workspace list
|
|
130
|
-
ish study list
|
|
131
|
-
ish iteration list --study s-b2c
|
|
132
|
-
ish ask list
|
|
133
|
-
|
|
134
|
-
# Define / configure (one-shot — iteration A inline)
|
|
135
|
-
ish study create --modality interactive --name "..." --url https://example.com \
|
|
136
|
-
--assignment "..." --question "..."
|
|
137
|
-
ish study create --modality image --name "..." \
|
|
138
|
-
--image-urls "https://cdn.example.com/a.png,https://cdn.example.com/b.png" \
|
|
139
|
-
--assignment "Compare:Which feels more premium?"
|
|
140
|
-
ish study create --modality video --name "..." \
|
|
141
|
-
--content-url https://cdn.example.com/ad.mp4 --assignment "Watch:..."
|
|
142
|
-
|
|
143
|
-
# Or 2-step (when you want to A/B iterations later, or upload local files)
|
|
144
|
-
ish study create --name "..." --modality interactive --assignment "..."
|
|
145
|
-
ish iteration create --url https://example.com # auto-uploads local files
|
|
146
|
-
|
|
147
|
-
ish profile generate --description "..." --count 5
|
|
148
|
-
|
|
149
|
-
# Chat modality (external_chatbot — talk to a customer chatbot).
|
|
150
|
-
# Audience size lives on study run; study create defines the persistent shape only.
|
|
151
|
-
ish chat endpoint init --from-curl ./bot.curl --name my-bot
|
|
152
|
-
ish chat endpoint test my-bot -m "Hello"
|
|
153
|
-
ish study create --modality chat --endpoint my-bot --assignment "Sign up:Try to sign up"
|
|
154
|
-
# (then) ish study run --sample 5 --wait
|
|
155
|
-
|
|
156
|
-
# Chat modality (tester_pair — rehearse a conversation between two AI personas).
|
|
157
|
-
# Audiences are pinned to the iteration; study run refuses run-time audience
|
|
158
|
-
# overrides. Each side accepts EITHER explicit profiles OR a role-criteria
|
|
159
|
-
# filter (or both — criteria validates the explicit list).
|
|
160
|
-
ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
|
|
161
|
-
--audience-a tp-sales-1,tp-sales-2 --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
|
|
162
|
-
--scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
|
|
163
|
-
--assignment "Pitch:Try to win the meeting"
|
|
164
|
-
# (then) ish study run -y
|
|
165
|
-
|
|
166
|
-
# Criteria-driven variant — backend resolves the eligible pool per side.
|
|
167
|
-
# Persona-first: the persona is sacred, criteria filter who plays the role.
|
|
168
|
-
ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
|
|
169
|
-
--role-criteria-a '{"occupation":["sales"],"min_age":28}' \\
|
|
170
|
-
--role-criteria-b '{"occupation":["cto","vp engineering"],"country":["US","SE"]}' \\
|
|
171
|
-
--scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
|
|
172
|
-
--assignment "Pitch:Try to land a pilot"
|
|
173
|
-
|
|
174
|
-
# Run
|
|
175
|
-
ish study run --sample 5 --country SE --wait
|
|
176
|
-
ish ask run --new --name "..." --prompt "..." --variant text:"A" --variant text:"B" --sample 30 --wants-pick --wait
|
|
177
|
-
|
|
178
|
-
# Stage an ask for human review, then dispatch (no credits charged on stage)
|
|
179
|
-
ish ask create --name "..." --prompt "..." --variant text:"A" --variant text:"B" \
|
|
180
|
-
--sample 30 --wants-pick --no-dispatch
|
|
181
|
-
ish ask dispatch a-6ec --wait
|
|
182
|
-
|
|
183
|
-
# Results
|
|
184
|
-
ish study results
|
|
185
|
-
ish ask results a-6ec --round 1
|
|
186
|
-
|
|
187
|
-
# AI summary + key insights (any modality with completed testers)
|
|
188
|
-
ish study analyze --wait # trigger + block
|
|
189
|
-
ish study insights # read latest
|
|
190
|
-
|
|
191
|
-
# Screenshots (interactive studies — see what testers actually saw)
|
|
192
|
-
ish study screenshots # list, frame-grouped
|
|
193
|
-
ish study screenshots download <study-id> --id <scid> --out shot.png
|
|
194
|
-
ish study screenshots download <study-id> --all --out ./shots/
|
|
195
|
-
|
|
196
|
-
# Chat configurations (model + system prompt + tools per chatbot endpoint)
|
|
197
|
-
ish chat config list # active endpoint
|
|
198
|
-
ish chat config set --name v1 --model claude-sonnet-4-6 \\
|
|
199
|
-
--system-prompt-file ./prompt.txt --default
|
|
200
|
-
ish chat config get cc-abc --view iterations # cross-study use
|
|
201
|
-
|
|
202
|
-
# Read offline docs
|
|
203
|
-
ish docs overview
|
|
204
|
-
ish docs get-page <slug>
|
|
205
|
-
ish docs search <query>
|
|
206
|
-
\`\`\`
|
|
72
|
+
**Audience is a query, not an entity.** Both \`ask_run\` and \`study_run\` take an \`audience\` argument shaped as \`{ profile_ids: [...] }\` (explicit) or \`{ sample: N, filters: {...} }\` (sampled from an existing pool). There is no \`audience\` resource to create — you build profiles via \`audience_build\` (or reuse existing ones via \`profile_list\`) and pass them in.
|
|
207
73
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
-
|
|
263
|
-
|
|
264
|
-
\`
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
the
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
-
|
|
287
|
-
\`
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
- **
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
- **
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
-
|
|
360
|
-
the parity of \`ask create\` and \`ask run\`. Without \`--wait\` the
|
|
361
|
-
command returns after dispatch (round still running).
|
|
362
|
-
- **\`study extend <tester>\` resumes a terminal tester.** Use it when
|
|
363
|
-
a run hit \`--max-interactions\` before finishing, or pair with
|
|
364
|
-
\`study cancel\` to redirect mid-run via \`--instruction\` (inline,
|
|
365
|
-
\`@path\`, or stdin via \`-\`). Spawns a **new** tester branched from
|
|
366
|
-
the source's last interaction — source row untouched. Credits debit
|
|
367
|
-
per \`max(1, round(additional_steps / 10))\`. See workflow #11 and
|
|
368
|
-
\`ish docs get-page concepts/extending-a-simulation\`.
|
|
369
|
-
- **\`pick_confidence\` (0..1) is on every \`--wants-pick\` response.**
|
|
370
|
-
The model's self-reported confidence in its variant choice. Use it
|
|
371
|
-
to break ties when nominal pick counts are close. See
|
|
372
|
-
\`ish docs get-page concepts/ask\`.
|
|
373
|
-
- Exit codes carry meaning: 0 success, 2 usage/validation,
|
|
374
|
-
3 auth, 4 not-found, 5 transient. See
|
|
375
|
-
\`ish docs get-page reference/json-mode\`.
|
|
376
|
-
- **Tier limits surface as \`error_code: "usage_limit_reached"\`**
|
|
377
|
-
(HTTP 403, exit 1, non-retryable). The error body includes
|
|
378
|
-
\`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\`. Do not
|
|
379
|
-
retry — branch on the code and surface the upgrade link. See
|
|
380
|
-
\`ish docs get-page reference/billing-limits\`.
|
|
381
|
-
- Aliases (\`s-…\`, \`a-…\`, \`tp-…\`, \`i-…\`, \`t-…\`, \`tps-…\`, \`w-…\`)
|
|
382
|
-
are accepted anywhere a UUID is. See
|
|
383
|
-
\`ish docs get-page reference/aliases\`.
|
|
384
|
-
|
|
385
|
-
## Credits & cost preview
|
|
386
|
-
|
|
387
|
-
Every dispatched run costs **credits**. The CLI surfaces an upper-bound
|
|
388
|
-
estimate *before* you dispatch so you can budget:
|
|
389
|
-
|
|
390
|
-
- **Human output** — \`study run\` shows a \`Scale:\` + \`Credits (est):\`
|
|
391
|
-
line in the confirmation block (skipped under \`--yes\` or \`--json\`).
|
|
392
|
-
- **JSON output** — \`study run --json\` includes a \`credit_estimate\`
|
|
393
|
-
field. For tester-pair chat it nests under \`pair_preview\`; for
|
|
394
|
-
solo/media runs it's top-level. Shape:
|
|
395
|
-
\`{ upper_bound: number, formula: "media_per_tester" | "chat_solo" |
|
|
396
|
-
"chat_pair" | "ask_per_response", breakdown: string, unit: "credits" }\`.
|
|
397
|
-
- **\`formula\` is stable** — agents can branch on it.
|
|
398
|
-
|
|
399
|
-
Today every modality uses \`max(1, round(N / 10))\` per principal
|
|
400
|
-
(per tester for media/interactive, per side per conversation for chat,
|
|
401
|
-
×2 for tester-pair). Asks bill flat **1 credit per successful response**.
|
|
402
|
-
Insights cost **10 credits flat** (first per-study is free).
|
|
403
|
-
|
|
404
|
-
If you exceed the available budget at dispatch time, the backend rejects
|
|
405
|
-
with HTTP 402 / \`error_code: "insufficient_credits"\`. The envelope
|
|
406
|
-
carries \`required\`, \`available\`, \`upgrade_url\`. Don't retry — surface
|
|
407
|
-
the upgrade link.
|
|
408
|
-
|
|
409
|
-
The full table (per-modality rates, tier allotments, error envelope)
|
|
410
|
-
lives in \`ish docs get-page reference/credits\`.
|
|
411
|
-
|
|
412
|
-
## Common pitfalls (don't do these)
|
|
413
|
-
|
|
414
|
-
1. **Don't paste flags from memory.** The CLI evolves; flags change.
|
|
415
|
-
Run \`ish <command> --help\` to confirm before constructing a command.
|
|
416
|
-
2. **Don't pipe \`--json\` through \`python\`/\`jq\` to reshape output** —
|
|
417
|
-
the CLI already has the affordances:
|
|
418
|
-
- Inspect a few specific entities? \`ish profile get tp-1b9 tp-fc1
|
|
419
|
-
tp-2fc\` (also works for \`study get\`, \`iteration get\`, \`ask
|
|
420
|
-
get\`). Returns a \`{items:[...], total:N}\` envelope.
|
|
421
|
-
- Want only certain fields? \`--fields alias,name,country,occupation\`.
|
|
422
|
-
- Need counts of a nested array? \`ask get\` / \`ask create --wait\`
|
|
423
|
-
already include \`testers_count\`, \`responses_total\`,
|
|
424
|
-
\`responses_complete\` (per-round and aggregate). Don't recount.
|
|
425
|
-
- Want machine-readable A/B verdicts? \`ask results --json\` already
|
|
426
|
-
ships \`aggregates: { picks, ratings, winner }\` per round.
|
|
427
|
-
3. **Don't run \`ish study run\` against an empty study.** \`ish study
|
|
428
|
-
create\` and \`ish study generate\` no longer auto-create iteration
|
|
429
|
-
A — the first explicit \`ish iteration create\` becomes A. Running
|
|
430
|
-
\`study run\` on a study with zero iterations exits 2; create one
|
|
431
|
-
first via \`ish iteration create --url …\` / \`--content-url …\` /
|
|
432
|
-
\`--content-text …\`. Or pass \`--content-text\` / \`--url\` directly
|
|
433
|
-
on \`study create\` for a one-shot study + iteration A.
|
|
434
|
-
4. **Don't pass \`--profile\` together with demographic filters** — they
|
|
435
|
-
are mutually exclusive. Either explicit IDs or
|
|
436
|
-
\`--country\`/\`--gender\`/\`--min-age\`/\`--max-age\` + \`--sample\`.
|
|
437
|
-
5. **Don't change audience between rounds of an ask.** It's fixed at
|
|
438
|
-
ask creation. Use \`ish ask add-testers\` to *extend* it; you can't
|
|
439
|
-
replace it.
|
|
440
|
-
6. **Don't try to put credentials in the URL** for gated study URLs.
|
|
441
|
-
Configure them once on the workspace via
|
|
442
|
-
\`ish workspace site-access …\` (basic-auth, cookie, login).
|
|
443
|
-
See \`ish docs get-page concepts/site-access\`.
|
|
444
|
-
7. **Don't commit \`~/.ish/config.json\`** — it stores tokens and active
|
|
445
|
-
workspace/study/ask selections. It lives in \`$HOME\`, not the repo.
|
|
446
|
-
8. **Don't pass run-time audience flags to a tester_pair chat iteration.**
|
|
447
|
-
Pair iterations carry their own audiences (\`audience_a\` /
|
|
448
|
-
\`audience_b\` inside \`details.mode_details\`); \`ish study run\`
|
|
449
|
-
refuses \`--profile\` / \`--sample\` / \`--all\` / demographic filters
|
|
450
|
-
on them. To change audiences, update the iteration via
|
|
451
|
-
\`ish iteration update <id> --details-json '{...}'\`. When both sides
|
|
452
|
-
ship explicit \`--audience-a\` / \`--audience-b\` lists, lengths must
|
|
453
|
-
match (1:1 by index) — or use \`--role-criteria-a/-b\` and let the
|
|
454
|
-
backend resolve a pool.
|
|
455
|
-
9. **Don't cram demographic constraints into \`scenario_a/_b\` text.**
|
|
456
|
-
Demographics (occupation, age, country, gender) belong in
|
|
457
|
-
\`--role-criteria-a/-b\` so the persona stays sacred — filtering
|
|
458
|
-
happens upstream of the prompt. Scenario text is for voice, goal,
|
|
459
|
-
and knowledge of the role, not for who plays it. Mixing the two
|
|
460
|
-
breaks the asymmetry contract and produces incoherent characters.
|
|
461
|
-
10. **Don't retry \`usage_limit_reached\` errors.** Tier caps
|
|
462
|
-
(\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
|
|
463
|
-
\`maxCustomTesterProfiles\`) are enforced server-side. The error body
|
|
464
|
-
carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` — show
|
|
465
|
-
the upgrade link or delete an existing resource to free headroom.
|
|
466
|
-
See \`ish docs get-page reference/billing-limits\` for the table.
|
|
467
|
-
11. **Don't retry \`insufficient_credits\` errors either.** HTTP 402,
|
|
468
|
-
non-retryable. Read the \`credit_estimate\` field on \`study run --json\`
|
|
469
|
-
*before* dispatching to know what you'll spend; if the error fires
|
|
470
|
-
after, surface \`required\` / \`available\` / \`upgrade_url\` to the
|
|
471
|
-
human. See \`ish docs get-page reference/credits\`.
|
|
472
|
-
12. **Don't dispatch interactive/media runs without thinking about
|
|
473
|
-
\`--max-interactions\`.** \`ish study run\` defaults to a 20-step
|
|
474
|
-
cap (flag > iteration's stored value > 20), which is the right
|
|
475
|
-
answer for most onboarding/landing-page probes. Raise it
|
|
476
|
-
(\`--max-interactions 50\`) when testers genuinely need to roam
|
|
477
|
-
further; lower it (\`--max-interactions 5\`) for a smoke probe
|
|
478
|
-
against a surface you suspect is broken — a stuck tester on a
|
|
479
|
-
non-responsive page will otherwise burn the full cap before the
|
|
480
|
-
SDK gives up. The confirmation block prints the resolved value
|
|
481
|
-
and where it came from. Credits debit per
|
|
482
|
-
\`max(1, round(steps/10))\` per tester; see
|
|
483
|
-
\`ish docs get-page reference/credits\`.
|
|
484
|
-
13. **Don't call \`workspace_create\` blind on a cold start.** On a
|
|
485
|
-
saturated account it returns \`error_code: usage_limit_reached\`
|
|
486
|
-
immediately — the dogfood account hits this on the first call.
|
|
487
|
-
Always call \`workspace_get\` (or \`ish workspace list --json\`)
|
|
488
|
-
first and inspect \`has_headroom\` per row; if any existing
|
|
489
|
-
workspace fits the work, use it via \`ish workspace use <id>\`.
|
|
490
|
-
To programmatically reuse-or-create idempotently, prefer
|
|
491
|
-
\`ish workspace create --name <name> --ensure\` — returns the existing
|
|
492
|
-
workspace owned by the caller when the name matches, otherwise
|
|
493
|
-
creates a fresh one. Same response shape either way, so the
|
|
494
|
-
agent doesn't branch on success vs. reuse. See
|
|
495
|
-
\`ish docs get-page guides/cold-start\`.
|
|
496
|
-
14. **Don't trust \`occupation\` filters as whole-token matches.**
|
|
497
|
-
\`audience_build\` treats \`occupation\` as a **loose,
|
|
498
|
-
case-insensitive substring** — \`occupation=["manager"]\` matches
|
|
499
|
-
hotel managers, retail managers, bank branch managers, not just
|
|
500
|
-
the engineering managers you probably wanted. Two recovery
|
|
501
|
-
paths: enumerate the role surface explicitly
|
|
502
|
-
(\`occupation=["engineering manager", "software engineering
|
|
503
|
-
manager", "vp engineering", "tech lead"]\`) or read
|
|
504
|
-
\`match_preview\` on the \`audience_build\` response and iterate
|
|
505
|
-
on the filter before \`ask_run\` / \`study_run\`. The public
|
|
506
|
-
profile pool skews non-tech / non-Western, so even a precise
|
|
507
|
-
filter may resolve to a small count — preview before dispatching
|
|
508
|
-
a run that depends on reaching N matches. See
|
|
509
|
-
\`ish docs get-page concepts/audience\`.
|
|
510
|
-
|
|
511
|
-
## Authentication
|
|
512
|
-
|
|
513
|
-
\`ish login\` opens a browser and saves tokens to \`~/.ish/config.json\`.
|
|
514
|
-
The CLI also accepts \`--token <token>\` or \`ISH_TOKEN\` env var. If a
|
|
515
|
-
command exits with code 3 ("auth"), tell the user to re-run \`ish login\`.
|
|
516
|
-
|
|
517
|
-
## When ish is the wrong tool
|
|
518
|
-
|
|
519
|
-
If the user wants to *write code* against the Ish API directly, point
|
|
520
|
-
them at the API docs at https://ishlabs.io — this CLI is for
|
|
521
|
-
orchestration, not as an API client library.
|
|
522
|
-
|
|
523
|
-
---
|
|
524
|
-
|
|
525
|
-
**Skill version:** ${VERSION}
|
|
526
|
-
**Skill source of truth:** \`ish docs\` (offline, ships with the binary)
|
|
74
|
+
Two run verbs:
|
|
75
|
+
- **study run** — simulate on a real surface (URL, media, document, chat endpoint).
|
|
76
|
+
- **ask run** — react to text or image variants.
|
|
77
|
+
|
|
78
|
+
Heuristic: **study** for "test this prototype/page/flow"; **ask** for "which copy/image lands better".
|
|
79
|
+
|
|
80
|
+
## Workflow shapes
|
|
81
|
+
|
|
82
|
+
Each shape names the verb, the *required precursors*, and the **load-bearing knobs** — the arguments that change output quality, not just behavior. Look up the full schema in the MCP tool description or \`ish <command> --help\` once you've picked the shape.
|
|
83
|
+
|
|
84
|
+
Examples below use the MCP shape; for the CLI, turn the tool name into \`ish\` subcommands (\`ask_run\` → \`ish ask run\`) and pass the equivalent kebab-case flags (\`profile_ids: [...]\` → \`--profile-id tp-… --profile-id tp-…\`).
|
|
85
|
+
|
|
86
|
+
### Compare text or image variants → \`ask_run\`
|
|
87
|
+
|
|
88
|
+
- **Precursor**: an audience (see "Audience is a query" above). If you don't already have suitable tester profiles, build them first via \`audience_build\`; reuse via \`profile_list\` when possible.
|
|
89
|
+
- **Load-bearing knobs**:
|
|
90
|
+
- \`wants_pick: true\` — adds an aggregate winner verdict. Without it you get prose reactions but no clear answer.
|
|
91
|
+
- \`wants_ratings: true\` — adds per-variant numeric scores.
|
|
92
|
+
- \`wait: true\` — block until done. Without it you get a round id and have to poll.
|
|
93
|
+
- \`variants\` — array of \`{ label, content }\` for text, or \`{ label, image_url }\` for hosted images. Two or more variants required for \`wants_pick\` to be meaningful (with N=1 it degrades to a prose reaction round). **Local image files**: only the CLI accepts them. Use \`--variant LABEL:@./path.png\` per file (the \`@\` prefix triggers upload); MCP requires a hosted URL.
|
|
94
|
+
- \`ask_id\` (optional) — passing an existing \`a-…\` id re-runs against that ask. Omit (or pass \`--new\` on the CLI) to create a new ask in one shot.
|
|
95
|
+
- **Shape**:
|
|
96
|
+
\`\`\`
|
|
97
|
+
ask_run({
|
|
98
|
+
variants: [ { label: "A", content: "..." }, { label: "B", content: "..." } ],
|
|
99
|
+
audience: { profile_ids: ["tp-…", ...] }, // or { sample: 10 }
|
|
100
|
+
wants_pick: true,
|
|
101
|
+
wants_ratings: true,
|
|
102
|
+
wait: true,
|
|
103
|
+
})
|
|
104
|
+
\`\`\`
|
|
105
|
+
- **Output**: per-tester reasoning + (if \`wants_pick\`) aggregate winner with confidence.
|
|
106
|
+
|
|
107
|
+
### Test a live page or prototype → \`study_run\` (modality: interactive)
|
|
108
|
+
|
|
109
|
+
- **Precursor**: a study with a URL. Either inline at create-time (\`study_create({ modality: "interactive", url: "..." })\`) or as a separate iteration (\`iteration_create({ study_id, url })\`) when you want to A/B iterations later or upload local files. An **assignment** is required — what the tester is supposed to attempt.
|
|
110
|
+
- **Audience**: pass \`audience: { profile_ids: [...] }\` or \`{ sample: N }\` to \`study_run\`, same contract as \`ask_run\`. Audience is set on the *run*, not the study.
|
|
111
|
+
- **Load-bearing knobs**:
|
|
112
|
+
- \`assignment\` (on \`study_create\`) — what the tester is supposed to do. Format: \`"<label>:<instruction>"\`. The whole run hinges on this being clear.
|
|
113
|
+
- \`wait\` (MCP) / \`--wait\` (CLI) — waits for per-tester results. The CLI streams them to stdout in real time as each tester completes; MCP blocks until the whole run finishes. For a watching user, prefer the CLI here.
|
|
114
|
+
- \`count\` (on \`study_run\`) — how many testers.
|
|
115
|
+
- **Shape**:
|
|
116
|
+
\`\`\`
|
|
117
|
+
study_create({
|
|
118
|
+
modality: "interactive",
|
|
119
|
+
url: "https://staging.acme.io/welcome",
|
|
120
|
+
assignment: "Complete signup:Go through the 4-step wizard end-to-end",
|
|
121
|
+
})
|
|
122
|
+
study_run({ study_id: "s-…", audience: { profile_ids: [...] }, count: 15, wait: true })
|
|
123
|
+
\`\`\`
|
|
124
|
+
- **Output**: per-tester journey transcripts + aggregate friction / blocker / positive-moment counts.
|
|
125
|
+
|
|
126
|
+
### Probe a customer chatbot → \`study_run\` (modality: chat, mode: external_chatbot)
|
|
127
|
+
|
|
128
|
+
- **Precursors**:
|
|
129
|
+
1. A **chat endpoint** definition at the workspace level. \`chat_endpoint_init\` from a curl spec (handles auth headers, request/response shape; **upsert-by-name** — safe to re-call with the same \`name\` to rotate auth or change the request shape) → \`chat_endpoint_test\` to confirm it responds correctly before dispatching simulated testers.
|
|
130
|
+
2. A study with \`modality: "chat"\`, \`mode: "external_chatbot"\`, the endpoint reference, and an \`assignment\`.
|
|
131
|
+
- **Audience**: same \`{ profile_ids } | { sample }\` contract; pass to \`study_run\`. For custom personas (e.g. "frustrated vs polite"), \`audience_build\` first.
|
|
132
|
+
- **Load-bearing knobs**:
|
|
133
|
+
- \`assignment\` — what the tester tries to do (\`"Cancel:Try to cancel your subscription"\`).
|
|
134
|
+
- \`count\` on the run.
|
|
135
|
+
- **Shape**:
|
|
136
|
+
\`\`\`
|
|
137
|
+
chat_endpoint_init({ name: "support-bot", from_curl: "..." }) // or describe request shape directly
|
|
138
|
+
chat_endpoint_test({ endpoint: "support-bot", message: "hi" })
|
|
139
|
+
study_create({ modality: "chat", mode: "external_chatbot", endpoint: "support-bot",
|
|
140
|
+
assignment: "Cancel:Try to cancel your subscription" })
|
|
141
|
+
study_run({ study_id: "s-…", audience: { profile_ids: [...] }, count: 8, wait: true })
|
|
142
|
+
\`\`\`
|
|
143
|
+
- **Output**: full conversation transcripts per tester + aggregate success / blocker analysis.
|
|
144
|
+
|
|
145
|
+
### Test a media artifact (document, image, video, audio) → \`study_run\`
|
|
146
|
+
|
|
147
|
+
- **Precursors**:
|
|
148
|
+
1. A study with the chosen modality: \`study_create({ modality: "document" | "image" | "video" | "audio", assignment: "..." })\`.
|
|
149
|
+
2. An **iteration** carrying the media. For local files, **CLI only** — \`ish iteration create --study s-… --media @./deck.pdf\` (the \`@\` prefix triggers upload). For hosted URLs, either driver works: \`iteration_create({ study_id, content_url: "https://..." })\`.
|
|
150
|
+
- **Audience**: same \`{ profile_ids } | { sample }\` contract; pass to \`study_run\`. Reusable across runs (see "Lifecycle" below).
|
|
151
|
+
- **Load-bearing knobs**:
|
|
152
|
+
- \`assignment\` on \`study_create\` — for review-style media (decks, ad creative), frame it as a decision: \`"Take a first meeting:Review this Series A deck and decide whether you'd take a first meeting"\`. Page/timestamp-level attribution depends on the assignment asking for it explicitly.
|
|
153
|
+
- \`wait\` / \`--wait\` — same streaming story as interactive.
|
|
154
|
+
- \`count\` on \`study_run\`.
|
|
155
|
+
- **Iterating on the artifact** (v2 deck, v3 deck): create a **new iteration** on the same study (\`iteration_create\`), reuse the audience's \`profile_ids\`. See "Lifecycle".
|
|
156
|
+
- **Output**: per-tester reactions to the artifact + aggregate themes.
|
|
157
|
+
|
|
158
|
+
### Rehearse a conversation between two AI personas → \`study_run\` (modality: chat, mode: tester_pair)
|
|
159
|
+
|
|
160
|
+
**If the user might want the same persona across multiple turns, pin profiles up-front — you can't retro-pin after a run.** Without pinning, personas are re-synthesized from the assignment text each time, so "the same VC from earlier" becomes prose-only continuity.
|
|
161
|
+
|
|
162
|
+
- **Precursor**: a workspace and (optionally) one or two tester profiles for persona pinning. If you skip the profiles, ish synthesizes both personas from the \`assignment\` text per-run — fine for one-shot rehearsals, drifts between iterations.
|
|
163
|
+
- **Audience**: optional. For persona continuity across iterations, build profiles via \`audience_build\` (or reuse via \`profile_list\`) and pass \`audience: { profile_ids: [...] }\` to \`study_run\` — the same profiles play the same roles each time.
|
|
164
|
+
- **Load-bearing knobs**:
|
|
165
|
+
- \`assignment\` — encodes BOTH personas and what each is trying to do. More prose-heavy than other assignments; be specific. Example: \`"Founder pitches Series A to skeptical VC. Founder: defends AI customer-support startup, $2M ARR, 15% MoM. VC: thinks SaaS-for-SaaS is saturated, probes moat and unit economics."\`
|
|
166
|
+
- \`count\` — typically 1 per run; set higher to generate variations.
|
|
167
|
+
- **Iterating the scenario** (turn-by-turn refinement): create a **new iteration** with a revised assignment; reuse the same \`profile_ids\` if you pinned personas. See "Lifecycle".
|
|
168
|
+
- **Output**: a full transcript per rehearsal.
|
|
169
|
+
|
|
170
|
+
### Generate a fresh audience → \`audience_build\`
|
|
171
|
+
|
|
172
|
+
- **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
|
|
173
|
+
- **Output**: a list of \`profile_ids\` to pass into \`ask_run\` or \`study_run\`.
|
|
174
|
+
- **Cost**: slow (~30-120s) + credit-bearing. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
|
|
175
|
+
- **Growing an audience**: build only the delta — don't rebuild. Concat the new \`profile_ids\` with the existing ones for the next run. The "audience is a query" framing means there's no audience entity to update.
|
|
176
|
+
- **Shapes**:
|
|
177
|
+
\`\`\`
|
|
178
|
+
// Simple — description only
|
|
179
|
+
audience_build({
|
|
180
|
+
description: "Parents of toddlers (ages 1-3), US, evening-routine focused",
|
|
181
|
+
count: 8,
|
|
182
|
+
})
|
|
183
|
+
// → { profile_ids: ["tp-…", ...] }
|
|
184
|
+
|
|
185
|
+
// Seeded from real transcripts (CLI only for local files)
|
|
186
|
+
// ish audience build --description "..." --count 10 \\
|
|
187
|
+
// --source @./interviews/customer-1.md \\
|
|
188
|
+
// --source @./interviews/customer-2.md
|
|
189
|
+
\`\`\`
|
|
190
|
+
|
|
191
|
+
## Lifecycle (what to re-use vs create anew)
|
|
192
|
+
|
|
193
|
+
The most common multi-turn question: "user wants to change X — re-use the existing thing or create a new one?"
|
|
194
|
+
|
|
195
|
+
| Change you want | What to do |
|
|
196
|
+
|---|---|
|
|
197
|
+
| Same ask, **same audience**, new variants | Pass \`ask_id\` (MCP) or \`--ask\` (CLI) on \`ask_run\` — re-uses the locked audience. |
|
|
198
|
+
| Same ask, **different audience** | New ask: omit \`ask_id\` (MCP) or pass \`--new\` (CLI). Audience is locked at ask creation. |
|
|
199
|
+
| Same study, **new media** (v2 deck, new image) | New **iteration** on the same study (\`iteration_create({ study_id, content_url \\| --media @path })\`). Iterations are immutable once they have results — never edit. |
|
|
200
|
+
| Same study, **new assignment** | **New study.** Assignment lives on the study; there's no in-place edit. Keep the old study's id for side-by-side comparison. *(Tester-pair exception: the assignment IS the content there — use a new **iteration** on the same study, not a new study.)* |
|
|
201
|
+
| Same audience across multiple runs / studies | Reuse the \`profile_ids\` array. Profiles are workspace-scoped resources (\`tp-…\`) — they live independently of any ask or study. |
|
|
202
|
+
| Chat endpoint definition needs to change (auth rotate, URL change) | \`chat_endpoint_init\` is **upsert-by-name** — re-init with the same \`name\` and a new \`from_curl\` spec. Re-run \`chat_endpoint_test\` to confirm. |
|
|
203
|
+
| Persona reuse in tester-pair | Pin via \`profile_ids\` on the first \`study_run\`; pass the same ids on subsequent runs. Without pinning, personas are re-synthesized from the assignment per run. |
|
|
204
|
+
|
|
205
|
+
When in doubt: side-by-side comparison usually beats in-place edits. Ids are cheap; result history isn't.
|
|
206
|
+
|
|
207
|
+
## Pitfalls
|
|
208
|
+
|
|
209
|
+
- **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
|
|
210
|
+
- **Ask audience vs variants** — see Lifecycle table for the re-use vs new-ask decision.
|
|
211
|
+
- **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
|
|
212
|
+
- **Credit costs**: \`ask_run\`, \`study_run\`, and \`audience_build\` consume credits. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` testers + 2 variants is usually within budget.
|
|
213
|
+
- **\`audience_build\` may return fewer profiles than requested** if the description is over-constrained. Always check how many \`profile_ids\` actually came back; don't blindly trust the requested \`count\`.
|
|
214
|
+
- **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
|
|
215
|
+
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching testers.
|
|
216
|
+
- **Chatbot auth drift**: tokens/sessions baked into \`--from-curl\` expire. If transcripts come back as identical short error strings, re-run \`chat_endpoint_test\` and refresh the curl spec.
|
|
217
|
+
- **401 surfaces as a fake blocker**: an unauthenticated endpoint produces "tester got stuck on auth screen" — it looks like a UX blocker but is really a configuration problem. Always confirm endpoint auth before reading transcripts as user-research data.
|
|
218
|
+
- **No per-page/per-timestamp scoping for media**: there's no "evaluate just slide 14" or "react to seconds 0-30" API. State the focus explicitly in the \`assignment\` text, or pre-stitch the artifact (e.g. replace one slide locally, upload as a new iteration).
|
|
219
|
+
|
|
220
|
+
## When in doubt
|
|
221
|
+
|
|
222
|
+
\`ish docs\` (deep concept references, CLI-side) and live MCP tool descriptions (argument schemas, MCP-side) are closer to source-of-truth than this skill. **Trust them over this skill if they conflict.**
|
|
223
|
+
|
|
224
|
+
- **CLI present**: \`ish docs overview\`, \`ish docs get-page concepts/run-verbs\`, \`ish docs get-page guides/cold-start\`, \`ish docs search <keyword>\`.
|
|
225
|
+
- **MCP only**: read the tool description of the MCP tool you're about to call; cross-reference against this skill's "Shape" blocks. The MCP server's own \`instructions\` block (delivered automatically with the tool list) covers vocabulary and posture and is authoritative.
|
|
527
226
|
`;
|
|
528
227
|
const WORKFLOWS_MD = `# ish workflows — worked examples
|
|
529
228
|
|
|
@@ -694,6 +393,14 @@ ish study run --country SE --min-age 35 --max-age 50 --sample 5 --wait
|
|
|
694
393
|
|
|
695
394
|
# Second run — every female profile in the workspace, same iteration:
|
|
696
395
|
ish study run --gender female --all --wait
|
|
396
|
+
|
|
397
|
+
# Free-text filters: --search matches the profile **name**, --bio
|
|
398
|
+
# matches the profile **bio**, --occupation matches the profile
|
|
399
|
+
# **occupation** (repeatable, OR-joined). All are case-insensitive
|
|
400
|
+
# substrings — the same flag set works on \`ish profile list\`,
|
|
401
|
+
# \`ish ask run\`, \`ish ask add-testers\`, and \`ish ask create\`.
|
|
402
|
+
ish study run --bio "screen reader" --all --wait
|
|
403
|
+
ish study run --occupation founder --occupation designer --sample 6 --wait
|
|
697
404
|
\`\`\`
|
|
698
405
|
|
|
699
406
|
If you don't pass any audience flags, \`ish study run\` reuses the
|
|
@@ -1349,7 +1056,6 @@ function buildSkillMd() {
|
|
|
1349
1056
|
"metadata:",
|
|
1350
1057
|
" author: ish",
|
|
1351
1058
|
` version: ${JSON.stringify(VERSION)}`,
|
|
1352
|
-
"allowed-tools: Bash(ish:*)",
|
|
1353
1059
|
"---",
|
|
1354
1060
|
"",
|
|
1355
1061
|
].join("\n");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ishlabs/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.17.2",
|
|
4
4
|
"description": "The command-line interface for ish",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -10,8 +10,11 @@
|
|
|
10
10
|
"build": "tsc",
|
|
11
11
|
"patch:playwright": "node scripts/patch-playwright-core.mjs",
|
|
12
12
|
"build:binary": "npm run patch:playwright && bun build --compile --external chromium-bidi --external electron src/index.ts --outfile ish",
|
|
13
|
+
"build:skills-repo": "npm run build && node scripts/generate-skills-repo.mjs",
|
|
14
|
+
"verify:skills-parity": "npm run build && node scripts/verify-skills-parity.mjs",
|
|
13
15
|
"dev": "tsc --watch",
|
|
14
|
-
"
|
|
16
|
+
"test": "npm run build && node --test --test-concurrency=1 tests/*.test.mjs",
|
|
17
|
+
"prepublishOnly": "npm test"
|
|
15
18
|
},
|
|
16
19
|
"engines": {
|
|
17
20
|
"node": ">=18.0.0"
|