@ishlabs/cli 0.14.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/commands/profile.js +237 -1
- package/dist/commands/study-run.js +269 -32
- package/dist/commands/workspace.js +3 -2
- package/dist/lib/api-client.d.ts +7 -0
- package/dist/lib/api-client.js +9 -0
- package/dist/lib/command-helpers.js +1 -1
- package/dist/lib/docs.js +320 -1
- package/dist/lib/enums.d.ts +8 -0
- package/dist/lib/enums.js +12 -0
- package/dist/lib/skill-content.js +112 -7
- package/dist/lib/study-events.d.ts +46 -0
- package/dist/lib/study-events.js +126 -0
- package/dist/lib/types.d.ts +56 -1
- package/package.json +1 -1
package/dist/lib/docs.js
CHANGED
|
@@ -1213,6 +1213,10 @@ The legacy \`--tech-savviness\` flag was removed in
|
|
|
1213
1213
|
|
|
1214
1214
|
- \`concepts/source\` — the inputs to \`profile generate\`.
|
|
1215
1215
|
- \`concepts/audience\` — how profiles get selected into a run.
|
|
1216
|
+
- \`guides/build-specific-tester\` — iterative probe loop
|
|
1217
|
+
(\`profile suggest-scenarios\` + \`profile evidence add\`/\`list\`)
|
|
1218
|
+
for crafting one specific persona, distinct from the
|
|
1219
|
+
audience-generation flow.
|
|
1216
1220
|
- \`reference/billing-limits\` — \`maxCustomTesterProfiles\` cap on profile creation.
|
|
1217
1221
|
`;
|
|
1218
1222
|
const CONCEPT_SOURCE = `# concept: source
|
|
@@ -1264,7 +1268,12 @@ flags. Two ways to select:
|
|
|
1264
1268
|
- \`--min-age 25\`
|
|
1265
1269
|
- \`--max-age 50\`
|
|
1266
1270
|
- \`--search "early adopter"\`
|
|
1267
|
-
- \`--visibility shared|
|
|
1271
|
+
- \`--visibility workspace|shared|platform\` (filter by where the
|
|
1272
|
+
profile lives: your workspace, the community-published pool, or
|
|
1273
|
+
the admin-curated platform pool; old values \`private\` /
|
|
1274
|
+
\`public\` still accepted as aliases for \`workspace\` /
|
|
1275
|
+
\`platform\` until the next release; the server returns a
|
|
1276
|
+
deprecation warning when they are used)
|
|
1268
1277
|
|
|
1269
1278
|
The two modes are **mutually exclusive** — pass either \`--profile\` or
|
|
1270
1279
|
the filter set, not both.
|
|
@@ -1526,18 +1535,200 @@ ish study run --study s-b2c -y --json | jq -r '.tester_aliases[]' # → t-072,
|
|
|
1526
1535
|
ish study poll <tester_id> # one-shot status for one tester
|
|
1527
1536
|
ish study wait <tester_id> --timeout 600 # block until that tester finishes
|
|
1528
1537
|
ish study cancel <tester_id> # cancel a running simulation
|
|
1538
|
+
ish study extend <tester_id> --add-steps 10 # resume a terminal tester with N more steps
|
|
1529
1539
|
\`\`\`
|
|
1530
1540
|
|
|
1531
1541
|
\`<tester_id>\` accepts a tester alias (\`t-…\`) or a full UUID. The
|
|
1532
1542
|
study-level \`poll\`/\`wait\` forms also exist (\`--study <id>\` /
|
|
1533
1543
|
\`--iteration <id>\`) for whole-batch progress.
|
|
1534
1544
|
|
|
1545
|
+
\`cancel\` and \`extend\` form a reversible stop/start pair. \`cancel\`
|
|
1546
|
+
walks a running tester to a terminal \`cancelled\` status (no row
|
|
1547
|
+
removed); \`extend\` then spawns a fresh tester branched from the
|
|
1548
|
+
cancelled tester's last interaction. See
|
|
1549
|
+
\`concepts/extending-a-simulation\` for the full mental model.
|
|
1550
|
+
|
|
1535
1551
|
## Related
|
|
1536
1552
|
|
|
1537
1553
|
- \`reference/json-mode\` — output modes (display vs capture vs chain).
|
|
1538
1554
|
Use \`--get tester_aliases\` to capture the run's testers without
|
|
1539
1555
|
piping through \`jq\`. \`--human\` forces table output even through
|
|
1540
1556
|
\`tee\`/redirection.
|
|
1557
|
+
- \`concepts/extending-a-simulation\` — \`study extend\` flow, when to
|
|
1558
|
+
use it, and the mid-run \`--instruction\` UX.
|
|
1559
|
+
`;
|
|
1560
|
+
const CONCEPT_EXTENDING_SIMULATION = `# concept: extending a simulation
|
|
1561
|
+
|
|
1562
|
+
\`ish study extend <tester_id>\` resumes a **terminal** tester with
|
|
1563
|
+
more interactions — and optionally a mid-run instruction. The source
|
|
1564
|
+
tester is left untouched; a **new** tester row is spawned under the
|
|
1565
|
+
same iteration, branched from the source's last interaction. Use it
|
|
1566
|
+
when a run hits the \`--max-interactions\` cap before the tester
|
|
1567
|
+
finished, or when you want to probe a "what if I had told them X
|
|
1568
|
+
mid-run?" scenario without restarting from scratch.
|
|
1569
|
+
|
|
1570
|
+
## When extend is the right verb
|
|
1571
|
+
|
|
1572
|
+
- Run hit the step cap (\`--max-interactions\`) before the tester
|
|
1573
|
+
completed the assignment — give it 10 more steps to push through.
|
|
1574
|
+
- Tester veered off into a dead-end — cancel it, then extend with an
|
|
1575
|
+
instruction redirecting it ("Stop browsing the blog. Open the pricing
|
|
1576
|
+
page and try to add a seat.").
|
|
1577
|
+
- You want to test how a tester reacts to a mid-run change you didn't
|
|
1578
|
+
capture in the original assignment — without re-running the whole
|
|
1579
|
+
cohort.
|
|
1580
|
+
|
|
1581
|
+
When extend is **not** the right verb:
|
|
1582
|
+
|
|
1583
|
+
- Source tester is still RUNNING. \`cancel\` it first, then extend.
|
|
1584
|
+
Extend refuses non-terminal sources server-side.
|
|
1585
|
+
- You want a fresh cohort with new audience flags. Use \`study run\`
|
|
1586
|
+
with \`--profile\` / \`--sample\` / \`--all\` instead — extend is a
|
|
1587
|
+
per-tester resume, not a batch op.
|
|
1588
|
+
- You want to change the iteration's URL or content. Edit the iteration
|
|
1589
|
+
itself (\`iteration update\` or a fresh iteration) — extend always
|
|
1590
|
+
inherits the source's iteration config.
|
|
1591
|
+
|
|
1592
|
+
## Mental model — cancel + extend are a reversible pair
|
|
1593
|
+
|
|
1594
|
+
\`cancel\` and \`extend\` are siblings in the tester lifecycle:
|
|
1595
|
+
|
|
1596
|
+
\`\`\`
|
|
1597
|
+
RUNNING ──(cancel)──▶ CANCELLED ──(extend)──▶ new RUNNING tester
|
|
1598
|
+
(branched from the
|
|
1599
|
+
cancelled tester's
|
|
1600
|
+
last interaction)
|
|
1601
|
+
|
|
1602
|
+
COMPLETED / FAILED ──(extend)──▶ new RUNNING tester
|
|
1603
|
+
\`\`\`
|
|
1604
|
+
|
|
1605
|
+
\`cancel\` is non-destructive — the tester row, every interaction, every
|
|
1606
|
+
screenshot, and the questionnaire answers all survive. \`extend\` then
|
|
1607
|
+
forks from the last interaction to keep the new tester's history
|
|
1608
|
+
seamlessly continuous.
|
|
1609
|
+
|
|
1610
|
+
## Flags
|
|
1611
|
+
|
|
1612
|
+
\`\`\`
|
|
1613
|
+
ish study extend <tester_id>
|
|
1614
|
+
[--add-steps <n>] # extra steps, 1-50, default 10
|
|
1615
|
+
[--instruction <text|@path|->] # optional mid-run user message
|
|
1616
|
+
[--wait] # block until terminal
|
|
1617
|
+
[--timeout <s>] # wait timeout (default 300)
|
|
1618
|
+
[--dispatch-timeout <s>] # POST timeout (default 120)
|
|
1619
|
+
\`\`\`
|
|
1620
|
+
|
|
1621
|
+
\`--instruction\` accepts three input shapes, matching the rest of the
|
|
1622
|
+
CLI:
|
|
1623
|
+
|
|
1624
|
+
\`\`\`bash
|
|
1625
|
+
# Inline:
|
|
1626
|
+
ish study extend t-072 --instruction "Switch to the German pricing page."
|
|
1627
|
+
|
|
1628
|
+
# From a file (long-form prompts, version-controlled):
|
|
1629
|
+
ish study extend t-072 --instruction @/tmp/redirect.md
|
|
1630
|
+
|
|
1631
|
+
# From stdin (pipe-friendly):
|
|
1632
|
+
echo "Try the search bar instead." | ish study extend t-072 --instruction -
|
|
1633
|
+
\`\`\`
|
|
1634
|
+
|
|
1635
|
+
The instruction is sent to the backend as \`user_message\`. The new
|
|
1636
|
+
tester treats it as **overriding direction** for the rest of the run —
|
|
1637
|
+
the backend surfaces it in a dedicated \`<user_added_instructions>\`
|
|
1638
|
+
block on every prompt, not just the first turn, so the LLM doesn't
|
|
1639
|
+
forget about it as the run goes on.
|
|
1640
|
+
|
|
1641
|
+
## JSON output (lean, write-path)
|
|
1642
|
+
|
|
1643
|
+
Default (no \`--wait\`):
|
|
1644
|
+
|
|
1645
|
+
\`\`\`json
|
|
1646
|
+
{
|
|
1647
|
+
"tester_id": "<new-uuid>",
|
|
1648
|
+
"tester_alias": "t-xyz",
|
|
1649
|
+
"source_tester_id": "<source-uuid>",
|
|
1650
|
+
"source_alias": "t-abc",
|
|
1651
|
+
"study_id": "<study-uuid>",
|
|
1652
|
+
"job_id": "<job-uuid>",
|
|
1653
|
+
"additional_steps": 10,
|
|
1654
|
+
"instruction": "Switch to the German pricing page.",
|
|
1655
|
+
"message": "Simulation queued"
|
|
1656
|
+
}
|
|
1657
|
+
\`\`\`
|
|
1658
|
+
|
|
1659
|
+
With \`--wait\`, a \`result\` field is appended once the new tester
|
|
1660
|
+
reaches a terminal status:
|
|
1661
|
+
|
|
1662
|
+
\`\`\`json
|
|
1663
|
+
{
|
|
1664
|
+
...,
|
|
1665
|
+
"result": {
|
|
1666
|
+
"status": "completed",
|
|
1667
|
+
"interaction_count": 14,
|
|
1668
|
+
"tester_name": "Anna, 34, Munich"
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
\`\`\`
|
|
1672
|
+
|
|
1673
|
+
UUID fields (\`tester_id\`, \`source_tester_id\`, \`study_id\`, \`job_id\`)
|
|
1674
|
+
are preserved in lean output because the new \`tester_id\` is the
|
|
1675
|
+
load-bearing return value — same exception \`study run\` makes.
|
|
1676
|
+
|
|
1677
|
+
## Errors
|
|
1678
|
+
|
|
1679
|
+
| Backend | CLI behavior | Exit |
|
|
1680
|
+
|---|---|---|
|
|
1681
|
+
| Source not terminal (RUNNING / QUEUED) | \`Tester is still running — cancel it first or wait for completion.\` | 2 |
|
|
1682
|
+
| Source tester not found | \`Tester not found: <id>\` | 4 |
|
|
1683
|
+
| \`additional_steps\` out of range | Client-side parser rejects before the network call | 2 |
|
|
1684
|
+
| Insufficient credits | Bubbles the server message; retry only after topping up | 5 |
|
|
1685
|
+
| Wait timed out (\`--wait\` only) | \`WaitTimeoutError\` envelope with current status under \`progress.rows[0]\` — the run keeps going server-side; resume with \`study wait <new-tester>\` | 5 |
|
|
1686
|
+
|
|
1687
|
+
## Cost model
|
|
1688
|
+
|
|
1689
|
+
\`extend\` charges credits for **only \`additional_steps\`**, not for
|
|
1690
|
+
the source's original \`max_interactions\` cap. The formula is the same
|
|
1691
|
+
as \`study run\` for interactive runs: \`max(1, round(N / 10))\` per
|
|
1692
|
+
tester. So \`--add-steps 10\` costs **1 credit**; \`--add-steps 50\`
|
|
1693
|
+
costs **5 credits**. See \`reference/credits\` for the full table.
|
|
1694
|
+
|
|
1695
|
+
## Worked example — push past the step cap
|
|
1696
|
+
|
|
1697
|
+
\`\`\`bash
|
|
1698
|
+
# 1. Run a study with a small step cap to feel the limit:
|
|
1699
|
+
ish study run --sample 1 --max-interactions 5 --wait
|
|
1700
|
+
# → tester t-072 (status: completed_with_errors, hit cap on step 5)
|
|
1701
|
+
|
|
1702
|
+
# 2. Inspect what happened:
|
|
1703
|
+
ish study tester t-072 --summary
|
|
1704
|
+
|
|
1705
|
+
# 3. Give it 15 more steps:
|
|
1706
|
+
ish study extend t-072 --add-steps 15 --wait --timeout 600
|
|
1707
|
+
# → new tester t-9af, status: completed, 18 interactions total
|
|
1708
|
+
|
|
1709
|
+
# 4. Read the new tester's transcript:
|
|
1710
|
+
ish study tester t-9af --summary
|
|
1711
|
+
\`\`\`
|
|
1712
|
+
|
|
1713
|
+
## Worked example — redirect mid-run
|
|
1714
|
+
|
|
1715
|
+
\`\`\`bash
|
|
1716
|
+
# Tester wandered into the wrong flow. Cancel, then redirect:
|
|
1717
|
+
ish study cancel t-072
|
|
1718
|
+
ish study extend t-072 \\
|
|
1719
|
+
--instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
|
|
1720
|
+
--add-steps 10 --wait
|
|
1721
|
+
\`\`\`
|
|
1722
|
+
|
|
1723
|
+
## Related
|
|
1724
|
+
|
|
1725
|
+
- \`concepts/run-verbs\` — the top-level decision rule (\`study run\` vs
|
|
1726
|
+
\`ask run\`); extend is a lifecycle verb downstream of either.
|
|
1727
|
+
- \`reference/credits\` — per-modality cost formulas. \`extend\` follows
|
|
1728
|
+
the interactive formula scaled to \`additional_steps\`.
|
|
1729
|
+
- \`reference/aliases\` — the \`t-…\` prefix and how aliases resolve.
|
|
1730
|
+
- \`reference/json-mode\` — capture-mode (\`--get tester_alias\`) for
|
|
1731
|
+
chaining the new tester into the next call.
|
|
1541
1732
|
`;
|
|
1542
1733
|
const REFERENCE_ALIASES = `# reference: aliases
|
|
1543
1734
|
|
|
@@ -3236,6 +3427,122 @@ without a second round-trip.
|
|
|
3236
3427
|
- \`reference/json-mode\` — error envelope shape and exit code mapping
|
|
3237
3428
|
(\`usage_limit_reached\` is HTTP 403, exit 1, non-retryable).
|
|
3238
3429
|
`;
|
|
3430
|
+
const GUIDE_BUILD_SPECIFIC_TESTER = `# guide: build a specific simulated tester from notes
|
|
3431
|
+
|
|
3432
|
+
\`profile generate\` is the right tool for *audiences* (many profiles
|
|
3433
|
+
from a description or interview sources). When you want **one specific
|
|
3434
|
+
tester** — modelling a real prospect, rebuilding a persona from a
|
|
3435
|
+
single interview, or simulating a named stakeholder for a pitch
|
|
3436
|
+
rehearsal — use the iterative probe loop:
|
|
3437
|
+
|
|
3438
|
+
1. \`ish profile suggest-scenarios\` — describe what you already
|
|
3439
|
+
know; the LLM returns 1–10 scenario probes designed to expose what
|
|
3440
|
+
you don't.
|
|
3441
|
+
2. Answer the probes locally (in chat, with the user, or from
|
|
3442
|
+
transcripts).
|
|
3443
|
+
3. \`ish profile create --file ...\` — save the profile shell.
|
|
3444
|
+
4. \`ish profile evidence add <id>\` — persist the answered probes
|
|
3445
|
+
as structured evidence on the profile so they survive into runtime
|
|
3446
|
+
persona injection.
|
|
3447
|
+
5. \`ish profile evidence list <id>\` — read back what's saved,
|
|
3448
|
+
newest first. Useful for verifying a session or branching on prior
|
|
3449
|
+
state before the next probe round.
|
|
3450
|
+
|
|
3451
|
+
## Probe types
|
|
3452
|
+
|
|
3453
|
+
\`suggest-scenarios\` returns four discriminated shapes. Each is meant
|
|
3454
|
+
to surface a different facet of the persona:
|
|
3455
|
+
|
|
3456
|
+
- \`situation\` — \`{situation, options[2..4]}\`: "you're in scenario
|
|
3457
|
+
X; which option fits?" Multiple-choice, lets the persona pick
|
|
3458
|
+
behavior.
|
|
3459
|
+
- \`voice\` — \`{situation, options[2..4]}\`: same shape as situation
|
|
3460
|
+
but framed around tone/phrasing the tester would actually use.
|
|
3461
|
+
- \`binary\` — \`{description, option_a, option_b}\`: forced choice
|
|
3462
|
+
between two competing values or trade-offs.
|
|
3463
|
+
- \`micro-story\` — \`{prompt}\`: open-ended; the persona narrates a
|
|
3464
|
+
short story. Answer with a multi-sentence free-text reply.
|
|
3465
|
+
|
|
3466
|
+
The wire format keeps \`option_a\` / \`option_b\` (snake_case). The
|
|
3467
|
+
CLI passes them through verbatim — don't transform to camelCase.
|
|
3468
|
+
|
|
3469
|
+
**Identity rule** — when building the traces file after answering a
|
|
3470
|
+
probe, copy the scenario's \`type\` straight into the trace's
|
|
3471
|
+
\`source\`. Same enum, two field names. The mechanical mapping:
|
|
3472
|
+
|
|
3473
|
+
| Suggested scenario field | Trace field |
|
|
3474
|
+
|--------------------------|------------------|
|
|
3475
|
+
| \`scenario.type\` | \`trace.source\` |
|
|
3476
|
+
| \`scenario.situation\` / \`scenario.description\` / \`scenario.prompt\` | \`trace.scenario_prompt\` (one line, whatever question label the user actually answered) |
|
|
3477
|
+
| (user's answer) | \`trace.text\` |
|
|
3478
|
+
|
|
3479
|
+
## Worked example
|
|
3480
|
+
|
|
3481
|
+
\`\`\`
|
|
3482
|
+
# 1. Suggest 5 probes from a context blob
|
|
3483
|
+
ish profile suggest-scenarios \\
|
|
3484
|
+
--context "Staff platform engineer at a Stripe-using fintech. \\
|
|
3485
|
+
Owns on-call for the payments edge. Burned by a Black Friday \\
|
|
3486
|
+
outage last year." \\
|
|
3487
|
+
--count 5
|
|
3488
|
+
# → {scenarios: [{type: "situation", ...}, {type: "binary", ...}, ...]}
|
|
3489
|
+
|
|
3490
|
+
# 2. (offline) answer the probes — build a local answers.json:
|
|
3491
|
+
# [
|
|
3492
|
+
# {"text": "Page the staff engineer first, then start the runbook.",
|
|
3493
|
+
# "source": "situation",
|
|
3494
|
+
# "scenario_prompt": "PagerDuty fires at 02:00 on payments edge."},
|
|
3495
|
+
# {"text": "Option A — cut the rollout, take the revenue hit.",
|
|
3496
|
+
# "source": "binary",
|
|
3497
|
+
# "scenario_prompt": "Ship the migration or hold for incident review?"}
|
|
3498
|
+
# ]
|
|
3499
|
+
|
|
3500
|
+
# 3. Create the profile shell
|
|
3501
|
+
ish profile create --file ./persona.json
|
|
3502
|
+
# → tp-d4e
|
|
3503
|
+
|
|
3504
|
+
# 4. Persist the answered probes as evidence
|
|
3505
|
+
ish profile evidence add tp-d4e --traces-file ./answers.json
|
|
3506
|
+
# → {items: [{id, text, source, scenario_prompt, created_at}, ...], total: N}
|
|
3507
|
+
|
|
3508
|
+
# 5. Read back what got saved (also useful before the next probe round)
|
|
3509
|
+
ish profile evidence list tp-d4e
|
|
3510
|
+
ish profile evidence list tp-d4e --get source # one source per line
|
|
3511
|
+
\`\`\`
|
|
3512
|
+
|
|
3513
|
+
## Iterating the probe loop
|
|
3514
|
+
|
|
3515
|
+
To go deeper on a follow-up pass, feed the prior round back in so the
|
|
3516
|
+
LLM doesn't paraphrase what you already asked:
|
|
3517
|
+
|
|
3518
|
+
\`\`\`
|
|
3519
|
+
ish profile suggest-scenarios \\
|
|
3520
|
+
--context-file ./notes.md \\
|
|
3521
|
+
--count 3 \\
|
|
3522
|
+
--already-surfaced '["PagerDuty fires at 02:00 on payments edge."]' \\
|
|
3523
|
+
--previous-answers @./answers.json
|
|
3524
|
+
\`\`\`
|
|
3525
|
+
|
|
3526
|
+
\`--previous-answers\` is the array of \`{type, prompt, answer}\` rows
|
|
3527
|
+
already collected. \`--already-surfaced\` is the array of prompt
|
|
3528
|
+
labels already shown — the LLM uses these to avoid re-asking. Both
|
|
3529
|
+
cap at 40 entries.
|
|
3530
|
+
|
|
3531
|
+
## When to reach for which command
|
|
3532
|
+
|
|
3533
|
+
| Need | Command |
|
|
3534
|
+
|---|---|
|
|
3535
|
+
| Many profiles from a description or interview | \`ish profile generate\` |
|
|
3536
|
+
| One specific persona, iterative probe loop | \`ish profile suggest-scenarios\` + \`evidence add\`/\`list\` |
|
|
3537
|
+
| Exact profile from a JSON spec, no LLM | \`ish profile create --file\` |
|
|
3538
|
+
|
|
3539
|
+
## Related
|
|
3540
|
+
|
|
3541
|
+
- \`concepts/profile\` — what a tester profile is; structured fields.
|
|
3542
|
+
- \`concepts/source\` — interview transcripts / audio / PDF inputs
|
|
3543
|
+
for the audience-generation flow.
|
|
3544
|
+
- \`reference/aliases\` — \`tp-…\` is the profile alias prefix.
|
|
3545
|
+
`;
|
|
3239
3546
|
const PAGES = [
|
|
3240
3547
|
{
|
|
3241
3548
|
slug: "overview",
|
|
@@ -3321,6 +3628,12 @@ const PAGES = [
|
|
|
3321
3628
|
description: "Side-by-side; decision rule for choosing one over the other.",
|
|
3322
3629
|
body: CONCEPT_RUN_VERBS,
|
|
3323
3630
|
},
|
|
3631
|
+
{
|
|
3632
|
+
slug: "concepts/extending-a-simulation",
|
|
3633
|
+
title: "concept: extending a simulation (study extend)",
|
|
3634
|
+
description: "Resume a terminal tester with more steps and an optional mid-run instruction. Cancel + extend as a reversible stop/start pair.",
|
|
3635
|
+
body: CONCEPT_EXTENDING_SIMULATION,
|
|
3636
|
+
},
|
|
3324
3637
|
{
|
|
3325
3638
|
slug: "concepts/active-context",
|
|
3326
3639
|
title: "concept: active context",
|
|
@@ -3375,6 +3688,12 @@ const PAGES = [
|
|
|
3375
3688
|
description: "What to do when workspace_create returns usage_limit_reached on a saturated account. Inspect workspace_get (has_headroom / child_counts / last_activity_at), pick a reuse target, or call ish workspace create --ensure name.",
|
|
3376
3689
|
body: GUIDE_COLD_START,
|
|
3377
3690
|
},
|
|
3691
|
+
{
|
|
3692
|
+
slug: "guides/build-specific-tester",
|
|
3693
|
+
title: "guide: build a specific simulated tester from notes",
|
|
3694
|
+
description: "Iterative probe loop for one specific persona: profile suggest-scenarios returns LLM probes; answer them locally; profile evidence add persists answers; profile evidence list reads them back.",
|
|
3695
|
+
body: GUIDE_BUILD_SPECIFIC_TESTER,
|
|
3696
|
+
},
|
|
3378
3697
|
];
|
|
3379
3698
|
const PAGES_BY_SLUG = new Map(PAGES.map((p) => [p.slug, p]));
|
|
3380
3699
|
export function listPages() {
|
package/dist/lib/enums.d.ts
CHANGED
|
@@ -44,6 +44,14 @@ export declare const LOCALE_TYPES: readonly ["urban", "suburban", "small_town",
|
|
|
44
44
|
export type LocaleType = typeof LOCALE_TYPES[number];
|
|
45
45
|
export declare const INCOME_LEVELS: readonly ["lower", "lower_middle", "middle", "upper_middle", "upper", "prefer_not_to_say"];
|
|
46
46
|
export type IncomeLevel = typeof INCOME_LEVELS[number];
|
|
47
|
+
/**
|
|
48
|
+
* Source kinds for a persisted scenario answer (EvidenceTrace.source). Matches
|
|
49
|
+
* the backend `EvidenceSource` literal union — one value is hyphenated
|
|
50
|
+
* (`micro-story`) so the wire format is mixed; `assertEnumValue` is strict
|
|
51
|
+
* about this and does not fold hyphens to underscores.
|
|
52
|
+
*/
|
|
53
|
+
export declare const EVIDENCE_SOURCES: readonly ["situation", "voice", "binary", "micro-story"];
|
|
54
|
+
export type EvidenceSourceEnum = typeof EVIDENCE_SOURCES[number];
|
|
47
55
|
export declare const EMPLOYMENT_STATUSES: readonly ["employed_full_time", "employed_part_time", "self_employed", "unemployed_seeking", "student", "homemaker", "retired", "unable_to_work", "other"];
|
|
48
56
|
export type EmploymentStatus = typeof EMPLOYMENT_STATUSES[number];
|
|
49
57
|
/**
|
package/dist/lib/enums.js
CHANGED
|
@@ -76,6 +76,18 @@ export const INCOME_LEVELS = [
|
|
|
76
76
|
"upper",
|
|
77
77
|
"prefer_not_to_say",
|
|
78
78
|
];
|
|
79
|
+
/**
|
|
80
|
+
* Source kinds for a persisted scenario answer (EvidenceTrace.source). Matches
|
|
81
|
+
* the backend `EvidenceSource` literal union — one value is hyphenated
|
|
82
|
+
* (`micro-story`) so the wire format is mixed; `assertEnumValue` is strict
|
|
83
|
+
* about this and does not fold hyphens to underscores.
|
|
84
|
+
*/
|
|
85
|
+
export const EVIDENCE_SOURCES = [
|
|
86
|
+
"situation",
|
|
87
|
+
"voice",
|
|
88
|
+
"binary",
|
|
89
|
+
"micro-story",
|
|
90
|
+
];
|
|
79
91
|
export const EMPLOYMENT_STATUSES = [
|
|
80
92
|
"employed_full_time",
|
|
81
93
|
"employed_part_time",
|
|
@@ -213,6 +213,7 @@ See \`references/workflows.md\` in this skill for end-to-end transcripts:
|
|
|
213
213
|
- Generating profiles from a transcript or audio source
|
|
214
214
|
- Targeting a gated URL (basic auth, session cookie, login form)
|
|
215
215
|
- Re-running a study with a fresh audience
|
|
216
|
+
- Extending a tester past its step cap, or redirecting it mid-run (\`study extend\`)
|
|
216
217
|
|
|
217
218
|
## Display vs. capture: the right output mode
|
|
218
219
|
|
|
@@ -358,6 +359,13 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
|
|
|
358
359
|
- **\`ask add-questions\` supports \`--wait\` / \`--timeout\`.** Match
|
|
359
360
|
the parity of \`ask create\` and \`ask run\`. Without \`--wait\` the
|
|
360
361
|
command returns after dispatch (round still running).
|
|
362
|
+
- **\`study extend <tester>\` resumes a terminal tester.** Use it when
|
|
363
|
+
a run hit \`--max-interactions\` before finishing, or pair with
|
|
364
|
+
\`study cancel\` to redirect mid-run via \`--instruction\` (inline,
|
|
365
|
+
\`@path\`, or stdin via \`-\`). Spawns a **new** tester branched from
|
|
366
|
+
the source's last interaction — source row untouched. Credits debit
|
|
367
|
+
per \`max(1, round(additional_steps / 10))\`. See workflow #11 and
|
|
368
|
+
\`ish docs get-page concepts/extending-a-simulation\`.
|
|
361
369
|
- **\`pick_confidence\` (0..1) is on every \`--wants-pick\` response.**
|
|
362
370
|
The model's self-reported confidence in its variant choice. Use it
|
|
363
371
|
to break ties when nominal pick counts are close. See
|
|
@@ -607,7 +615,50 @@ ish profile generate --source tps-3a4 --propose-count
|
|
|
607
615
|
ish profile generate --source tps-3a4 --count 4
|
|
608
616
|
\`\`\`
|
|
609
617
|
|
|
610
|
-
## 4.
|
|
618
|
+
## 4. Build a specific simulated tester from notes
|
|
619
|
+
|
|
620
|
+
Goal: rebuild one named persona (a real prospect, a stakeholder for
|
|
621
|
+
a pitch rehearsal) via the iterative probe loop — distinct from
|
|
622
|
+
\`profile generate\`, which is for audiences.
|
|
623
|
+
|
|
624
|
+
\`\`\`bash
|
|
625
|
+
# 1. Suggest 5 probes from a context blob
|
|
626
|
+
ish profile suggest-scenarios \\
|
|
627
|
+
--context "Staff platform engineer at a Stripe-using fintech. \\
|
|
628
|
+
Owns on-call for the payments edge. Burned by a Black Friday \\
|
|
629
|
+
outage last year." \\
|
|
630
|
+
--count 5
|
|
631
|
+
# → {scenarios: [{type:"situation",...},{type:"binary",...},...]}
|
|
632
|
+
|
|
633
|
+
# 2. (offline) Answer the probes — build answers.json:
|
|
634
|
+
# [{"text":"...","source":"situation","scenario_prompt":"..."}, ...]
|
|
635
|
+
# Valid source values: situation, voice, binary, micro-story
|
|
636
|
+
|
|
637
|
+
# 3. Save the profile shell
|
|
638
|
+
ish profile create --file ./persona.json
|
|
639
|
+
# → tp-d4e
|
|
640
|
+
|
|
641
|
+
# 4. Persist the answers as structured evidence
|
|
642
|
+
ish profile evidence add tp-d4e --traces-file ./answers.json
|
|
643
|
+
|
|
644
|
+
# 5. Read back what's saved (also useful before the next probe round)
|
|
645
|
+
ish profile evidence list tp-d4e
|
|
646
|
+
\`\`\`
|
|
647
|
+
|
|
648
|
+
To iterate, feed prior prompts/answers back in so the LLM doesn't
|
|
649
|
+
paraphrase what you already asked:
|
|
650
|
+
|
|
651
|
+
\`\`\`bash
|
|
652
|
+
ish profile suggest-scenarios \\
|
|
653
|
+
--context-file ./notes.md --count 3 \\
|
|
654
|
+
--already-surfaced '["PagerDuty fires at 02:00."]' \\
|
|
655
|
+
--previous-answers @./answers.json
|
|
656
|
+
\`\`\`
|
|
657
|
+
|
|
658
|
+
See \`ish docs get-page guides/build-specific-tester\` for the full
|
|
659
|
+
walkthrough including the four probe-type shapes.
|
|
660
|
+
|
|
661
|
+
## 5. Target a gated URL (Vercel preview / staging gate / login form)
|
|
611
662
|
|
|
612
663
|
Configure credentials once on the workspace; testers reuse them.
|
|
613
664
|
|
|
@@ -633,7 +684,7 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
|
|
|
633
684
|
--username alice --password -
|
|
634
685
|
\`\`\`
|
|
635
686
|
|
|
636
|
-
##
|
|
687
|
+
## 6. Re-run a study with a fresh audience
|
|
637
688
|
|
|
638
689
|
Goal: same study, same iteration, but compare audiences.
|
|
639
690
|
|
|
@@ -649,7 +700,7 @@ If you don't pass any audience flags, \`ish study run\` reuses the
|
|
|
649
700
|
iteration's existing testers — useful for re-running after fixing the
|
|
650
701
|
target page.
|
|
651
702
|
|
|
652
|
-
##
|
|
703
|
+
## 7. Localhost target (dev environment)
|
|
653
704
|
|
|
654
705
|
Expose a port via a Cloudflare tunnel; \`ish connect\` prints the public
|
|
655
706
|
URL the study iteration can point at. \`connect\` is foreground and
|
|
@@ -675,7 +726,7 @@ URL=$(jq -r 'select(.status=="connected") | .tunnel_url' /tmp/ish-tunnel.log | h
|
|
|
675
726
|
ish iteration create --url "$URL"
|
|
676
727
|
\`\`\`
|
|
677
728
|
|
|
678
|
-
##
|
|
729
|
+
## 8. Chat-modality study (drive a chatbot endpoint)
|
|
679
730
|
|
|
680
731
|
The chat modality has **two modes**, picked by
|
|
681
732
|
\`iteration.details.mode_details.mode\`:
|
|
@@ -991,7 +1042,7 @@ ish iteration get <iter-id> --json \\
|
|
|
991
1042
|
ish study results <study-id> --transcript <tester-id> --json
|
|
992
1043
|
\`\`\`
|
|
993
1044
|
|
|
994
|
-
##
|
|
1045
|
+
## 9. Stage an ask for human review, then dispatch
|
|
995
1046
|
|
|
996
1047
|
Goal: prepare a billable A/B but let the user inspect and approve the
|
|
997
1048
|
audience + prompt before any credits are spent. Two-step flow with a
|
|
@@ -1025,7 +1076,7 @@ status as a column.
|
|
|
1025
1076
|
wait for. Pass \`--wait\` to \`ish ask dispatch\` instead if you want to
|
|
1026
1077
|
block until the round settles.
|
|
1027
1078
|
|
|
1028
|
-
##
|
|
1079
|
+
## 10. Display-vs-capture: a script that does both
|
|
1029
1080
|
|
|
1030
1081
|
Goal: drive an A/B in a script, capture aliases without \`jq\`, and
|
|
1031
1082
|
still show the human a readable result table at the end.
|
|
@@ -1054,6 +1105,60 @@ The mental rule: **\`--get\` is for capture, bare commands / \`--human\`
|
|
|
1054
1105
|
are for display, \`--json\` is for chaining (multiple fields at once).**
|
|
1055
1106
|
If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
|
|
1056
1107
|
|
|
1108
|
+
## 11. Extend a tester past its step cap (or redirect mid-run)
|
|
1109
|
+
|
|
1110
|
+
Goal: a tester hit the \`--max-interactions\` cap before finishing, or
|
|
1111
|
+
veered off into the wrong flow. Resume it with more steps and an
|
|
1112
|
+
optional mid-run instruction — without re-running the whole cohort.
|
|
1113
|
+
|
|
1114
|
+
\`\`\`bash
|
|
1115
|
+
# 1. Source run with a small cap to feel the limit:
|
|
1116
|
+
# (run once, capturing the first tester alias — a second bare run would dispatch and bill again)
|
|
1117
|
+
SRC=$(ish study run --sample 1 --max-interactions 5 --wait \\
|
|
1118
|
+
--get tester_aliases | head -1)
|
|
1119
|
+
|
|
1120
|
+
# 2. Inspect what stopped (optional, useful for the LLM to choose
|
|
1121
|
+
# a redirect instruction):
|
|
1122
|
+
ish study tester "$SRC" --summary
|
|
1123
|
+
|
|
1124
|
+
# 3a. Add 15 more steps, no new instruction — let the tester continue:
|
|
1125
|
+
ish study extend "$SRC" --add-steps 15 --wait --timeout 600
|
|
1126
|
+
|
|
1127
|
+
# 3b. OR redirect with a mid-run instruction (captured as user_message;
|
|
1128
|
+
# the backend surfaces it on every prompt for the rest of the run):
|
|
1129
|
+
ish study extend "$SRC" \\
|
|
1130
|
+
--instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
|
|
1131
|
+
--add-steps 10 --wait
|
|
1132
|
+
|
|
1133
|
+
# 4. Capture the new tester alias to chain into results:
|
|
1134
|
+
NEW=$(ish study extend "$SRC" --add-steps 10 --get tester_alias)
|
|
1135
|
+
ish study tester "$NEW" --summary
|
|
1136
|
+
\`\`\`
|
|
1137
|
+
|
|
1138
|
+
Rules to remember:
|
|
1139
|
+
- Source tester must be **terminal** (\`completed\` / \`failed\` /
|
|
1140
|
+
\`cancelled\`). If it's still running, \`ish study cancel <src>\` first.
|
|
1141
|
+
\`cancel\` is non-destructive — every interaction, screenshot, and
|
|
1142
|
+
questionnaire answer survives. \`cancel\` + \`extend\` form a
|
|
1143
|
+
reversible stop/start pair.
|
|
1144
|
+
- A **new** tester id is created under the same iteration (the backend
|
|
1145
|
+
branches from the source's last interaction). The source row is left
|
|
1146
|
+
untouched. Get the new id from \`.tester_id\` / \`.tester_alias\` on
|
|
1147
|
+
\`--json\`.
|
|
1148
|
+
- \`--add-steps\` is **only** the extra budget; it does NOT include the
|
|
1149
|
+
source's original cap. Credits debit per
|
|
1150
|
+
\`max(1, round(additional_steps / 10))\` — same formula as
|
|
1151
|
+
\`study run\` interactive, just scoped to the extension.
|
|
1152
|
+
- \`--instruction\` accepts three input shapes (matching the rest of
|
|
1153
|
+
the CLI): inline text, \`@/path/to/file\`, or \`-\` for stdin. Empty
|
|
1154
|
+
values after trimming are rejected client-side.
|
|
1155
|
+
- Don't use \`extend\` to change the iteration's URL / content. Edit
|
|
1156
|
+
the iteration directly (\`iteration update\`) or run a fresh
|
|
1157
|
+
\`study run\`. Extend always inherits the source's iteration config.
|
|
1158
|
+
|
|
1159
|
+
See \`ish docs get-page concepts/extending-a-simulation\` for the full
|
|
1160
|
+
mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
1161
|
+
|
|
1057
1162
|
## Tips for chaining commands as an agent
|
|
1058
1163
|
|
|
1059
1164
|
- Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
|
|
@@ -1174,7 +1279,7 @@ ish <command> --help
|
|
|
1174
1279
|
| \`study\` | Persistent research artifact | concepts/study |
|
|
1175
1280
|
| \`iteration\` | One configured run of a study (URL or media) | concepts/iteration |
|
|
1176
1281
|
| \`ask\` | Lightweight reaction artifact | concepts/ask |
|
|
1177
|
-
| \`profile\` | Tester profiles +
|
|
1282
|
+
| \`profile\` | Tester profiles, audience generation, and the \`suggest-scenarios\` + \`evidence add\`/\`list\` probe loop for crafting one specific persona | concepts/profile |
|
|
1178
1283
|
| \`source\` | Upload sources for profile generation | concepts/source |
|
|
1179
1284
|
| \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
|
|
1180
1285
|
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode tester_pair\` | guides/chat |
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSE consumer for the backend's per-study event stream.
|
|
3
|
+
*
|
|
4
|
+
* Used by `study run --wait` to wake up the poll loop as soon as a tester
|
|
5
|
+
* status / interaction event arrives, instead of waiting for the next poll
|
|
6
|
+
* tick. The canonical truth source remains `GET /studies/{id}` — SSE here
|
|
7
|
+
* only shortens the latency between a backend event and the next status
|
|
8
|
+
* fetch; the poll fallback still runs on a slow timer in case events are
|
|
9
|
+
* missed.
|
|
10
|
+
*
|
|
11
|
+
* Best-effort:
|
|
12
|
+
* - Mints a short-lived stream token (POST /auth/stream-token).
|
|
13
|
+
* - Opens `GET /studies/{id}/events?token=…` via `fetch` and streams the
|
|
14
|
+
* response body.
|
|
15
|
+
* - Returns (silently exits the iterator) on any failure — token mint
|
|
16
|
+
* 503 (server not configured), endpoint 503 (broker offline on this
|
|
17
|
+
* instance), network error, abort. The caller's polling rhythm is the
|
|
18
|
+
* safety net; we never raise.
|
|
19
|
+
*
|
|
20
|
+
* Stream-token TTL is 1h on the backend. For runs longer than that the
|
|
21
|
+
* fetch will end (server closes); the caller falls back to pure polling
|
|
22
|
+
* for the remainder.
|
|
23
|
+
*/
|
|
24
|
+
import { ApiClient, ApiError } from "./api-client.js";
|
|
25
|
+
export interface StudyEvent {
|
|
26
|
+
type: string;
|
|
27
|
+
study_id: string;
|
|
28
|
+
iteration_id?: string | null;
|
|
29
|
+
tester_id?: string | null;
|
|
30
|
+
interaction_id?: string | null;
|
|
31
|
+
frame_id?: string | null;
|
|
32
|
+
frame_version_id?: string | null;
|
|
33
|
+
tester_status?: string | null;
|
|
34
|
+
ts: string;
|
|
35
|
+
seq: number;
|
|
36
|
+
payload?: unknown;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Async generator that yields parsed StudyEvents from the backend SSE
|
|
40
|
+
* stream. Exits silently (without throwing) on failure or abort — callers
|
|
41
|
+
* MUST have a polling fallback that drives correctness.
|
|
42
|
+
*/
|
|
43
|
+
export declare function streamStudyEvents(client: ApiClient, studyId: string, signal: AbortSignal): AsyncGenerator<StudyEvent, void, void>;
|
|
44
|
+
/** Predicate used by callers to skip the synthetic LAG marker (returns a plain boolean; it does not narrow the type). */
|
|
45
|
+
export declare function isLagEvent(event: StudyEvent): boolean;
|
|
46
|
+
export { ApiError };
|