@ishlabs/cli 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -5
- package/dist/commands/ask.d.ts +12 -0
- package/dist/commands/ask.js +127 -2
- package/dist/commands/chat.d.ts +17 -0
- package/dist/commands/chat.js +589 -0
- package/dist/commands/iteration.js +134 -14
- package/dist/commands/secret.d.ts +20 -0
- package/dist/commands/secret.js +246 -0
- package/dist/commands/study-run.d.ts +38 -0
- package/dist/commands/study-run.js +199 -80
- package/dist/commands/study-tester.js +17 -2
- package/dist/commands/study.js +309 -37
- package/dist/commands/workspace.js +81 -0
- package/dist/config.d.ts +3 -0
- package/dist/connect.d.ts +3 -0
- package/dist/connect.js +346 -22
- package/dist/index.js +64 -6
- package/dist/lib/alias-hydrate.d.ts +42 -0
- package/dist/lib/alias-hydrate.js +175 -0
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +28 -1
- package/dist/lib/auth.js +4 -2
- package/dist/lib/chat-endpoint-formatters.d.ts +39 -0
- package/dist/lib/chat-endpoint-formatters.js +104 -0
- package/dist/lib/command-helpers.d.ts +18 -0
- package/dist/lib/command-helpers.js +105 -3
- package/dist/lib/docs.js +542 -17
- package/dist/lib/modality.d.ts +42 -0
- package/dist/lib/modality.js +192 -0
- package/dist/lib/output.d.ts +41 -0
- package/dist/lib/output.js +453 -19
- package/dist/lib/paths.d.ts +1 -0
- package/dist/lib/paths.js +3 -0
- package/dist/lib/skill-content.js +182 -12
- package/dist/lib/types.d.ts +15 -0
- package/package.json +1 -1
package/dist/lib/docs.js
CHANGED
|
@@ -98,11 +98,35 @@ ish workspace list
|
|
|
98
98
|
ish workspace create --name "My product" --base-url https://example.com
|
|
99
99
|
ish workspace use w-6ec # set as active
|
|
100
100
|
ish workspace get # show the active workspace
|
|
101
|
+
ish workspace info # usage counters + plan caps (see below)
|
|
101
102
|
ish workspace site-access status
|
|
102
103
|
\`\`\`
|
|
103
104
|
|
|
105
|
+
## Checking usage before quota-consuming calls
|
|
106
|
+
|
|
107
|
+
\`ish workspace info\` shows usage counters so an agent can branch on
|
|
108
|
+
plan limits without burning a doomed \`study create\` attempt that
|
|
109
|
+
returns \`error_code: usage_limit_reached\`.
|
|
110
|
+
|
|
111
|
+
\`\`\`
|
|
112
|
+
ish workspace info --json
|
|
113
|
+
{
|
|
114
|
+
"studies_used": 2,
|
|
115
|
+
"studies_max": 3,
|
|
116
|
+
"testers_used": 0,
|
|
117
|
+
"testers_max": 3,
|
|
118
|
+
"tier": "free"
|
|
119
|
+
}
|
|
120
|
+
\`\`\`
|
|
121
|
+
|
|
122
|
+
A \`null\` value on a \`*_max\` field means "unlimited" (paid tiers).
|
|
123
|
+
Branch on \`studies_used >= studies_max\` (only when \`studies_max\` is not \`null\`) before \`study create\`,
|
|
124
|
+
likewise for \`testers_used\` before \`study run --sample\`.
|
|
125
|
+
|
|
104
126
|
## Related
|
|
105
127
|
|
|
128
|
+
- \`concepts/secret\` — per-workspace secrets used in chatbot endpoint
|
|
129
|
+
headers via \`{{secret:KEY}}\` placeholders.
|
|
106
130
|
- \`reference/billing-limits\` — \`maxProducts\` cap on workspace creation.
|
|
107
131
|
`;
|
|
108
132
|
const CONCEPT_STUDY = `# concept: study
|
|
@@ -130,25 +154,65 @@ its iterations. Think: a study is the recipe; an iteration is one batch.
|
|
|
130
154
|
3. \`ish study run --sample 5 --country SE\` — dispatches simulations.
|
|
131
155
|
4. \`ish study results\` or \`ish study wait\` to gather outputs.
|
|
132
156
|
|
|
133
|
-
### One-shot variant
|
|
157
|
+
### One-shot variant (inline iteration A)
|
|
158
|
+
|
|
159
|
+
\`study create\` accepts a per-modality content flag and creates
|
|
160
|
+
iteration A inline in the same call. Useful when you have a single
|
|
161
|
+
test artifact and don't need to A/B iterations:
|
|
134
162
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
163
|
+
| Modality | Inline content flag |
|
|
164
|
+
|-----------------|------------------------------------------------------|
|
|
165
|
+
| \`interactive\` | \`--url <url>\` (\`--screen-format desktop\` is the default; pass \`mobile_portrait\` for mobile) |
|
|
166
|
+
| \`text\` | \`--content-text <text-or-@file>\` |
|
|
167
|
+
| \`image\` | \`--image-urls <url1,url2,...>\` |
|
|
168
|
+
| \`video\` | \`--content-url <url>\` |
|
|
169
|
+
| \`audio\` | \`--content-url <url>\` |
|
|
170
|
+
| \`document\` | \`--content-url <url>\` |
|
|
171
|
+
| \`chat\` | \`--endpoint <id>\` or \`--endpoint-config <file>\` |
|
|
139
172
|
|
|
140
173
|
\`\`\`
|
|
174
|
+
# Text — single email artifact:
|
|
141
175
|
ish study create --modality text --content-type email \\
|
|
142
176
|
--name "Daily Brief concept" \\
|
|
143
177
|
--assignment "Read:Read the email and react" \\
|
|
144
178
|
--question "What stood out?" \\
|
|
145
179
|
--content-text @./brief.md
|
|
146
|
-
# → study + iteration A in one call, ready for \`study run\`.
|
|
147
|
-
\`\`\`
|
|
148
180
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
181
|
+
# Interactive — URL + screen format inline:
|
|
182
|
+
ish study create --modality interactive \\
|
|
183
|
+
--name "HN scan" --url https://news.ycombinator.com \\
|
|
184
|
+
--screen-format desktop \\
|
|
185
|
+
--assignment "Skim:Skim the top stories"
|
|
186
|
+
|
|
187
|
+
# Image A/B — two hero shots:
|
|
188
|
+
ish study create --modality image \\
|
|
189
|
+
--name "Hero shots" \\
|
|
190
|
+
--image-urls "https://cdn.example.com/a.png,https://cdn.example.com/b.png" \\
|
|
191
|
+
--assignment "Compare:Which feels more premium?"
|
|
192
|
+
|
|
193
|
+
# Video — one ad clip:
|
|
194
|
+
ish study create --modality video \\
|
|
195
|
+
--name "Product ad smoke" \\
|
|
196
|
+
--content-url https://cdn.example.com/ad.mp4 \\
|
|
197
|
+
--assignment "Watch:Watch and react"
|
|
198
|
+
|
|
199
|
+
# Document — a PDF whitepaper:
|
|
200
|
+
ish study create --modality document \\
|
|
201
|
+
--name "Whitepaper read-through" \\
|
|
202
|
+
--content-url https://cdn.example.com/report.pdf \\
|
|
203
|
+
--assignment "Skim:Summarise the report"
|
|
204
|
+
\`\`\`
|
|
205
|
+
|
|
206
|
+
Without an inline content flag no iteration is created — agents can no
|
|
207
|
+
longer trip the old "empty A" footgun where \`study run\` silently
|
|
208
|
+
targeted a placeholder. Add \`iteration create\` later if you want B/C
|
|
209
|
+
variants.
|
|
210
|
+
|
|
211
|
+
**Local files**: \`--content-url\` and \`--image-urls\` on \`study create\`
|
|
212
|
+
only accept http(s) URLs (the upload endpoint needs a study to upload
|
|
213
|
+
against). For local files, use the 2-step flow: \`study create\` (no
|
|
214
|
+
media flags) then \`iteration create --content-url ./file.mp4\` —
|
|
215
|
+
\`iteration create\` auto-uploads.
|
|
152
216
|
|
|
153
217
|
## Status fields (read \`runtime_status\`, not \`status\`)
|
|
154
218
|
|
|
@@ -526,7 +590,23 @@ choice. \`pick_confidence\` is only present on rounds run with
|
|
|
526
590
|
"picks": { "A": 3, "B": 0 },
|
|
527
591
|
"ratings": { "A": { "mean": 4.667, "n": 3 },
|
|
528
592
|
"B": { "mean": 2.000, "n": 3 } },
|
|
529
|
-
"winner": { "
|
|
593
|
+
"winner": { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
|
|
594
|
+
}
|
|
595
|
+
\`\`\`
|
|
596
|
+
|
|
597
|
+
\`winner.label\` is the picked variant's display label (matches
|
|
598
|
+
\`mcp__ish__get_ask_results\` so the same JQ path works either side).
|
|
599
|
+
\`winner.n\` is the number of completed responses the verdict was elected
|
|
600
|
+
from (NOT the pick count itself); \`winner.confidence\` is a coarse
|
|
601
|
+
summary: \`low\` for n<3 OR tied OR any errored response, \`medium\` for
|
|
602
|
+
3 ≤ n < 10 with no errors, \`high\` for n ≥ 10 with no errors. When more
|
|
603
|
+
than half of dispatched responses errored, the winner block is REPLACED
|
|
604
|
+
by a refusal envelope and you should run \`ish ask retry\` first:
|
|
605
|
+
|
|
606
|
+
\`\`\`json
|
|
607
|
+
{
|
|
608
|
+
"picks": { "A": 1, "B": 0 },
|
|
609
|
+
"winner": { "refused": true, "reason": "error_rate_too_high", "errored": 4, "total": 5 }
|
|
530
610
|
}
|
|
531
611
|
\`\`\`
|
|
532
612
|
|
|
@@ -538,13 +618,31 @@ When the ask has 2+ rounds, \`ask results\` also includes a top-level
|
|
|
538
618
|
\`\`\`json
|
|
539
619
|
"cross_round_summary": {
|
|
540
620
|
"rounds": [
|
|
541
|
-
{ "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"
|
|
542
|
-
{ "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"
|
|
621
|
+
{ "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"label": "B", "count": 2, "tied": false, "n": 3, "confidence": "low" } },
|
|
622
|
+
{ "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" } }
|
|
543
623
|
],
|
|
544
624
|
"picks_delta": { "A": 2, "B": -2 }
|
|
545
625
|
}
|
|
546
626
|
\`\`\`
|
|
547
627
|
|
|
628
|
+
## Retrying errored responses
|
|
629
|
+
|
|
630
|
+
\`ish ask retry <ask> --round N\` re-dispatches only the ERRORED
|
|
631
|
+
responses on a round. COMPLETED responses are left untouched (their
|
|
632
|
+
answers are the source of truth). Use this after a partial failure
|
|
633
|
+
(e.g. 4 of 5 testers errored on round 1) — fix the underlying cause,
|
|
634
|
+
then \`ask retry\` to backfill the missing rows. Idempotent: zero-errored
|
|
635
|
+
is a no-op. Add \`--wait\` to block until the retried round settles.
|
|
636
|
+
|
|
637
|
+
\`\`\`bash
|
|
638
|
+
$ ish ask retry a-d3e --round 1 --wait
|
|
639
|
+
\`\`\`
|
|
640
|
+
|
|
641
|
+
Errored responses carry \`error_message\` + \`error_kind\` (e.g.
|
|
642
|
+
\`first_impression_llm_failed\`, \`interview_llm_failed\`,
|
|
643
|
+
\`variant_preparation_failed\`) so an agent can branch on retry vs
|
|
644
|
+
abort without parsing prose.
|
|
645
|
+
|
|
548
646
|
## Adding follow-up questions to a round
|
|
549
647
|
|
|
550
648
|
\`ish ask add-questions --round N --questions ./qs.json\` is **additive
|
|
@@ -828,6 +926,72 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
|
|
|
828
926
|
--username alice --password -
|
|
829
927
|
\`\`\`
|
|
830
928
|
`;
|
|
929
|
+
const CONCEPT_SECRET = `# concept: secret
|
|
930
|
+
|
|
931
|
+
Per-workspace key/value secrets. Used at chatbot-dispatch time to
|
|
932
|
+
resolve \`{{secret:KEY}}\` placeholders in outgoing headers (or
|
|
933
|
+
anywhere else in the rendered request). Common shape:
|
|
934
|
+
|
|
935
|
+
\`\`\`
|
|
936
|
+
Authorization: Bearer {{secret:GROQ_KEY}}
|
|
937
|
+
X-API-Key: {{secret:CUSTOMER_BOT_KEY}}
|
|
938
|
+
\`\`\`
|
|
939
|
+
|
|
940
|
+
Distinct from site-access (\`concepts/site-access\`): site-access is
|
|
941
|
+
for interactive studies that gate a browser session against a UI;
|
|
942
|
+
secrets here are for chatbot endpoints, where ish dispatches the
|
|
943
|
+
HTTP request itself and the value lands in the wire request.
|
|
944
|
+
|
|
945
|
+
## Verbs
|
|
946
|
+
|
|
947
|
+
\`\`\`
|
|
948
|
+
ish secret list # list KEYS only. Values never returned.
|
|
949
|
+
ish secret set GROQ_KEY <value> # positional value (warning: shell history)
|
|
950
|
+
ish secret set GROQ_KEY --value-file ./groq.txt
|
|
951
|
+
printf %s "$VAL" | ish secret set GROQ_KEY --value-stdin
|
|
952
|
+
ish secret delete GROQ_KEY
|
|
953
|
+
\`\`\`
|
|
954
|
+
|
|
955
|
+
## Keep values out of shell history
|
|
956
|
+
|
|
957
|
+
Three input modes. Pick the safest for the source:
|
|
958
|
+
|
|
959
|
+
- **\`--value-stdin\`**: read from stdin. Best for piping from
|
|
960
|
+
another process (\`gcloud secrets ...\`, \`op read\`, etc.).
|
|
961
|
+
- **\`--value-file <path>\`**: read from a file. Use \`-\` to read
|
|
962
|
+
from stdin (alias for \`--value-stdin\`).
|
|
963
|
+
- **Positional value**: convenient but lands in shell history.
|
|
964
|
+
Avoid in scripts.
|
|
965
|
+
|
|
966
|
+
Exactly one source per call; passing two is a usage error
|
|
967
|
+
(\`error_code: validation_error\`, exit 2).
|
|
968
|
+
|
|
969
|
+
## How resolution works
|
|
970
|
+
|
|
971
|
+
At chatbot dispatch, the renderer looks up each \`{{secret:KEY}}\`
|
|
972
|
+
in the workspace's secret store. Missing keys render as the empty
|
|
973
|
+
string (no error). This matches the legacy ContextValueResolver
|
|
974
|
+
behavior and lets templates degrade silently instead of breaking
|
|
975
|
+
the request. The bot will most likely 401, which is a clear signal.
|
|
976
|
+
|
|
977
|
+
Reserved KEYs (\`BASIC_AUTH_*\`, \`SESSION_COOKIE_*\`,
|
|
978
|
+
\`LOGIN_*\`) are rejected client-side with a hint to use
|
|
979
|
+
\`ish workspace site-access\` instead. Those keys are owned by
|
|
980
|
+
the site-access flow and writing them as plain secrets would
|
|
981
|
+
silently break that path.
|
|
982
|
+
|
|
983
|
+
## When to use a secret vs. inline a header
|
|
984
|
+
|
|
985
|
+
If the value is the same across every customer / environment and
|
|
986
|
+
not sensitive (a vendor name, an API version), inline it in the
|
|
987
|
+
endpoint config's \`headers\` field. If it's per-workspace, rotates,
|
|
988
|
+
or shouldn't be committed to a config JSON file, use a secret.
|
|
989
|
+
|
|
990
|
+
## Related
|
|
991
|
+
|
|
992
|
+
- \`guides/chat\`: chat endpoint setup, including auth header examples.
|
|
993
|
+
- \`concepts/site-access\`: credentials for browser-rendered study URLs.
|
|
994
|
+
`;
|
|
831
995
|
const CONCEPT_RUN_VERBS = `# concept: run verbs — \`study run\` vs \`ask run\`
|
|
832
996
|
|
|
833
997
|
Both verbs dispatch simulations against an audience, but the lifecycle
|
|
@@ -1069,7 +1233,80 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
1069
1233
|
\`jq '.rounds[0].responses | length'\`.
|
|
1070
1234
|
- **\`study run --json\` exposes tester handles.** The top-level
|
|
1071
1235
|
\`tester_ids[]\` and \`tester_aliases[]\` arrays are the canonical
|
|
1072
|
-
inputs to \`ish study poll/wait/cancel\`.
|
|
1236
|
+
inputs to \`ish study poll/wait/cancel\`. The \`simulations[]\` array
|
|
1237
|
+
is collapsed to one batch entry per study (M13) with nested
|
|
1238
|
+
\`tester_ids[]\`, \`tester_aliases[]\`, \`job_ids[]\`, and \`count\` —
|
|
1239
|
+
an N-sample dispatch is a single row, not N near-duplicate rows.
|
|
1240
|
+
- **\`study results --json\` includes per-answer sentiment** (M10).
|
|
1241
|
+
Every \`interview_answers[].answers[]\` row carries \`sentiment\`
|
|
1242
|
+
(the tester's session-level label from \`tester_summary.sentiment\`),
|
|
1243
|
+
and every \`testers[]\` row carries \`sentiment\` + \`comment\`. No
|
|
1244
|
+
\`study tester <id>\` round-trip required.
|
|
1245
|
+
- **\`study results --summary\`** is a lean projection: counts +
|
|
1246
|
+
sentiment histogram + per-tester {alias, status, sentiment, comment,
|
|
1247
|
+
error_message}. Drops \`interview_answers\` and per-interaction
|
|
1248
|
+
breakdowns. Cheapest "did this run land?" shape.
|
|
1249
|
+
- **\`study results --transcript <tester_id>\`** is the chat-modality
|
|
1250
|
+
projection. Returns \`{tester_id, tester_alias, transcript: [...],
|
|
1251
|
+
unique_bot_replies, tester_summary}\`. Each transcript entry is
|
|
1252
|
+
\`{role, text, turn_index, ...}\` — bot turns add \`failure\`
|
|
1253
|
+
(set when the dispatch crashed); tester turns add \`action_type\`,
|
|
1254
|
+
\`option_label\`, and \`sentiment\`. \`text\` is null on tester
|
|
1255
|
+
turns whose action carries no text (\`select_option\`,
|
|
1256
|
+
\`ignore_offered\`); read intent from \`action_type\` +
|
|
1257
|
+
\`option_label\`. Same shape as the MCP \`get_chat_transcript\`
|
|
1258
|
+
tool. \`unique_bot_replies = 1\` on a multi-turn run (the bot repeated a single reply) is the M2 loop
|
|
1259
|
+
signature.
|
|
1260
|
+
- **\`study tester --summary\`** drops the action timeline and
|
|
1261
|
+
returns just \`{tester, interaction_count, sentiment, comment,
|
|
1262
|
+
error_message?, error_kind?}\`.
|
|
1263
|
+
- **\`study poll\` honors the active study.** Pass no \`--study\`
|
|
1264
|
+
flag and it falls back to the active study (set by
|
|
1265
|
+
\`ish study use\`), parity with \`study results\` /
|
|
1266
|
+
\`study wait\` / \`study run\`.
|
|
1267
|
+
- **\`iteration get --json\` testers carry \`alias\` + \`name\`** (M12).
|
|
1268
|
+
Same identifying triple as \`study results --json\`'s tester rows.
|
|
1269
|
+
- **\`ask results --json\` keeps \`variant_pick_id\` on every response**
|
|
1270
|
+
(C5-Bug4). It's the load-bearing field for "who picked what" — no
|
|
1271
|
+
\`--verbose\` required. Same logic on \`ask get --json\`.
|
|
1272
|
+
- **Every verb's \`--help\` ends with a "Tips:" footer** naming
|
|
1273
|
+
\`--get\` and \`--fields\`. If you're reaching for \`jq -r .x\` you
|
|
1274
|
+
almost certainly wanted \`--get x\`.
|
|
1275
|
+
- **\`study run --wait\` returns \`error_code: "wait_timeout"\`**
|
|
1276
|
+
(exit 5, retryable) when the wait timer expires — distinct from
|
|
1277
|
+
the api-client's generic timeout / network / server families. The
|
|
1278
|
+
envelope carries \`progress: {study_id, iteration_id?,
|
|
1279
|
+
timeout_seconds, done, total, pending, rows[]}\` so the agent
|
|
1280
|
+
can resume by polling rather than re-dispatching. Same shape on
|
|
1281
|
+
\`study wait\` (single-tester rows[] has length 1).
|
|
1282
|
+
- **\`study run\` accepts \`--dispatch-timeout <s>\`** (default 120)
|
|
1283
|
+
for the per-POST testers/batch + simulation/start budget. On
|
|
1284
|
+
timeout (or any dispatch failure), the error envelope includes
|
|
1285
|
+
\`seeded_but_not_dispatched_ids[]\` + \`seeded_but_not_dispatched_aliases[]\`
|
|
1286
|
+
listing the testers that exist server-side but didn't get
|
|
1287
|
+
dispatched. Resume by polling those instead of re-running
|
|
1288
|
+
\`study run\` (which would create another batch on top).
|
|
1289
|
+
- **\`ask run --new\` is non-idempotent and marked \`retryable: false\`**
|
|
1290
|
+
on any failure — agents auto-retrying would create a duplicate
|
|
1291
|
+
ask. The error envelope's \`suggestions\` includes a pointer to
|
|
1292
|
+
\`ish ask list --workspace <id>\` so the agent can confirm
|
|
1293
|
+
whether the resource already exists before retrying manually.
|
|
1294
|
+
- **\`ish connect --detach\` blocks until tunnel registration is
|
|
1295
|
+
confirmed** (\`registered: true\` in the lock file). The
|
|
1296
|
+
registration POST retries up to 4 times with exponential backoff
|
|
1297
|
+
(~7s worst case) before giving up; the heartbeat re-registers
|
|
1298
|
+
on a transient 404 instead of burning through the 3-strike
|
|
1299
|
+
countdown. If the heartbeat path persistently 404s even after
|
|
1300
|
+
several successful re-register cycles (D1: backend keeps
|
|
1301
|
+
forgetting the connection between heartbeats), the CLI emits
|
|
1302
|
+
a single stderr Notice and keeps the tunnel up rather than
|
|
1303
|
+
dying — the route is the problem, not the tunnel. Subsequent
|
|
1304
|
+
simulations may still hit \`TunnelInactive\` on dispatch in
|
|
1305
|
+
that case; investigate the backend's /connect route.
|
|
1306
|
+
- **The "Could not verify token (network error)…" stderr warning
|
|
1307
|
+
is gone** on green runs. The probe is best-effort; if there's a
|
|
1308
|
+
real auth failure, the subsequent API call surfaces it with a
|
|
1309
|
+
proper exit code 3.
|
|
1073
1310
|
- **Study responses carry a derived \`runtime_status\` field**
|
|
1074
1311
|
(\`draft | running | completed | completed_with_errors | cancelled\`).
|
|
1075
1312
|
Prefer this over the raw \`status\` field — \`runtime_status\` is
|
|
@@ -1124,7 +1361,7 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
1124
1361
|
"picks": { "A": 3, "B": 0 },
|
|
1125
1362
|
"ratings": { "A": { "mean": 4.667, "n": 3 },
|
|
1126
1363
|
"B": { "mean": 2.000, "n": 3 } },
|
|
1127
|
-
"winner": { "
|
|
1364
|
+
"winner": { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
|
|
1128
1365
|
}
|
|
1129
1366
|
}
|
|
1130
1367
|
\`\`\`
|
|
@@ -1132,8 +1369,23 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
1132
1369
|
\`picks\` is present iff \`wants_pick\`; \`ratings\` is present iff
|
|
1133
1370
|
\`wants_ratings\` and ≥ 1 rating was submitted; \`winner\` is the
|
|
1134
1371
|
highest pick count (\`tied: true\` if multiple variants share the
|
|
1135
|
-
top). \`
|
|
1136
|
-
|
|
1372
|
+
top). \`winner.n\` is the completed-response sample size;
|
|
1373
|
+
\`winner.confidence\` is \`low\` for n<3 / tied / any errors,
|
|
1374
|
+
\`medium\` for clean 3–9, \`high\` for clean 10+. When >50% of
|
|
1375
|
+
dispatched responses errored the winner block is replaced by
|
|
1376
|
+
\`{ refused: true, reason: "error_rate_too_high", errored, total }\` —
|
|
1377
|
+
run \`ish ask retry <ask> --round N\` first. \`mean\` is rounded to 3
|
|
1378
|
+
decimal places; \`n\` (on ratings) is the rating count for that variant.
|
|
1379
|
+
- **Errored ask responses carry \`error_message\` + \`error_kind\`.**
|
|
1380
|
+
Each \`responses[]\` entry whose \`status: errored\` exposes the
|
|
1381
|
+
classified failure (e.g. \`first_impression_llm_failed\`,
|
|
1382
|
+
\`interview_llm_failed\`, \`variant_preparation_failed\`) so an agent
|
|
1383
|
+
can branch on retry vs abort without parsing prose. Both fields are
|
|
1384
|
+
\`null\` on \`pending\` and \`completed\` rows.
|
|
1385
|
+
- **\`ish ask retry <ask> --round N\` re-dispatches errored responses.**
|
|
1386
|
+
COMPLETED rows are left untouched; only ERRORED responses are reset
|
|
1387
|
+
to PENDING and re-run from scratch. Idempotent: zero-errored is a
|
|
1388
|
+
no-op. Add \`--wait\` to block until the retry settles.
|
|
1137
1389
|
- **\`ask results --json\` deduplicates tester profile snapshots.** When
|
|
1138
1390
|
\`tester_profile\` and \`tester_profile_snapshot\` share all
|
|
1139
1391
|
overlapping fields (the common case — they only diverge if the
|
|
@@ -1456,6 +1708,267 @@ upgrade or delete an existing resource to free up headroom.
|
|
|
1456
1708
|
- \`concepts/profile\` — \`maxCustomTesterProfiles\` gates profile creation.
|
|
1457
1709
|
- \`reference/json-mode\` — full error envelope shape and exit codes.
|
|
1458
1710
|
`;
|
|
1711
|
+
const GUIDE_CHAT = `# guide: chat-modality studies
|
|
1712
|
+
|
|
1713
|
+
Goal: from a customer chatbot endpoint to a finished chat-modality
|
|
1714
|
+
study with parsed transcripts, end to end via the CLI. The flow has
|
|
1715
|
+
three phases: configure the endpoint, smoke test it, run a study.
|
|
1716
|
+
|
|
1717
|
+
## 1. Configure the endpoint
|
|
1718
|
+
|
|
1719
|
+
Two starting points:
|
|
1720
|
+
|
|
1721
|
+
### From a curl example (recommended for first-time setup)
|
|
1722
|
+
|
|
1723
|
+
The agent has a curl request that talks to the customer's bot. Save
|
|
1724
|
+
it to a file and run \`init\`:
|
|
1725
|
+
|
|
1726
|
+
\`\`\`
|
|
1727
|
+
ish chat endpoint init \\
|
|
1728
|
+
--from-curl ./bot.curl \\
|
|
1729
|
+
--name my-bot
|
|
1730
|
+
\`\`\`
|
|
1731
|
+
|
|
1732
|
+
\`init\` posts the curl to \`/chat/auto-detect-shape\`, infers the
|
|
1733
|
+
config (URL, method, headers, body template, response paths,
|
|
1734
|
+
mode, async-poll if applicable), and saves it as a chatbot endpoint
|
|
1735
|
+
resource. Output JSON shape:
|
|
1736
|
+
|
|
1737
|
+
\`\`\`json
|
|
1738
|
+
{
|
|
1739
|
+
"success": true,
|
|
1740
|
+
"saved": true,
|
|
1741
|
+
"endpoint_id": "ep_abc",
|
|
1742
|
+
"alias": "ep-abc",
|
|
1743
|
+
"config": { /* full ChatbotEndpointConfig */ },
|
|
1744
|
+
"tunnel_backed": true,
|
|
1745
|
+
"tunnel_backed_detected": true,
|
|
1746
|
+
"confidence": "high",
|
|
1747
|
+
"explanation": "...",
|
|
1748
|
+
"warnings": []
|
|
1749
|
+
}
|
|
1750
|
+
\`\`\`
|
|
1751
|
+
|
|
1752
|
+
For local bots (URL host is \`localhost\` / \`127.0.0.1\` /
|
|
1753
|
+
\`0.0.0.0\`), \`tunnel_backed\` is auto-set to \`true\`. Override
|
|
1754
|
+
explicitly with \`--tunnel-backed\` / \`--no-tunnel-backed\`.
|
|
1755
|
+
Pass \`--no-save\` to inspect the inferred config without persisting.
|
|
1756
|
+
|
|
1757
|
+
### From a hand-written config
|
|
1758
|
+
|
|
1759
|
+
\`\`\`
|
|
1760
|
+
ish chat endpoint create --endpoint-config ./bot-config.json --name "my-bot"
|
|
1761
|
+
\`\`\`
|
|
1762
|
+
|
|
1763
|
+
The file is the bare \`ChatbotEndpointConfig\` shape (or a full
|
|
1764
|
+
endpoint envelope with \`id\` / \`name\` / \`config\` keys —
|
|
1765
|
+
\`.config\` is extracted automatically). Pipe from stdin via \`-\`.
|
|
1766
|
+
|
|
1767
|
+
### Editing a saved endpoint
|
|
1768
|
+
|
|
1769
|
+
The dialog and the CLI both PUT the full config to
|
|
1770
|
+
\`/chatbot-endpoints/{id}\` on save (no patch semantics). The CLI
|
|
1771
|
+
exposes that round-trip cleanly:
|
|
1772
|
+
|
|
1773
|
+
\`\`\`
|
|
1774
|
+
# Single-field edits via shorthand flags
|
|
1775
|
+
ish chat endpoint update ep-abc --name "Production support bot"
|
|
1776
|
+
ish chat endpoint update ep-abc --url https://api.example.com/v2/chat
|
|
1777
|
+
ish chat endpoint update ep-abc --mode stateless
|
|
1778
|
+
ish chat endpoint update ep-abc --tunnel-backed # or --no-tunnel-backed
|
|
1779
|
+
|
|
1780
|
+
# Richer edits via fetch | jq | replace
|
|
1781
|
+
ish chat endpoint get ep-abc --verbose \\
|
|
1782
|
+
| jq '.config.outgoing.headers["X-API-Key"] = "{{secret:KEY}}"' \\
|
|
1783
|
+
| ish chat endpoint update ep-abc --endpoint-config -
|
|
1784
|
+
|
|
1785
|
+
ish chat endpoint get ep-abc --verbose \\
|
|
1786
|
+
| jq '.config.incoming.slotsContainerPaths += ["response.options"]
|
|
1787
|
+
| .config.incoming.slotsKindHints["response.options"] = "alternatives"' \\
|
|
1788
|
+
| ish chat endpoint update ep-abc --endpoint-config -
|
|
1789
|
+
\`\`\`
|
|
1790
|
+
|
|
1791
|
+
\`get --verbose\` (or piped) emits the round-trippable envelope
|
|
1792
|
+
\`{id, name, isTunnelBacked, config}\` — exactly what
|
|
1793
|
+
\`update --endpoint-config -\` accepts. Field-shorthand flags win on
|
|
1794
|
+
conflict with \`--endpoint-config\`.
|
|
1795
|
+
|
|
1796
|
+
### Body template placeholders
|
|
1797
|
+
|
|
1798
|
+
The renderer expands these tokens at request time:
|
|
1799
|
+
|
|
1800
|
+
- \`{{action.text}}\`: the persona's outgoing user message this turn.
|
|
1801
|
+
- \`{{history}}\`: past turns as \`[{role, content}, ...]\`. Past
|
|
1802
|
+
turns only; current turn is in \`{{action.text}}\`.
|
|
1803
|
+
- \`{{history_with_current}}\`: \`{{history}}\` plus a synthetic
|
|
1804
|
+
\`{role: "user", content: action.text}\` at the tail. **Use this for
|
|
1805
|
+
OpenAI-shape bots that take a single \`messages: [...]\` array
|
|
1806
|
+
containing prior turns and the current user message.**
|
|
1807
|
+
- \`{{turn.role}}\` / \`{{turn.text}}\`: per-turn expansion. Place
|
|
1808
|
+
one element with these tokens inside an array literal; the
|
|
1809
|
+
renderer expands it to one entry per past turn.
|
|
1810
|
+
- \`{{tester.name}}\` / \`{{tester.locale}}\`: persona attributes.
|
|
1811
|
+
- \`{{conversation_id}}\`: bot-supplied session id (stateful mode).
|
|
1812
|
+
- \`{{secret:KEY}}\`: workspace secret (see below).
|
|
1813
|
+
|
|
1814
|
+
\`{{history_with_current}}\` produces the typical OpenAI/Anthropic/Pollinations shape:
|
|
1815
|
+
|
|
1816
|
+
\`\`\`json
|
|
1817
|
+
{
|
|
1818
|
+
"model": "gpt-4o-mini",
|
|
1819
|
+
"messages": "{{history_with_current}}"
|
|
1820
|
+
}
|
|
1821
|
+
\`\`\`
|
|
1822
|
+
|
|
1823
|
+
### Auth via workspace secrets
|
|
1824
|
+
|
|
1825
|
+
For bots behind an API key, store the value as a workspace secret
|
|
1826
|
+
once and reference it from the endpoint's headers:
|
|
1827
|
+
|
|
1828
|
+
\`\`\`
|
|
1829
|
+
printf %s "$GROQ_KEY" | ish secret set GROQ_KEY --value-stdin
|
|
1830
|
+
ish chat endpoint update ep-abc --endpoint-config - <<'EOF'
|
|
1831
|
+
{ "config": { "outgoing": { "headers": { "Authorization": "Bearer {{secret:GROQ_KEY}}" } } } }
|
|
1832
|
+
EOF
|
|
1833
|
+
\`\`\`
|
|
1834
|
+
|
|
1835
|
+
The renderer resolves \`{{secret:GROQ_KEY}}\` from the workspace
|
|
1836
|
+
secret store at dispatch time. Missing keys render empty, which
|
|
1837
|
+
typically surfaces as a 401 from the bot. That's an actionable signal.
|
|
1838
|
+
|
|
1839
|
+
See \`concepts/secret\` for the full set of input modes
|
|
1840
|
+
(\`--value-file\`, \`--value-stdin\`, positional) and the reserved-key
|
|
1841
|
+
list.
|
|
1842
|
+
|
|
1843
|
+
## 2. Smoke test the connection
|
|
1844
|
+
|
|
1845
|
+
Before launching a study, verify the bot answers cleanly:
|
|
1846
|
+
|
|
1847
|
+
\`\`\`
|
|
1848
|
+
ish chat endpoint test ep-abc -m "Hello"
|
|
1849
|
+
\`\`\`
|
|
1850
|
+
|
|
1851
|
+
Output:
|
|
1852
|
+
\`\`\`json
|
|
1853
|
+
{
|
|
1854
|
+
"success": true,
|
|
1855
|
+
"text": "Hi! How can I help?",
|
|
1856
|
+
"conversation_id": "...",
|
|
1857
|
+
"slots": [...],
|
|
1858
|
+
"references": [...],
|
|
1859
|
+
"bot_latency_ms": 240,
|
|
1860
|
+
"end_of_conversation": false
|
|
1861
|
+
}
|
|
1862
|
+
\`\`\`
|
|
1863
|
+
|
|
1864
|
+
For tunnel-backed endpoints (\`isTunnelBacked: true\`), the CLI
|
|
1865
|
+
runs a tunnel pre-flight against \`/connect/active\` first and
|
|
1866
|
+
exits \`5\` with \`error_kind: "TunnelInactive"\` when no tunnel is
|
|
1867
|
+
running. Run \`ish connect <port>\` in another shell first, then
|
|
1868
|
+
retry.
|
|
1869
|
+
|
|
1870
|
+
For stateful endpoints, thread the conversation across script
|
|
1871
|
+
invocations:
|
|
1872
|
+
|
|
1873
|
+
\`\`\`
|
|
1874
|
+
CID=$(ish chat endpoint test ep-abc -m "Hi" | jq -r .conversation_id)
|
|
1875
|
+
ish chat endpoint test ep-abc -m "Tell me more" --conversation-id "$CID"
|
|
1876
|
+
\`\`\`
|
|
1877
|
+
|
|
1878
|
+
For multi-turn validation use \`ish study run --sample 1\` against
|
|
1879
|
+
a draft study (next phase).
|
|
1880
|
+
|
|
1881
|
+
## 3. Run a chat-modality study
|
|
1882
|
+
|
|
1883
|
+
Use the existing study flow with the new chat flags. \`study create\`
|
|
1884
|
+
fetches the saved endpoint and embeds its config inline at
|
|
1885
|
+
\`iteration.details.endpoint\` plus the lineage id at
|
|
1886
|
+
\`iteration.details.chatbot_endpoint_id\`:
|
|
1887
|
+
|
|
1888
|
+
\`\`\`
|
|
1889
|
+
ish study create \\
|
|
1890
|
+
--modality chat \\
|
|
1891
|
+
--endpoint ep-abc \\
|
|
1892
|
+
--name "Sign-up Q1" \\
|
|
1893
|
+
--assignment "Sign up:Try to sign up"
|
|
1894
|
+
\`\`\`
|
|
1895
|
+
|
|
1896
|
+
Or pass an inline config when there's no saved endpoint to reference
|
|
1897
|
+
(mutually exclusive with \`--endpoint\`):
|
|
1898
|
+
|
|
1899
|
+
\`\`\`
|
|
1900
|
+
cat ./bot-config.json | ish study create \\
|
|
1901
|
+
--modality chat --endpoint-config - \\
|
|
1902
|
+
--name "Sign-up Q1" --assignment "Sign up:Try to sign up"
|
|
1903
|
+
\`\`\`
|
|
1904
|
+
|
|
1905
|
+
Optional \`--max-turns <n>\` (default 12) caps the chat per tester.
|
|
1906
|
+
|
|
1907
|
+
Audience size is set at run time. Use \`--sample <N>\` to pick N
|
|
1908
|
+
random simulatable profiles, or \`--all\` for the full pool.
|
|
1909
|
+
\`--profile <id>\` is also supported for explicit selection. For example:
|
|
1910
|
+
\`\`\`
|
|
1911
|
+
ish study run stu-xyz --sample 5 --wait
|
|
1912
|
+
\`\`\`
|
|
1913
|
+
|
|
1914
|
+
Pull raw interactions:
|
|
1915
|
+
\`\`\`
|
|
1916
|
+
ish study results stu-xyz --json | jq '.interactions'
|
|
1917
|
+
\`\`\`
|
|
1918
|
+
|
|
1919
|
+
Note: chat is currently excluded from the LLM-analysis route; the
|
|
1920
|
+
results call returns raw interactions, not an analyzed summary.
|
|
1921
|
+
|
|
1922
|
+
## Iteration shortcuts
|
|
1923
|
+
|
|
1924
|
+
Add a chat iteration to an existing chat study post-hoc. The
|
|
1925
|
+
iteration type is inherited from the parent study's modality —
|
|
1926
|
+
no \`--type\` flag is needed:
|
|
1927
|
+
|
|
1928
|
+
\`\`\`
|
|
1929
|
+
ish iteration create --study stu-xyz --endpoint ep-abc --max-turns 10
|
|
1930
|
+
ish iteration create --study stu-xyz --endpoint-config ./bot.json
|
|
1931
|
+
\`\`\`
|
|
1932
|
+
|
|
1933
|
+
Same flag set as \`study create\`'s chat shortcut.
|
|
1934
|
+
|
|
1935
|
+
## Active-endpoint convention
|
|
1936
|
+
|
|
1937
|
+
\`ish chat endpoint use <id>\` writes the endpoint to
|
|
1938
|
+
\`~/.ish/config.json\` (\`chat_endpoint\` key). After that, every
|
|
1939
|
+
\`chat endpoint *\` verb that takes \`[endpoint-id]\` defaults to the
|
|
1940
|
+
active endpoint when the positional is omitted:
|
|
1941
|
+
|
|
1942
|
+
\`\`\`
|
|
1943
|
+
ish chat endpoint use ep-abc
|
|
1944
|
+
ish chat endpoint test -m "Hello" # uses ep-abc
|
|
1945
|
+
ish chat endpoint get --verbose # uses ep-abc
|
|
1946
|
+
\`\`\`
|
|
1947
|
+
|
|
1948
|
+
Mirrors \`workspace use\` / \`study use\` / \`ask use\`.
|
|
1949
|
+
|
|
1950
|
+
## Common errors
|
|
1951
|
+
|
|
1952
|
+
- \`error_kind: "TunnelInactive"\` (exit 5) — tunnel-backed endpoint
|
|
1953
|
+
but no active tunnel. Run \`ish connect <port>\` first.
|
|
1954
|
+
- \`error_code: "validation_error"\` (exit 2) — usage error
|
|
1955
|
+
(mutually exclusive flags both set, missing required input,
|
|
1956
|
+
modality mismatch). The error envelope's \`valid_options\` field
|
|
1957
|
+
surfaces the accepted shape.
|
|
1958
|
+
- \`error_kind: "BotInvalidResponseError"\` (exit 1) — the bot
|
|
1959
|
+
responded but the configured \`incoming.*\` paths didn't resolve.
|
|
1960
|
+
Edit the response shape via \`update --endpoint-config\` or rerun
|
|
1961
|
+
\`init\` with a fresher curl sample.
|
|
1962
|
+
|
|
1963
|
+
## Related
|
|
1964
|
+
|
|
1965
|
+
- \`concepts/iteration\` — chat iteration shape (\`details.endpoint\`,
|
|
1966
|
+
\`details.chatbot_endpoint_id\`, \`details.max_turns\`).
|
|
1967
|
+
- \`concepts/study\` — modality + assignments + iteration nesting.
|
|
1968
|
+
- \`reference/json-mode\` — JSON output, error envelope, exit codes.
|
|
1969
|
+
- \`guides/first-study\` — the same pattern for an interactive
|
|
1970
|
+
modality study.
|
|
1971
|
+
`;
|
|
1459
1972
|
const PAGES = [
|
|
1460
1973
|
{
|
|
1461
1974
|
slug: "overview",
|
|
@@ -1529,6 +2042,12 @@ const PAGES = [
|
|
|
1529
2042
|
description: "Credentials for gated URLs (basic auth, cookies, login forms).",
|
|
1530
2043
|
body: CONCEPT_SITE_ACCESS,
|
|
1531
2044
|
},
|
|
2045
|
+
{
|
|
2046
|
+
slug: "concepts/secret",
|
|
2047
|
+
title: "concept: secret",
|
|
2048
|
+
description: "Per-workspace KV store for {{secret:KEY}} placeholders in chatbot endpoint headers.",
|
|
2049
|
+
body: CONCEPT_SECRET,
|
|
2050
|
+
},
|
|
1532
2051
|
{
|
|
1533
2052
|
slug: "concepts/run-verbs",
|
|
1534
2053
|
title: "concept: run verbs — study run vs ask run",
|
|
@@ -1565,6 +2084,12 @@ const PAGES = [
|
|
|
1565
2084
|
description: "Login → workspace → audience → study → iteration → run → results.",
|
|
1566
2085
|
body: GUIDE_FIRST_STUDY,
|
|
1567
2086
|
},
|
|
2087
|
+
{
|
|
2088
|
+
slug: "guides/chat",
|
|
2089
|
+
title: "guide: chat-modality studies",
|
|
2090
|
+
description: "Configure a chatbot endpoint, smoke test it, run a chat-modality study.",
|
|
2091
|
+
body: GUIDE_CHAT,
|
|
2092
|
+
},
|
|
1568
2093
|
];
|
|
1569
2094
|
const PAGES_BY_SLUG = new Map(PAGES.map((p) => [p.slug, p]));
|
|
1570
2095
|
export function listPages() {
|