@ishlabs/cli 0.8.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -6
- package/dist/auth.d.ts +23 -4
- package/dist/auth.js +165 -39
- package/dist/commands/ask.d.ts +12 -0
- package/dist/commands/ask.js +127 -2
- package/dist/commands/chat.d.ts +17 -0
- package/dist/commands/chat.js +589 -0
- package/dist/commands/iteration.js +232 -13
- package/dist/commands/secret.d.ts +20 -0
- package/dist/commands/secret.js +246 -0
- package/dist/commands/source.js +24 -2
- package/dist/commands/study-run.d.ts +38 -0
- package/dist/commands/study-run.js +199 -80
- package/dist/commands/study-tester.js +17 -2
- package/dist/commands/study.js +311 -39
- package/dist/commands/workspace.js +81 -0
- package/dist/config.d.ts +7 -0
- package/dist/connect.d.ts +3 -0
- package/dist/connect.js +359 -24
- package/dist/index.js +67 -9
- package/dist/lib/alias-hydrate.d.ts +42 -0
- package/dist/lib/alias-hydrate.js +175 -0
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +28 -1
- package/dist/lib/auth.js +11 -3
- package/dist/lib/chat-endpoint-formatters.d.ts +39 -0
- package/dist/lib/chat-endpoint-formatters.js +104 -0
- package/dist/lib/command-helpers.d.ts +18 -0
- package/dist/lib/command-helpers.js +188 -53
- package/dist/lib/docs.js +662 -34
- package/dist/lib/modality.d.ts +42 -0
- package/dist/lib/modality.js +192 -0
- package/dist/lib/output.d.ts +41 -0
- package/dist/lib/output.js +453 -19
- package/dist/lib/paths.d.ts +1 -0
- package/dist/lib/paths.js +3 -0
- package/dist/lib/skill-content.js +183 -13
- package/dist/lib/types.d.ts +15 -0
- package/package.json +3 -3
package/dist/lib/docs.js
CHANGED
|
@@ -18,7 +18,7 @@ Workspace (= product)
|
|
|
18
18
|
├── Tester Profiles ────── reusable audience personas (alias: tp-…)
|
|
19
19
|
│ └── Sources ──────── transcripts/audio/images that seed generation
|
|
20
20
|
├── Study ──────────────── persistent research artifact (alias: s-…)
|
|
21
|
-
│ ├── modality ──────── interactive | text | video | audio | image | document
|
|
21
|
+
│ ├── modality ──────── interactive | text | video | audio | image | document | chat
|
|
22
22
|
│ ├── assignments ───── tasks the tester does
|
|
23
23
|
│ ├── questionnaire ─── questions the tester answers
|
|
24
24
|
│ └── Iterations ────── one configured run (URL or content) (alias: i-…)
|
|
@@ -98,18 +98,43 @@ ish workspace list
|
|
|
98
98
|
ish workspace create --name "My product" --base-url https://example.com
|
|
99
99
|
ish workspace use w-6ec # set as active
|
|
100
100
|
ish workspace get # show the active workspace
|
|
101
|
+
ish workspace info # usage counters + plan caps (see below)
|
|
101
102
|
ish workspace site-access status
|
|
102
103
|
\`\`\`
|
|
103
104
|
|
|
105
|
+
## Checking usage before destructive calls
|
|
106
|
+
|
|
107
|
+
\`ish workspace info\` shows usage counters so an agent can branch on
|
|
108
|
+
plan limits without burning a doomed \`study create\` attempt that
|
|
109
|
+
returns \`error_code: usage_limit_reached\`.
|
|
110
|
+
|
|
111
|
+
\`\`\`
|
|
112
|
+
ish workspace info --json
|
|
113
|
+
{
|
|
114
|
+
"studies_used": 2,
|
|
115
|
+
"studies_max": 3,
|
|
116
|
+
"testers_used": 0,
|
|
117
|
+
"testers_max": 3,
|
|
118
|
+
"tier": "free"
|
|
119
|
+
}
|
|
120
|
+
\`\`\`
|
|
121
|
+
|
|
122
|
+
A \`null\` value on a \`*_max\` field means "unlimited" (paid tiers).
|
|
123
|
+
When \`studies_max\` is not \`null\`, branch on \`studies_used >= studies_max\` before \`study create\`,
|
|
124
|
+
likewise for \`testers_used\` before \`study run --sample\`.
|
|
125
|
+
|
|
104
126
|
## Related
|
|
105
127
|
|
|
128
|
+
- \`concepts/secret\` — per-workspace secrets used in chatbot endpoint
|
|
129
|
+
headers via \`{{secret:KEY}}\` placeholders.
|
|
106
130
|
- \`reference/billing-limits\` — \`maxProducts\` cap on workspace creation.
|
|
107
131
|
`;
|
|
108
132
|
const CONCEPT_STUDY = `# concept: study
|
|
109
133
|
|
|
110
134
|
A **study** is the persistent research artifact. It defines:
|
|
111
|
-
- \`modality\`: \`interactive\` (the tester drives a real browser)
|
|
112
|
-
\`text | video | audio | image | document\` (media reaction studies)
|
|
135
|
+
- \`modality\`: \`interactive\` (the tester drives a real browser), one of
|
|
136
|
+
\`text | video | audio | image | document\` (media reaction studies),
|
|
137
|
+
or \`chat\` (multi-turn probe against an external chatbot endpoint).
|
|
113
138
|
- \`content_type\` (media studies only): \`email | social_post | ad | …\` —
|
|
114
139
|
controls the framing the tester is given.
|
|
115
140
|
- \`assignments\`: the tasks the tester performs. See \`concepts/assignment\`.
|
|
@@ -129,25 +154,65 @@ its iterations. Think: a study is the recipe; an iteration is one batch.
|
|
|
129
154
|
3. \`ish study run --sample 5 --country SE\` — dispatches simulations.
|
|
130
155
|
4. \`ish study results\` or \`ish study wait\` to gather outputs.
|
|
131
156
|
|
|
132
|
-
### One-shot variant
|
|
157
|
+
### One-shot variant (inline iteration A)
|
|
158
|
+
|
|
159
|
+
\`study create\` accepts a per-modality content flag and creates
|
|
160
|
+
iteration A inline in the same call. Useful when you have a single
|
|
161
|
+
test artifact and don't need to A/B iterations:
|
|
133
162
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
163
|
+
| Modality | Inline content flag |
|
|
164
|
+
|-----------------|------------------------------------------------------|
|
|
165
|
+
| \`interactive\` | \`--url <url>\` (\`--screen-format desktop\` is the default; pass \`mobile_portrait\` for mobile) |
|
|
166
|
+
| \`text\` | \`--content-text <text-or-@file>\` |
|
|
167
|
+
| \`image\` | \`--image-urls <url1,url2,...>\` |
|
|
168
|
+
| \`video\` | \`--content-url <url>\` |
|
|
169
|
+
| \`audio\` | \`--content-url <url>\` |
|
|
170
|
+
| \`document\` | \`--content-url <url>\` |
|
|
171
|
+
| \`chat\` | \`--endpoint <id>\` or \`--endpoint-config <file>\` |
|
|
138
172
|
|
|
139
173
|
\`\`\`
|
|
174
|
+
# Text — single email artifact:
|
|
140
175
|
ish study create --modality text --content-type email \\
|
|
141
176
|
--name "Daily Brief concept" \\
|
|
142
177
|
--assignment "Read:Read the email and react" \\
|
|
143
178
|
--question "What stood out?" \\
|
|
144
179
|
--content-text @./brief.md
|
|
145
|
-
# → study + iteration A in one call, ready for \`study run\`.
|
|
146
|
-
\`\`\`
|
|
147
180
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
181
|
+
# Interactive — URL + screen format inline:
|
|
182
|
+
ish study create --modality interactive \\
|
|
183
|
+
--name "HN scan" --url https://news.ycombinator.com \\
|
|
184
|
+
--screen-format desktop \\
|
|
185
|
+
--assignment "Skim:Skim the top stories"
|
|
186
|
+
|
|
187
|
+
# Image A/B — two hero shots:
|
|
188
|
+
ish study create --modality image \\
|
|
189
|
+
--name "Hero shots" \\
|
|
190
|
+
--image-urls "https://cdn.example.com/a.png,https://cdn.example.com/b.png" \\
|
|
191
|
+
--assignment "Compare:Which feels more premium?"
|
|
192
|
+
|
|
193
|
+
# Video — one ad clip:
|
|
194
|
+
ish study create --modality video \\
|
|
195
|
+
--name "Product ad smoke" \\
|
|
196
|
+
--content-url https://cdn.example.com/ad.mp4 \\
|
|
197
|
+
--assignment "Watch:Watch and react"
|
|
198
|
+
|
|
199
|
+
# Document — a PDF whitepaper:
|
|
200
|
+
ish study create --modality document \\
|
|
201
|
+
--name "Whitepaper read-through" \\
|
|
202
|
+
--content-url https://cdn.example.com/report.pdf \\
|
|
203
|
+
--assignment "Skim:Summarise the report"
|
|
204
|
+
\`\`\`
|
|
205
|
+
|
|
206
|
+
Without an inline content flag, no iteration is created — agents can no
|
|
207
|
+
longer trip the old "empty A" footgun where \`study run\` silently
|
|
208
|
+
targeted a placeholder. Add \`iteration create\` later if you want B/C
|
|
209
|
+
variants.
|
|
210
|
+
|
|
211
|
+
**Local files**: \`--content-url\` and \`--image-urls\` on \`study create\`
|
|
212
|
+
only accept http(s) URLs (the upload endpoint needs a study to upload
|
|
213
|
+
against). For local files, use the 2-step flow: \`study create\` (no
|
|
214
|
+
media flags) then \`iteration create --content-url ./file.mp4\` —
|
|
215
|
+
\`iteration create\` auto-uploads.
|
|
151
216
|
|
|
152
217
|
## Status fields (read \`runtime_status\`, not \`status\`)
|
|
153
218
|
|
|
@@ -200,9 +265,9 @@ pick was wrong.
|
|
|
200
265
|
const CONCEPT_ITERATION = `# concept: iteration
|
|
201
266
|
|
|
202
267
|
An **iteration** is one configured run of a study. It carries the
|
|
203
|
-
volatile bits — the URL (interactive)
|
|
204
|
-
while the study carries the persistent
|
|
205
|
-
modality).
|
|
268
|
+
volatile bits — the URL (interactive), the media (video/text/etc.), or
|
|
269
|
+
the chatbot endpoint (chat) — while the study carries the persistent
|
|
270
|
+
shape (assignments, questionnaire, modality).
|
|
206
271
|
|
|
207
272
|
- Alias prefix: \`i-\`
|
|
208
273
|
- A study has 1..N iterations. \`ish study run\` defaults to the latest.
|
|
@@ -224,9 +289,19 @@ ish iteration create --study s-b2c --url https://example.com
|
|
|
224
289
|
# Interactive on mobile screen format:
|
|
225
290
|
ish iteration create --url https://example.com --screen-format mobile_portrait
|
|
226
291
|
|
|
292
|
+
# Figma interactive (file_key + start_node_id required):
|
|
293
|
+
ish iteration create --platform figma --url https://figma.com/proto \\
|
|
294
|
+
--screen-format mobile_portrait --file-key abc123 --start-node-id 0:1 \\
|
|
295
|
+
--flow-name "Onboarding A"
|
|
296
|
+
|
|
227
297
|
# Text/email content from a file:
|
|
228
298
|
ish iteration create --content-text @./email.html --title "Newsletter"
|
|
229
299
|
|
|
300
|
+
# Email iteration with sender + featured hero image:
|
|
301
|
+
ish iteration create --content-text @./email.txt --content-html @./email.html \\
|
|
302
|
+
--sender-name "Marketing" --sender-email "marketing@example.com" \\
|
|
303
|
+
--featured-image-url https://cdn.example.com/hero.png
|
|
304
|
+
|
|
230
305
|
# Video (URL or local file):
|
|
231
306
|
ish iteration create --content-url ./video.mp4
|
|
232
307
|
|
|
@@ -236,11 +311,113 @@ ish iteration create --image-urls "./a.png,./b.png"
|
|
|
236
311
|
# Document (PDF):
|
|
237
312
|
ish iteration create --content-url ./report.pdf
|
|
238
313
|
|
|
314
|
+
# Chat — probe a saved chatbot endpoint:
|
|
315
|
+
ish iteration create --chat-endpoint-id ce-... --max-turns 10 --early-termination
|
|
316
|
+
|
|
239
317
|
# Inspect:
|
|
240
318
|
ish iteration list --study s-b2c
|
|
241
319
|
ish iteration get i-d4e
|
|
242
320
|
\`\`\`
|
|
243
321
|
|
|
322
|
+
## Segments and segment labels
|
|
323
|
+
|
|
324
|
+
For media iterations (video, audio, text, image, document), reactions
|
|
325
|
+
can be collected per **segment** instead of over the whole asset. A
|
|
326
|
+
segment is a contiguous slice of the iteration's content — a 30-second
|
|
327
|
+
window of a video, a paragraph range of an email, a section of a PDF.
|
|
328
|
+
Each segment can carry a human-readable **label** ("Intro", "Pricing
|
|
329
|
+
section", "Call to action") that surfaces in the tester UI and in
|
|
330
|
+
results.
|
|
331
|
+
|
|
332
|
+
Segments live inside the iteration's \`segmentation\` field — there is
|
|
333
|
+
no separate segments resource. Three discriminated shapes:
|
|
334
|
+
|
|
335
|
+
- **time_based** (video, audio): boundaries in seconds. Segment 0 runs
|
|
336
|
+
from \`intervals_seconds[0]\` to \`intervals_seconds[1]\`, etc.
|
|
337
|
+
Optional \`labels[]\` names each segment.
|
|
338
|
+
|
|
339
|
+
\`\`\`json
|
|
340
|
+
{
|
|
341
|
+
"type": "time_based",
|
|
342
|
+
"intervals_seconds": [0, 30, 60, 90],
|
|
343
|
+
"labels": ["Hook", "Feature 1", "Feature 2", "CTA"]
|
|
344
|
+
}
|
|
345
|
+
\`\`\`
|
|
346
|
+
|
|
347
|
+
- **section_based** (text, document, image copy): explicit list of
|
|
348
|
+
named sections, either marker-bounded or paragraph-bounded.
|
|
349
|
+
|
|
350
|
+
\`\`\`json
|
|
351
|
+
{
|
|
352
|
+
"type": "section_based",
|
|
353
|
+
"sections": [
|
|
354
|
+
{ "name": "intro", "label": "Intro", "paragraph_start": 0, "paragraph_end": 1 },
|
|
355
|
+
{ "name": "body", "label": "Body", "paragraph_start": 1, "paragraph_end": 4 },
|
|
356
|
+
{ "name": "cta", "label": "Call to action", "paragraph_start": 4, "paragraph_end": 5 }
|
|
357
|
+
]
|
|
358
|
+
}
|
|
359
|
+
\`\`\`
|
|
360
|
+
|
|
361
|
+
- **page_based** (document): pages are auto-derived from the document.
|
|
362
|
+
No additional fields.
|
|
363
|
+
|
|
364
|
+
Pass via \`--segmentation-json '<json>'\` on \`iteration create\`.
|
|
365
|
+
|
|
366
|
+
### Default segmentation for text/image iterations
|
|
367
|
+
|
|
368
|
+
For text- and image-modality iterations created without
|
|
369
|
+
\`--segmentation-json\`, the worker synthesises a single whole-content
|
|
370
|
+
section so a minimal \`ish iteration create --content-text "..."\` runs
|
|
371
|
+
end-to-end. Author your own segmentation when you want section-level
|
|
372
|
+
reactions; otherwise the default just works.
|
|
373
|
+
|
|
374
|
+
### content_config — early termination + selected segments
|
|
375
|
+
|
|
376
|
+
A sibling of \`segmentation\` that controls how the tester progresses
|
|
377
|
+
through segments:
|
|
378
|
+
|
|
379
|
+
- \`early_termination: true\` — stop the session once every selected
|
|
380
|
+
segment has been seen.
|
|
381
|
+
- \`selected_segment_indices: [0, 2]\` — only show these segment
|
|
382
|
+
indices; \`null\` (default) means all segments are active.
|
|
383
|
+
|
|
384
|
+
Pass via \`--content-config-json '<json>'\`.
|
|
385
|
+
|
|
386
|
+
## HTML content (text + media captions)
|
|
387
|
+
|
|
388
|
+
- **Text modality**: pair plain \`--content-text\` with rich
|
|
389
|
+
\`--content-html\` to render emails / articles with formatting. The
|
|
390
|
+
plain text is what testers reason over; the HTML is what they see.
|
|
391
|
+
- **Media captions** (video, audio, image): \`--copy-text\` and
|
|
392
|
+
\`--copy-html\` attach a caption to the media — the social-post
|
|
393
|
+
pattern. Add \`--social-platform\` (instagram/tiktok/facebook/linkedin/x)
|
|
394
|
+
for platform-specific framing, and \`--copy-position before|after\`
|
|
395
|
+
for ordering relative to the media.
|
|
396
|
+
|
|
397
|
+
Captions can carry their own segmentation when you want
|
|
398
|
+
paragraph-by-paragraph reactions to a long caption. Use the
|
|
399
|
+
\`--details-json\` escape hatch to pass a nested
|
|
400
|
+
\`copy_content.segmentation\`.
|
|
401
|
+
|
|
402
|
+
## Chat modality
|
|
403
|
+
|
|
404
|
+
Chat iterations probe an external chatbot endpoint by having a tester
|
|
405
|
+
hold a multi-turn conversation against it. Two ways to wire the
|
|
406
|
+
endpoint:
|
|
407
|
+
|
|
408
|
+
\`\`\`
|
|
409
|
+
# Reference a saved endpoint row (recommended — reproducible):
|
|
410
|
+
ish iteration create --chat-endpoint-id ce-...
|
|
411
|
+
|
|
412
|
+
# Inline endpoint config (one-off):
|
|
413
|
+
ish iteration create --chat-endpoint-json '{"url":"https://...","headers":{...}}'
|
|
414
|
+
\`\`\`
|
|
415
|
+
|
|
416
|
+
Tunables:
|
|
417
|
+
- \`--max-turns N\` — cap the conversation length (default 12, max 50).
|
|
418
|
+
- \`--early-termination\` — let the worker end the session early when
|
|
419
|
+
the tester signals the conversation is over.
|
|
420
|
+
|
|
244
421
|
## No more auto-empty iteration A
|
|
245
422
|
|
|
246
423
|
\`ish study create\` and \`ish study generate\` **do not auto-create
|
|
@@ -261,16 +438,6 @@ then retry.
|
|
|
261
438
|
|
|
262
439
|
Treat this as actionable, not transient — re-running won't change anything.
|
|
263
440
|
|
|
264
|
-
## Default segmentation for text/image iterations
|
|
265
|
-
|
|
266
|
-
For text-modality iterations created with just \`--content-text\` (and
|
|
267
|
-
similarly \`--image-urls\` for image), the worker now synthesises a
|
|
268
|
-
single whole-content section if no \`segmentation\` was supplied. This
|
|
269
|
-
means a minimal \`ish iteration create --study s-XYZ --content-text
|
|
270
|
-
"..."\` actually runs end-to-end without you needing to author a
|
|
271
|
-
SegmentationConfig manually. Author your own segmentation when you
|
|
272
|
-
want section-level reactions; otherwise the default just works.
|
|
273
|
-
|
|
274
441
|
## Related
|
|
275
442
|
|
|
276
443
|
- \`concepts/study\` — the parent artifact.
|
|
@@ -423,7 +590,23 @@ choice. \`pick_confidence\` is only present on rounds run with
|
|
|
423
590
|
"picks": { "A": 3, "B": 0 },
|
|
424
591
|
"ratings": { "A": { "mean": 4.667, "n": 3 },
|
|
425
592
|
"B": { "mean": 2.000, "n": 3 } },
|
|
426
|
-
"winner": { "
|
|
593
|
+
"winner": { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
|
|
594
|
+
}
|
|
595
|
+
\`\`\`
|
|
596
|
+
|
|
597
|
+
\`winner.label\` is the picked variant's display label (matches
|
|
598
|
+
\`mcp__ish__get_ask_results\` so the same jq path works on either side).
|
|
599
|
+
\`winner.n\` is the completed-response sample the verdict was elected
|
|
600
|
+
from (NOT the pick count itself); \`winner.confidence\` is a coarse
|
|
601
|
+
summary: \`low\` for n<3 OR tied OR any errored response, \`medium\` for
|
|
602
|
+
3 ≤ n < 10 with no errors, \`high\` for n ≥ 10 with no errors. When more
|
|
603
|
+
than half of dispatched responses errored, the winner block is REPLACED
|
|
604
|
+
by a refusal envelope and you should run \`ish ask retry\` first:
|
|
605
|
+
|
|
606
|
+
\`\`\`json
|
|
607
|
+
{
|
|
608
|
+
"picks": { "A": 1, "B": 0 },
|
|
609
|
+
"winner": { "refused": true, "reason": "error_rate_too_high", "errored": 4, "total": 5 }
|
|
427
610
|
}
|
|
428
611
|
\`\`\`
|
|
429
612
|
|
|
@@ -435,13 +618,31 @@ When the ask has 2+ rounds, \`ask results\` also includes a top-level
|
|
|
435
618
|
\`\`\`json
|
|
436
619
|
"cross_round_summary": {
|
|
437
620
|
"rounds": [
|
|
438
|
-
{ "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"
|
|
439
|
-
{ "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"
|
|
621
|
+
{ "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"label": "B", "count": 2, "tied": false, "n": 3, "confidence": "low" } },
|
|
622
|
+
{ "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" } }
|
|
440
623
|
],
|
|
441
624
|
"picks_delta": { "A": 2, "B": -2 }
|
|
442
625
|
}
|
|
443
626
|
\`\`\`
|
|
444
627
|
|
|
628
|
+
## Retrying errored responses
|
|
629
|
+
|
|
630
|
+
\`ish ask retry <ask> --round N\` re-dispatches only the ERRORED
|
|
631
|
+
responses on a round. COMPLETED responses are left untouched (their
|
|
632
|
+
answers are the source of truth). Use this after a partial failure
|
|
633
|
+
(e.g. 4 of 5 testers errored on round 1) — fix the underlying cause,
|
|
634
|
+
then \`ask retry\` to backfill the missing rows. Idempotent: zero-errored
|
|
635
|
+
is a no-op. Add \`--wait\` to block until the retried round settles.
|
|
636
|
+
|
|
637
|
+
\`\`\`bash
|
|
638
|
+
$ ish ask retry a-d3e --round 1 --wait
|
|
639
|
+
\`\`\`
|
|
640
|
+
|
|
641
|
+
Errored responses carry \`error_message\` + \`error_kind\` (e.g.
|
|
642
|
+
\`first_impression_llm_failed\`, \`interview_llm_failed\`,
|
|
643
|
+
\`variant_preparation_failed\`) so an agent can branch on retry vs
|
|
644
|
+
abort without parsing prose.
|
|
645
|
+
|
|
445
646
|
## Adding follow-up questions to a round
|
|
446
647
|
|
|
447
648
|
\`ish ask add-questions --round N --questions ./qs.json\` is **additive
|
|
@@ -725,6 +926,72 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
|
|
|
725
926
|
--username alice --password -
|
|
726
927
|
\`\`\`
|
|
727
928
|
`;
|
|
929
|
+
const CONCEPT_SECRET = `# concept: secret
|
|
930
|
+
|
|
931
|
+
Per-workspace key/value secrets. Used at chatbot-dispatch time to
|
|
932
|
+
resolve \`{{secret:KEY}}\` placeholders in outgoing headers (or
|
|
933
|
+
anywhere else in the rendered request). Common shape:
|
|
934
|
+
|
|
935
|
+
\`\`\`
|
|
936
|
+
Authorization: Bearer {{secret:GROQ_KEY}}
|
|
937
|
+
X-API-Key: {{secret:CUSTOMER_BOT_KEY}}
|
|
938
|
+
\`\`\`
|
|
939
|
+
|
|
940
|
+
Distinct from site-access (\`concepts/site-access\`): site-access is
|
|
941
|
+
for interactive studies that gate a browser session against a UI;
|
|
942
|
+
secrets here are for chatbot endpoints, where ish dispatches the
|
|
943
|
+
HTTP request itself and the value lands in the wire request.
|
|
944
|
+
|
|
945
|
+
## Verbs
|
|
946
|
+
|
|
947
|
+
\`\`\`
|
|
948
|
+
ish secret list # list KEYS only. Values never returned.
|
|
949
|
+
ish secret set GROQ_KEY <value> # positional value (warning: shell history)
|
|
950
|
+
ish secret set GROQ_KEY --value-file ./groq.txt
|
|
951
|
+
printf %s "$VAL" | ish secret set GROQ_KEY --value-stdin
|
|
952
|
+
ish secret delete GROQ_KEY
|
|
953
|
+
\`\`\`
|
|
954
|
+
|
|
955
|
+
## Keep values out of shell history
|
|
956
|
+
|
|
957
|
+
Three input modes. Pick the safest for the source:
|
|
958
|
+
|
|
959
|
+
- **\`--value-stdin\`**: read from stdin. Best for piping from
|
|
960
|
+
another process (\`gcloud secrets ...\`, \`op read\`, etc.).
|
|
961
|
+
- **\`--value-file <path>\`**: read from a file. Use \`-\` to read
|
|
962
|
+
from stdin (alias for \`--value-stdin\`).
|
|
963
|
+
- **Positional value**: convenient but lands in shell history.
|
|
964
|
+
Avoid in scripts.
|
|
965
|
+
|
|
966
|
+
Exactly one source per call; passing two is a usage error
|
|
967
|
+
(\`error_code: validation_error\`, exit 2).
|
|
968
|
+
|
|
969
|
+
## How resolution works
|
|
970
|
+
|
|
971
|
+
At chatbot dispatch, the renderer looks up each \`{{secret:KEY}}\`
|
|
972
|
+
in the workspace's secret store. Missing keys render as the empty
|
|
973
|
+
string (no error). This matches the legacy ContextValueResolver
|
|
974
|
+
behavior and lets templates degrade silently instead of breaking
|
|
975
|
+
the request. The bot will most likely 401, which is a clear signal.
|
|
976
|
+
|
|
977
|
+
Reserved KEYs (\`BASIC_AUTH_*\`, \`SESSION_COOKIE_*\`,
|
|
978
|
+
\`LOGIN_*\`) are rejected client-side with a hint to use
|
|
979
|
+
\`ish workspace site-access\` instead. Those keys are owned by
|
|
980
|
+
the site-access flow and writing them as plain secrets would
|
|
981
|
+
silently break that path.
|
|
982
|
+
|
|
983
|
+
## When to use a secret vs. inline a header
|
|
984
|
+
|
|
985
|
+
If the value is the same across every customer / environment and
|
|
986
|
+
not sensitive (a vendor name, an API version), inline it in the
|
|
987
|
+
endpoint config's \`headers\` field. If it's per-workspace, rotates,
|
|
988
|
+
or shouldn't be committed to a config JSON file, use a secret.
|
|
989
|
+
|
|
990
|
+
## Related
|
|
991
|
+
|
|
992
|
+
- \`guides/chat\`: chat endpoint setup, including auth header examples.
|
|
993
|
+
- \`concepts/site-access\`: credentials for browser-rendered study URLs.
|
|
994
|
+
`;
|
|
728
995
|
const CONCEPT_RUN_VERBS = `# concept: run verbs — \`study run\` vs \`ask run\`
|
|
729
996
|
|
|
730
997
|
Both verbs dispatch simulations against an audience, but the lifecycle
|
|
@@ -966,7 +1233,80 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
966
1233
|
\`jq '.rounds[0].responses | length'\`.
|
|
967
1234
|
- **\`study run --json\` exposes tester handles.** The top-level
|
|
968
1235
|
\`tester_ids[]\` and \`tester_aliases[]\` arrays are the canonical
|
|
969
|
-
inputs to \`ish study poll/wait/cancel\`.
|
|
1236
|
+
inputs to \`ish study poll/wait/cancel\`. The \`simulations[]\` array
|
|
1237
|
+
is collapsed to one batch entry per study (M13) with nested
|
|
1238
|
+
\`tester_ids[]\`, \`tester_aliases[]\`, \`job_ids[]\`, and \`count\` —
|
|
1239
|
+
an N-sample dispatch is a single row, not N near-duplicate rows.
|
|
1240
|
+
- **\`study results --json\` includes per-answer sentiment** (M10).
|
|
1241
|
+
Every \`interview_answers[].answers[]\` row carries \`sentiment\`
|
|
1242
|
+
(the tester's session-level label from \`tester_summary.sentiment\`),
|
|
1243
|
+
and every \`testers[]\` row carries \`sentiment\` + \`comment\`. No
|
|
1244
|
+
\`study tester <id>\` round-trip required.
|
|
1245
|
+
- **\`study results --summary\`** is a lean projection: counts +
|
|
1246
|
+
sentiment histogram + per-tester {alias, status, sentiment, comment,
|
|
1247
|
+
error_message}. Drops \`interview_answers\` and per-interaction
|
|
1248
|
+
breakdowns. Cheapest "did this run land?" shape.
|
|
1249
|
+
- **\`study results --transcript <tester_id>\`** is the chat-modality
|
|
1250
|
+
projection. Returns \`{tester_id, tester_alias, transcript: [...],
|
|
1251
|
+
unique_bot_replies, tester_summary}\`. Each transcript entry is
|
|
1252
|
+
\`{role, text, turn_index, ...}\` — bot turns add \`failure\`
|
|
1253
|
+
(set when the dispatch crashed); tester turns add \`action_type\`,
|
|
1254
|
+
\`option_label\`, and \`sentiment\`. \`text\` is null on tester
|
|
1255
|
+
turns whose action carries no text (\`select_option\`,
|
|
1256
|
+
\`ignore_offered\`); read intent from \`action_type\` +
|
|
1257
|
+
\`option_label\`. Same shape as the MCP \`get_chat_transcript\`
|
|
1258
|
+
tool. \`unique_bot_replies = 1\` on a multi-turn run is the M2 loop
|
|
1259
|
+
signature.
|
|
1260
|
+
- **\`study tester --summary\`** drops the action timeline and
|
|
1261
|
+
returns just \`{tester, interaction_count, sentiment, comment,
|
|
1262
|
+
error_message?, error_kind?}\`.
|
|
1263
|
+
- **\`study poll\` honors the active study.** Pass no \`--study\`
|
|
1264
|
+
flag and it falls back to the active study (set by
|
|
1265
|
+
\`ish study use\`), parity with \`study results\` /
|
|
1266
|
+
\`study wait\` / \`study run\`.
|
|
1267
|
+
- **\`iteration get --json\` testers carry \`alias\` + \`name\`** (M12).
|
|
1268
|
+
Same identifying triple as \`study results --json\`'s tester rows.
|
|
1269
|
+
- **\`ask results --json\` keeps \`variant_pick_id\` on every response**
|
|
1270
|
+
(C5-Bug4). It's the load-bearing field for "who picked what" — no
|
|
1271
|
+
\`--verbose\` required. Same logic on \`ask get --json\`.
|
|
1272
|
+
- **Every verb's \`--help\` ends with a "Tips:" footer** naming
|
|
1273
|
+
\`--get\` and \`--fields\`. If you're reaching for \`jq -r .x\` you
|
|
1274
|
+
almost certainly wanted \`--get x\`.
|
|
1275
|
+
- **\`study run --wait\` returns \`error_code: "wait_timeout"\`**
|
|
1276
|
+
(exit 5, retryable) when the wait timer expires — distinct from
|
|
1277
|
+
the api-client's generic timeout / network / server families. The
|
|
1278
|
+
envelope carries \`progress: {study_id, iteration_id?,
|
|
1279
|
+
timeout_seconds, done, total, pending, rows[]}\` so the agent
|
|
1280
|
+
can resume by polling rather than re-dispatching. Same shape on
|
|
1281
|
+
\`study wait\` (single-tester rows[] has length 1).
|
|
1282
|
+
- **\`study run\` accepts \`--dispatch-timeout <s>\`** (default 120)
|
|
1283
|
+
for the per-POST testers/batch + simulation/start budget. On
|
|
1284
|
+
timeout (or any dispatch failure), the error envelope includes
|
|
1285
|
+
\`seeded_but_not_dispatched_ids[]\` + \`seeded_but_not_dispatched_aliases[]\`
|
|
1286
|
+
listing the testers that exist server-side but didn't get
|
|
1287
|
+
dispatched. Resume by polling those instead of re-running
|
|
1288
|
+
\`study run\` (which would create another batch on top).
|
|
1289
|
+
- **\`ask run --new\` is non-idempotent and marked \`retryable: false\`**
|
|
1290
|
+
on any failure — agents auto-retrying would create a duplicate
|
|
1291
|
+
ask. The error envelope's \`suggestions\` includes a pointer to
|
|
1292
|
+
\`ish ask list --workspace <id>\` so the agent can confirm
|
|
1293
|
+
whether the resource already exists before retrying manually.
|
|
1294
|
+
- **\`ish connect --detach\` blocks until tunnel registration is
|
|
1295
|
+
confirmed** (\`registered: true\` in the lock file). The
|
|
1296
|
+
registration POST retries up to 4 times with exponential backoff
|
|
1297
|
+
(~7s worst case) before giving up; the heartbeat re-registers
|
|
1298
|
+
on a transient 404 instead of burning through the 3-strike
|
|
1299
|
+
countdown. If the heartbeat path persistently 404s even after
|
|
1300
|
+
several successful re-register cycles (D1: backend keeps
|
|
1301
|
+
forgetting the connection between heartbeats), the CLI emits
|
|
1302
|
+
a single stderr Notice and keeps the tunnel up rather than
|
|
1303
|
+
dying — the route is the problem, not the tunnel. Subsequent
|
|
1304
|
+
simulations may still hit \`TunnelInactive\` on dispatch in
|
|
1305
|
+
that case; investigate the backend's /connect route.
|
|
1306
|
+
- **The "Could not verify token (network error)…" stderr warning
|
|
1307
|
+
is gone** on green runs. The probe is best-effort; if there's a
|
|
1308
|
+
real auth failure, the subsequent API call surfaces it with a
|
|
1309
|
+
proper exit code 3.
|
|
970
1310
|
- **Study responses carry a derived \`runtime_status\` field**
|
|
971
1311
|
(\`draft | running | completed | completed_with_errors | cancelled\`).
|
|
972
1312
|
Prefer this over the raw \`status\` field — \`runtime_status\` is
|
|
@@ -1021,7 +1361,7 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
1021
1361
|
"picks": { "A": 3, "B": 0 },
|
|
1022
1362
|
"ratings": { "A": { "mean": 4.667, "n": 3 },
|
|
1023
1363
|
"B": { "mean": 2.000, "n": 3 } },
|
|
1024
|
-
"winner": { "
|
|
1364
|
+
"winner": { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
|
|
1025
1365
|
}
|
|
1026
1366
|
}
|
|
1027
1367
|
\`\`\`
|
|
@@ -1029,8 +1369,23 @@ The CLI guarantees these contracts so agents can chain safely:
|
|
|
1029
1369
|
\`picks\` is present iff \`wants_pick\`; \`ratings\` is present iff
|
|
1030
1370
|
\`wants_ratings\` and ≥ 1 rating was submitted; \`winner\` is the
|
|
1031
1371
|
highest pick count (\`tied: true\` if multiple variants share the
|
|
1032
|
-
top). \`
|
|
1033
|
-
|
|
1372
|
+
top). \`winner.n\` is the completed-response sample;
|
|
1373
|
+
\`winner.confidence\` is \`low\` for n<3 / tied / any errors,
|
|
1374
|
+
\`medium\` for clean 3–9, \`high\` for clean 10+. When >50% of
|
|
1375
|
+
dispatched responses errored the winner block is replaced by
|
|
1376
|
+
\`{ refused: true, reason: "error_rate_too_high", errored, total }\` —
|
|
1377
|
+
run \`ish ask retry <ask> --round N\` first. \`mean\` is rounded to 3
|
|
1378
|
+
decimal places; \`n\` (on ratings) is the rating count for that variant.
|
|
1379
|
+
- **Errored ask responses carry \`error_message\` + \`error_kind\`.**
|
|
1380
|
+
Each \`responses[]\` entry whose \`status: errored\` exposes the
|
|
1381
|
+
classified failure (e.g. \`first_impression_llm_failed\`,
|
|
1382
|
+
\`interview_llm_failed\`, \`variant_preparation_failed\`) so an agent
|
|
1383
|
+
can branch on retry vs abort without parsing prose. Both fields are
|
|
1384
|
+
\`null\` on \`pending\` and \`completed\` rows.
|
|
1385
|
+
- **\`ish ask retry <ask> --round N\` re-dispatches errored responses.**
|
|
1386
|
+
COMPLETED rows are left untouched; only ERRORED responses are reset
|
|
1387
|
+
to PENDING and re-run from scratch. Idempotent: zero-errored is a
|
|
1388
|
+
no-op. Add \`--wait\` to block until the retry settles.
|
|
1034
1389
|
- **\`ask results --json\` deduplicates tester profile snapshots.** When
|
|
1035
1390
|
\`tester_profile\` and \`tester_profile_snapshot\` share all
|
|
1036
1391
|
overlapping fields (the common case — they only diverge if the
|
|
@@ -1353,6 +1708,267 @@ upgrade or delete an existing resource to free up headroom.
|
|
|
1353
1708
|
- \`concepts/profile\` — \`maxCustomTesterProfiles\` gates profile creation.
|
|
1354
1709
|
- \`reference/json-mode\` — full error envelope shape and exit codes.
|
|
1355
1710
|
`;
|
|
1711
|
+
const GUIDE_CHAT = `# guide: chat-modality studies
|
|
1712
|
+
|
|
1713
|
+
Goal: from a customer chatbot endpoint to a finished chat-modality
|
|
1714
|
+
study with parsed transcripts, end to end via the CLI. The flow has
|
|
1715
|
+
three phases: configure the endpoint, smoke test it, run a study.
|
|
1716
|
+
|
|
1717
|
+
## 1. Configure the endpoint
|
|
1718
|
+
|
|
1719
|
+
Two starting points:
|
|
1720
|
+
|
|
1721
|
+
### From a curl example (recommended for first-time setup)
|
|
1722
|
+
|
|
1723
|
+
The agent has a curl request that talks to the customer's bot. Save
|
|
1724
|
+
it to a file and run \`init\`:
|
|
1725
|
+
|
|
1726
|
+
\`\`\`
|
|
1727
|
+
ish chat endpoint init \\
|
|
1728
|
+
--from-curl ./bot.curl \\
|
|
1729
|
+
--name my-bot
|
|
1730
|
+
\`\`\`
|
|
1731
|
+
|
|
1732
|
+
\`init\` posts the curl to \`/chat/auto-detect-shape\`, infers the
|
|
1733
|
+
config (URL, method, headers, body template, response paths,
|
|
1734
|
+
mode, async-poll if applicable), and saves it as a chatbot endpoint
|
|
1735
|
+
resource. Output JSON shape:
|
|
1736
|
+
|
|
1737
|
+
\`\`\`json
|
|
1738
|
+
{
|
|
1739
|
+
"success": true,
|
|
1740
|
+
"saved": true,
|
|
1741
|
+
"endpoint_id": "ep_abc",
|
|
1742
|
+
"alias": "ep-abc",
|
|
1743
|
+
"config": { /* full ChatbotEndpointConfig */ },
|
|
1744
|
+
"tunnel_backed": true,
|
|
1745
|
+
"tunnel_backed_detected": true,
|
|
1746
|
+
"confidence": "high",
|
|
1747
|
+
"explanation": "...",
|
|
1748
|
+
"warnings": []
|
|
1749
|
+
}
|
|
1750
|
+
\`\`\`
|
|
1751
|
+
|
|
1752
|
+
For local bots (URL host is \`localhost\` / \`127.0.0.1\` /
|
|
1753
|
+
\`0.0.0.0\`), \`tunnel_backed\` is auto-set to \`true\`. Override
|
|
1754
|
+
explicitly with \`--tunnel-backed\` / \`--no-tunnel-backed\`.
|
|
1755
|
+
Pass \`--no-save\` to inspect the inferred config without persisting.
|
|
1756
|
+
|
|
1757
|
+
### From a hand-written config
|
|
1758
|
+
|
|
1759
|
+
\`\`\`
|
|
1760
|
+
ish chat endpoint create --endpoint-config ./bot-config.json --name "my-bot"
|
|
1761
|
+
\`\`\`
|
|
1762
|
+
|
|
1763
|
+
The file is the bare \`ChatbotEndpointConfig\` shape (or a full
|
|
1764
|
+
endpoint envelope with \`id\` / \`name\` / \`config\` keys —
|
|
1765
|
+
\`.config\` is extracted automatically). Pipe from stdin via \`-\`.
|
|
1766
|
+
|
|
1767
|
+
### Editing a saved endpoint
|
|
1768
|
+
|
|
1769
|
+
The dialog and the CLI both PUT the full config to
|
|
1770
|
+
\`/chatbot-endpoints/{id}\` on save (no patch semantics). The CLI
|
|
1771
|
+
exposes that round-trip cleanly:
|
|
1772
|
+
|
|
1773
|
+
\`\`\`
|
|
1774
|
+
# Single-field edits via shorthand flags
|
|
1775
|
+
ish chat endpoint update ep-abc --name "Production support bot"
|
|
1776
|
+
ish chat endpoint update ep-abc --url https://api.example.com/v2/chat
|
|
1777
|
+
ish chat endpoint update ep-abc --mode stateless
|
|
1778
|
+
ish chat endpoint update ep-abc --tunnel-backed # or --no-tunnel-backed
|
|
1779
|
+
|
|
1780
|
+
# Richer edits via fetch | jq | replace
|
|
1781
|
+
ish chat endpoint get ep-abc --verbose \\
|
|
1782
|
+
| jq '.config.outgoing.headers["X-API-Key"] = "{{secret:KEY}}"' \\
|
|
1783
|
+
| ish chat endpoint update ep-abc --endpoint-config -
|
|
1784
|
+
|
|
1785
|
+
ish chat endpoint get ep-abc --verbose \\
|
|
1786
|
+
| jq '.config.incoming.slotsContainerPaths += ["response.options"]
|
|
1787
|
+
| .config.incoming.slotsKindHints["response.options"] = "alternatives"' \\
|
|
1788
|
+
| ish chat endpoint update ep-abc --endpoint-config -
|
|
1789
|
+
\`\`\`
|
|
1790
|
+
|
|
1791
|
+
\`get --verbose\` (or piped) emits the round-trippable envelope
|
|
1792
|
+
\`{id, name, isTunnelBacked, config}\` — exactly what
|
|
1793
|
+
\`update --endpoint-config -\` accepts. Field-shorthand flags win on
|
|
1794
|
+
conflict with \`--endpoint-config\`.
|
|
1795
|
+
|
|
1796
|
+
### Body template placeholders
|
|
1797
|
+
|
|
1798
|
+
The renderer expands these tokens at request time:
|
|
1799
|
+
|
|
1800
|
+
- \`{{action.text}}\`: the persona's outgoing user message this turn.
|
|
1801
|
+
- \`{{history}}\`: past turns as \`[{role, content}, ...]\`. Past
|
|
1802
|
+
turns only; current turn is in \`{{action.text}}\`.
|
|
1803
|
+
- \`{{history_with_current}}\`: \`{{history}}\` plus a synthetic
|
|
1804
|
+
\`{role: "user", content: action.text}\` at the tail. **Use this for
|
|
1805
|
+
OpenAI-shape bots that take a single \`messages: [...]\` array
|
|
1806
|
+
containing prior turns and the current user message.**
|
|
1807
|
+
- \`{{turn.role}}\` / \`{{turn.text}}\`: per-turn expansion. Place
|
|
1808
|
+
one element with these tokens inside an array literal; the
|
|
1809
|
+
renderer expands it to one entry per past turn.
|
|
1810
|
+
- \`{{tester.name}}\` / \`{{tester.locale}}\`: persona attributes.
|
|
1811
|
+
- \`{{conversation_id}}\`: bot-supplied session id (stateful mode).
|
|
1812
|
+
- \`{{secret:KEY}}\`: workspace secret (see below).
|
|
1813
|
+
|
|
1814
|
+
\`{{history_with_current}}\` produces the typical OpenAI/Anthropic/Pollinations shape:
|
|
1815
|
+
|
|
1816
|
+
\`\`\`json
|
|
1817
|
+
{
|
|
1818
|
+
"model": "gpt-4o-mini",
|
|
1819
|
+
"messages": "{{history_with_current}}"
|
|
1820
|
+
}
|
|
1821
|
+
\`\`\`
|
|
1822
|
+
|
|
1823
|
+
### Auth via workspace secrets
|
|
1824
|
+
|
|
1825
|
+
For bots behind an API key, store the value as a workspace secret
|
|
1826
|
+
once and reference it from the endpoint's headers:
|
|
1827
|
+
|
|
1828
|
+
\`\`\`
|
|
1829
|
+
printf %s "$GROQ_KEY" | ish secret set GROQ_KEY --value-stdin
|
|
1830
|
+
ish chat endpoint get ep-abc --verbose \\
|
|
1831
|
+
| jq '.config.outgoing.headers.Authorization = "Bearer {{secret:GROQ_KEY}}"' \\
|
|
1832
|
+
| ish chat endpoint update ep-abc --endpoint-config -
|
|
1833
|
+
\`\`\`
|
|
1834
|
+
|
|
1835
|
+
The renderer resolves \`{{secret:GROQ_KEY}}\` from the workspace
|
|
1836
|
+
secret store at dispatch time. Missing keys render empty, which
|
|
1837
|
+
typically surfaces as a 401 from the bot. That's an actionable signal.
|
|
1838
|
+
|
|
1839
|
+
See \`concepts/secret\` for the full set of input modes
|
|
1840
|
+
(\`--value-file\`, \`--value-stdin\`, positional) and the reserved-key
|
|
1841
|
+
list.
|
|
1842
|
+
|
|
1843
|
+
## 2. Smoke test the connection
|
|
1844
|
+
|
|
1845
|
+
Before launching a study, verify the bot answers cleanly:
|
|
1846
|
+
|
|
1847
|
+
\`\`\`
|
|
1848
|
+
ish chat endpoint test ep-abc -m "Hello"
|
|
1849
|
+
\`\`\`
|
|
1850
|
+
|
|
1851
|
+
Output:
|
|
1852
|
+
\`\`\`json
|
|
1853
|
+
{
|
|
1854
|
+
"success": true,
|
|
1855
|
+
"text": "Hi! How can I help?",
|
|
1856
|
+
"conversation_id": "...",
|
|
1857
|
+
"slots": [...],
|
|
1858
|
+
"references": [...],
|
|
1859
|
+
"bot_latency_ms": 240,
|
|
1860
|
+
"end_of_conversation": false
|
|
1861
|
+
}
|
|
1862
|
+
\`\`\`
|
|
1863
|
+
|
|
1864
|
+
For tunnel-backed endpoints (\`isTunnelBacked: true\`), the CLI
|
|
1865
|
+
runs a tunnel pre-flight against \`/connect/active\` first and
|
|
1866
|
+
exits \`5\` with \`error_kind: "TunnelInactive"\` when no tunnel is
|
|
1867
|
+
running. Run \`ish connect <port>\` in another shell first, then
|
|
1868
|
+
retry.
|
|
1869
|
+
|
|
1870
|
+
For stateful endpoints, thread the conversation across script
|
|
1871
|
+
invocations:
|
|
1872
|
+
|
|
1873
|
+
\`\`\`
|
|
1874
|
+
CID=$(ish chat endpoint test ep-abc -m "Hi" | jq -r .conversation_id)
|
|
1875
|
+
ish chat endpoint test ep-abc -m "Tell me more" --conversation-id "$CID"
|
|
1876
|
+
\`\`\`
|
|
1877
|
+
|
|
1878
|
+
For multi-turn validation, use \`ish study run --sample 1\` against
|
|
1879
|
+
a draft study (next phase).
|
|
1880
|
+
|
|
1881
|
+
## 3. Run a chat-modality study
|
|
1882
|
+
|
|
1883
|
+
Use the existing study flow with the new chat flags. \`study create\`
|
|
1884
|
+
fetches the saved endpoint and embeds its config inline at
|
|
1885
|
+
\`iteration.details.endpoint\` plus the lineage id at
|
|
1886
|
+
\`iteration.details.chatbot_endpoint_id\`:
|
|
1887
|
+
|
|
1888
|
+
\`\`\`
|
|
1889
|
+
ish study create \\
|
|
1890
|
+
--modality chat \\
|
|
1891
|
+
--endpoint ep-abc \\
|
|
1892
|
+
--name "Sign-up Q1" \\
|
|
1893
|
+
--assignment "Sign up:Try to sign up"
|
|
1894
|
+
\`\`\`
|
|
1895
|
+
|
|
1896
|
+
Or pass an inline config when there's no saved endpoint to reference
|
|
1897
|
+
(mutually exclusive with \`--endpoint\`):
|
|
1898
|
+
|
|
1899
|
+
\`\`\`
|
|
1900
|
+
cat ./bot-config.json | ish study create \\
|
|
1901
|
+
--modality chat --endpoint-config - \\
|
|
1902
|
+
--name "Sign-up Q1" --assignment "Sign up:Try to sign up"
|
|
1903
|
+
\`\`\`
|
|
1904
|
+
|
|
1905
|
+
Optional \`--max-turns <n>\` (default 12) caps the chat per tester.
|
|
1906
|
+
|
|
1907
|
+
Audience size is set at run time. Use \`--sample <N>\` to pick N
|
|
1908
|
+
random simulatable profiles, or \`--all\` for the full pool.
|
|
1909
|
+
\`--profile <id>\` is also supported for explicit selection:
|
|
1910
|
+
\`\`\`
|
|
1911
|
+
ish study run stu-xyz --sample 5 --wait
|
|
1912
|
+
\`\`\`
|
|
1913
|
+
|
|
1914
|
+
Pull raw interactions:
|
|
1915
|
+
\`\`\`
|
|
1916
|
+
ish study results stu-xyz --json | jq '.interactions'
|
|
1917
|
+
\`\`\`
|
|
1918
|
+
|
|
1919
|
+
Note: chat is currently excluded from the LLM-analysis route; the
|
|
1920
|
+
results call returns raw interactions, not an analyzed summary.
|
|
1921
|
+
|
|
1922
|
+
## Iteration shortcuts
|
|
1923
|
+
|
|
1924
|
+
Add a chat iteration to an existing chat study post-hoc. The
|
|
1925
|
+
iteration type is inherited from the parent study's modality —
|
|
1926
|
+
no \`--type\` flag is needed:
|
|
1927
|
+
|
|
1928
|
+
\`\`\`
|
|
1929
|
+
ish iteration create --study stu-xyz --endpoint ep-abc --max-turns 10
|
|
1930
|
+
ish iteration create --study stu-xyz --endpoint-config ./bot.json
|
|
1931
|
+
\`\`\`
|
|
1932
|
+
|
|
1933
|
+
Same flag set as \`study create\`'s chat shortcut.
|
|
1934
|
+
|
|
1935
|
+
## Active-endpoint convention
|
|
1936
|
+
|
|
1937
|
+
\`ish chat endpoint use <id>\` writes the endpoint to
|
|
1938
|
+
\`~/.ish/config.json\` (\`chat_endpoint\` key). After that, every
|
|
1939
|
+
\`chat endpoint *\` verb that takes \`[endpoint-id]\` defaults to the
|
|
1940
|
+
active endpoint when the positional is omitted:
|
|
1941
|
+
|
|
1942
|
+
\`\`\`
|
|
1943
|
+
ish chat endpoint use ep-abc
|
|
1944
|
+
ish chat endpoint test -m "Hello" # uses ep-abc
|
|
1945
|
+
ish chat endpoint get --verbose # uses ep-abc
|
|
1946
|
+
\`\`\`
|
|
1947
|
+
|
|
1948
|
+
Mirrors \`workspace use\` / \`study use\` / \`ask use\`.
|
|
1949
|
+
|
|
1950
|
+
## Common errors
|
|
1951
|
+
|
|
1952
|
+
- \`error_kind: "TunnelInactive"\` (exit 5) — tunnel-backed endpoint
|
|
1953
|
+
but no active tunnel. Run \`ish connect <port>\` first.
|
|
1954
|
+
- \`error_code: "validation_error"\` (exit 2) — usage error
|
|
1955
|
+
(mutually exclusive flags both set, missing required input,
|
|
1956
|
+
modality mismatch). The error envelope's \`valid_options\` field
|
|
1957
|
+
surfaces the accepted shape.
|
|
1958
|
+
- \`error_kind: "BotInvalidResponseError"\` (exit 1) — the bot
|
|
1959
|
+
responded but the configured \`incoming.*\` paths didn't resolve.
|
|
1960
|
+
Edit the response shape via \`update --endpoint-config\` or rerun
|
|
1961
|
+
\`init\` with a fresher curl sample.
|
|
1962
|
+
|
|
1963
|
+
## Related
|
|
1964
|
+
|
|
1965
|
+
- \`concepts/iteration\` — chat iteration shape (\`details.endpoint\`,
|
|
1966
|
+
\`details.chatbot_endpoint_id\`, \`details.max_turns\`).
|
|
1967
|
+
- \`concepts/study\` — modality + assignments + iteration nesting.
|
|
1968
|
+
- \`reference/json-mode\` — JSON output, error envelope, exit codes.
|
|
1969
|
+
- \`guides/first-study\` — the same pattern for an interactive
|
|
1970
|
+
modality study.
|
|
1971
|
+
`;
|
|
1356
1972
|
const PAGES = [
|
|
1357
1973
|
{
|
|
1358
1974
|
slug: "overview",
|
|
@@ -1375,7 +1991,7 @@ const PAGES = [
|
|
|
1375
1991
|
{
|
|
1376
1992
|
slug: "concepts/iteration",
|
|
1377
1993
|
title: "concept: iteration",
|
|
1378
|
-
description: "One configured run of a study (URL or
|
|
1994
|
+
description: "One configured run of a study (URL, media, or chat). Covers segments, segment labels, and HTML content.",
|
|
1379
1995
|
body: CONCEPT_ITERATION,
|
|
1380
1996
|
},
|
|
1381
1997
|
{
|
|
@@ -1426,6 +2042,12 @@ const PAGES = [
|
|
|
1426
2042
|
description: "Credentials for gated URLs (basic auth, cookies, login forms).",
|
|
1427
2043
|
body: CONCEPT_SITE_ACCESS,
|
|
1428
2044
|
},
|
|
2045
|
+
{
|
|
2046
|
+
slug: "concepts/secret",
|
|
2047
|
+
title: "concept: secret",
|
|
2048
|
+
description: "Per-workspace KV store for {{secret:KEY}} placeholders in chatbot endpoint headers.",
|
|
2049
|
+
body: CONCEPT_SECRET,
|
|
2050
|
+
},
|
|
1429
2051
|
{
|
|
1430
2052
|
slug: "concepts/run-verbs",
|
|
1431
2053
|
title: "concept: run verbs — study run vs ask run",
|
|
@@ -1462,6 +2084,12 @@ const PAGES = [
|
|
|
1462
2084
|
description: "Login → workspace → audience → study → iteration → run → results.",
|
|
1463
2085
|
body: GUIDE_FIRST_STUDY,
|
|
1464
2086
|
},
|
|
2087
|
+
{
|
|
2088
|
+
slug: "guides/chat",
|
|
2089
|
+
title: "guide: chat-modality studies",
|
|
2090
|
+
description: "Configure a chatbot endpoint, smoke test it, run a chat-modality study.",
|
|
2091
|
+
body: GUIDE_CHAT,
|
|
2092
|
+
},
|
|
1465
2093
|
];
|
|
1466
2094
|
const PAGES_BY_SLUG = new Map(PAGES.map((p) => [p.slug, p]));
|
|
1467
2095
|
export function listPages() {
|