npm - @ishlabs/cli - Versions diffs - 0.9.0 → 0.10.0 - Mend

@ishlabs/cli 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +54 -5
package/dist/commands/ask.d.ts +12 -0
package/dist/commands/ask.js +127 -2
package/dist/commands/chat.d.ts +17 -0
package/dist/commands/chat.js +589 -0
package/dist/commands/iteration.js +134 -14
package/dist/commands/secret.d.ts +20 -0
package/dist/commands/secret.js +246 -0
package/dist/commands/study-run.d.ts +38 -0
package/dist/commands/study-run.js +199 -80
package/dist/commands/study-tester.js +17 -2
package/dist/commands/study.js +309 -37
package/dist/commands/workspace.js +81 -0
package/dist/config.d.ts +3 -0
package/dist/connect.d.ts +3 -0
package/dist/connect.js +346 -22
package/dist/index.js +64 -6
package/dist/lib/alias-hydrate.d.ts +42 -0
package/dist/lib/alias-hydrate.js +175 -0
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +28 -1
package/dist/lib/auth.js +4 -2
package/dist/lib/chat-endpoint-formatters.d.ts +39 -0
package/dist/lib/chat-endpoint-formatters.js +104 -0
package/dist/lib/command-helpers.d.ts +18 -0
package/dist/lib/command-helpers.js +105 -3
package/dist/lib/docs.js +542 -17
package/dist/lib/modality.d.ts +42 -0
package/dist/lib/modality.js +192 -0
package/dist/lib/output.d.ts +41 -0
package/dist/lib/output.js +453 -19
package/dist/lib/paths.d.ts +1 -0
package/dist/lib/paths.js +3 -0
package/dist/lib/skill-content.js +182 -12
package/dist/lib/types.d.ts +15 -0
package/package.json +1 -1

package/dist/lib/docs.js CHANGED Viewed

@@ -98,11 +98,35 @@ ish workspace list
 ish workspace create --name "My product" --base-url https://example.com
 ish workspace use w-6ec        # set as active
 ish workspace get              # show the active workspace
+ish workspace info             # usage counters + plan caps (see below)
 ish workspace site-access status
 \`\`\`
+## Checking usage before destructive calls
+\`ish workspace info\` shows usage counters so an agent can branch on
+plan limits without burning a doomed \`study create\` attempt that
+returns \`error_code: usage_limit_reached\`.
+\`\`\`
+ish workspace info --json
+{
+  "studies_used": 2,
+  "studies_max": 3,
+  "testers_used": 0,
+  "testers_max": 3,
+  "tier": "free"
+}
+\`\`\`
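+A \`null\` value on a \`*_max\` field means "unlimited" (paid tiers),
+so guard for \`null\` first: a bare \`>=\` against \`null\` evaluates to
+true in both jq and JavaScript and would read as "at cap". Branch on
+\`studies_max != null and studies_used >= studies_max\` before
+\`study create\`; likewise for \`testers_used\` / \`testers_max\` before
+\`study run --sample\`.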
 ## Related
+- \`concepts/secret\` — per-workspace secrets used in chatbot endpoint
+  headers via \`{{secret:KEY}}\` placeholders.
 - \`reference/billing-limits\` — \`maxProducts\` cap on workspace creation.
 `;
 const CONCEPT_STUDY = `# concept: study
@@ -130,25 +154,65 @@ its iterations. Think: a study is the recipe; an iteration is one batch.
 3. \`ish study run --sample 5 --country SE\` — dispatches simulations.
 4. \`ish study results\` or \`ish study wait\` to gather outputs.
-### One-shot variant
+### One-shot variant (inline iteration A)
+\`study create\` accepts a per-modality content flag and creates
+iteration A inline in the same call. Useful when you have a single
+test artifact and don't need to A/B iterations:
-\`study create\` now accepts \`--content-text\` (text modality) or
-\`--url\` (interactive modality) inline; iteration A is created in the
-same call. Useful when you have a single test artifact and don't need
-to A/B iterations:
+| Modality        | Inline content flag                                  |
+|-----------------|------------------------------------------------------|
+| \`interactive\` | \`--url <url>\` (\`--screen-format desktop\` is the default; pass \`mobile_portrait\` for mobile) |
+| \`text\`        | \`--content-text <text-or-@file>\`                   |
+| \`image\`       | \`--image-urls <url1,url2,...>\`                     |
+| \`video\`       | \`--content-url <url>\`                              |
+| \`audio\`       | \`--content-url <url>\`                              |
+| \`document\`    | \`--content-url <url>\`                              |
+| \`chat\`        | \`--endpoint <id>\` or \`--endpoint-config <file>\`  |
 \`\`\`
+# Text — single email artifact:
 ish study create --modality text --content-type email \\
   --name "Daily Brief concept" \\
   --assignment "Read:Read the email and react" \\
   --question "What stood out?" \\
   --content-text @./brief.md
-# → study + iteration A in one call, ready for \`study run\`.
-\`\`\`
-Without those flags no iteration is created — agents can no longer
-trip the old "empty A" footgun where \`study run\` silently targeted a
-placeholder.
+# Interactive — URL + screen format inline:
+ish study create --modality interactive \\
+  --name "HN scan" --url https://news.ycombinator.com \\
+  --screen-format desktop \\
+  --assignment "Skim:Skim the top stories"
+# Image A/B — two hero shots:
+ish study create --modality image \\
+  --name "Hero shots" \\
+  --image-urls "https://cdn.example.com/a.png,https://cdn.example.com/b.png" \\
+  --assignment "Compare:Which feels more premium?"
+# Video — one ad clip:
+ish study create --modality video \\
+  --name "Product ad smoke" \\
+  --content-url https://cdn.example.com/ad.mp4 \\
+  --assignment "Watch:Watch and react"
+# Document — a PDF whitepaper:
+ish study create --modality document \\
+  --name "Whitepaper read-through" \\
+  --content-url https://cdn.example.com/report.pdf \\
+  --assignment "Skim:Summarise the report"
+\`\`\`
+Without an inline content flag no iteration is created — agents can no
+longer trip the old "empty A" footgun where \`study run\` silently
+targeted a placeholder. Add \`iteration create\` later if you want B/C
+variants.
+**Local files**: \`--content-url\` and \`--image-urls\` on \`study create\`
+only accept http(s) URLs (the upload endpoint needs a study to upload
+against). For local files, use the 2-step flow: \`study create\` (no
+media flags) then \`iteration create --content-url ./file.mp4\` —
+\`iteration create\` auto-uploads.
 ## Status fields (read \`runtime_status\`, not \`status\`)
@@ -526,7 +590,23 @@ choice. \`pick_confidence\` is only present on rounds run with
   "picks":   { "A": 3, "B": 0 },
   "ratings": { "A": { "mean": 4.667, "n": 3 },
                "B": { "mean": 2.000, "n": 3 } },
-  "winner":  { "letter": "A", "count": 3, "tied": false }
+  "winner":  { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
+}
+\`\`\`
+\`winner.label\` is the picked variant's display label (matches
+\`mcp__ish__get_ask_results\` so the same JQ path works either side).
+\`winner.n\` is the number of completed responses the verdict was
+derived from (NOT the pick count itself); \`winner.confidence\` is a coarse
+summary: \`low\` for n<3 OR tied OR any errored response, \`medium\` for
+3 ≤ n < 10 with no errors, \`high\` for n ≥ 10 with no errors. When more
+than half of dispatched responses errored, the winner block is REPLACED
+by a refusal envelope and you should run \`ish ask retry\` first:
+\`\`\`json
+{
+  "picks":   { "A": 1, "B": 0 },
+  "winner":  { "refused": true, "reason": "error_rate_too_high", "errored": 4, "total": 5 }
 }
 \`\`\`
@@ -538,13 +618,31 @@ When the ask has 2+ rounds, \`ask results\` also includes a top-level
 \`\`\`json
 "cross_round_summary": {
   "rounds": [
-    { "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"letter": "B", "count": 2, "tied": false } },
-    { "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"letter": "A", "count": 3, "tied": false } }
+    { "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"label": "B", "count": 2, "tied": false, "n": 3, "confidence": "medium" } },
+    { "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" } }
   ],
   "picks_delta": { "A": +2, "B": -2 }
 }
 \`\`\`
+## Retrying errored responses
+\`ish ask retry <ask> --round N\` re-dispatches only the ERRORED
+responses on a round. COMPLETED responses are left untouched (their
+answers are the source of truth). Use this after a partial failure
+(e.g. 4 of 5 testers errored on round 1) — fix the underlying cause,
+then \`ask retry\` to backfill the missing rows. Idempotent: zero-errored
+is a no-op. Add \`--wait\` to block until the retried round settles.
+\`\`\`bash
+$ ish ask retry a-d3e --round 1 --wait
+\`\`\`
+Errored responses carry \`error_message\` + \`error_kind\` (e.g.
+\`first_impression_llm_failed\`, \`interview_llm_failed\`,
+\`variant_preparation_failed\`) so an agent can branch on retry vs
+abort without parsing prose.
 ## Adding follow-up questions to a round
 \`ish ask add-questions --round N --questions ./qs.json\` is **additive
@@ -828,6 +926,72 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
     --username alice --password -
 \`\`\`
 `;
+const CONCEPT_SECRET = `# concept: secret
+Per-workspace key/value secrets. Used at chatbot-dispatch time to
+resolve \`{{secret:KEY}}\` placeholders in outgoing headers (or
+anywhere else in the rendered request). Common shape:
+\`\`\`
+Authorization: Bearer {{secret:GROQ_KEY}}
+X-API-Key:     {{secret:CUSTOMER_BOT_KEY}}
+\`\`\`
+Distinct from site-access (\`concepts/site-access\`): site-access is
+for interactive studies that gate a browser session against a UI;
+secrets here are for chatbot endpoints, where ish dispatches the
+HTTP request itself and the value lands in the wire request.
+## Verbs
+\`\`\`
+ish secret list                       # list KEYS only. Values never returned.
+ish secret set GROQ_KEY <value>       # positional value (warning: shell history)
+ish secret set GROQ_KEY --value-file ./groq.txt
+printf %s "$VAL" | ish secret set GROQ_KEY --value-stdin
+ish secret delete GROQ_KEY
+\`\`\`
+## Keep values out of shell history
+Three input modes. Pick the safest for the source:
+- **\`--value-stdin\`**: read from stdin. Best for piping from
+  another process (\`gcloud secrets ...\`, \`op read\`, etc.).
+- **\`--value-file <path>\`**: read from a file. Use \`-\` to read
+  from stdin (alias for \`--value-stdin\`).
+- **Positional value**: convenient but lands in shell history.
+  Avoid in scripts.
+Exactly one source per call; passing two is a usage error
+(\`error_code: validation_error\`, exit 2).
+## How resolution works
+At chatbot dispatch, the renderer looks up each \`{{secret:KEY}}\`
+in the workspace's secret store. Missing keys render as the empty
+string (no error). This matches the legacy ContextValueResolver
+behavior and lets templates degrade silently instead of breaking
+the request. The bot will most likely 401, which is a clear signal.
+Reserved KEYs (\`BASIC_AUTH_*\`, \`SESSION_COOKIE_*\`,
+\`LOGIN_*\`) are rejected client-side with a hint to use
+\`ish workspace site-access\` instead. Those keys are owned by
+the site-access flow and writing them as plain secrets would
+silently break that path.
+## When to use a secret vs. inline a header
+If the value is the same across every customer / environment and
+not sensitive (a vendor name, an API version), inline it in the
+endpoint config's \`headers\` field. If it's per-workspace, rotates,
+or shouldn't be committed to a config JSON file, use a secret.
+## Related
+- \`guides/chat\`: chat endpoint setup, including auth header examples.
+- \`concepts/site-access\`: credentials for browser-rendered study URLs.
+`;
 const CONCEPT_RUN_VERBS = `# concept: run verbs — \`study run\` vs \`ask run\`
 Both verbs dispatch simulations against an audience, but the lifecycle
@@ -1069,7 +1233,80 @@ The CLI guarantees these contracts so agents can chain safely:
   \`jq '.rounds[0].responses | length'\`.
 - **\`study run --json\` exposes tester handles.** The top-level
   \`tester_ids[]\` and \`tester_aliases[]\` arrays are the canonical
-  inputs to \`ish study poll/wait/cancel\`.
+  inputs to \`ish study poll/wait/cancel\`. The \`simulations[]\` array
+  is collapsed to one batch entry per study (M13) with nested
+  \`tester_ids[]\`, \`tester_aliases[]\`, \`job_ids[]\`, and \`count\` —
+  an N-sample dispatch is a single row, not N near-duplicate rows.
+- **\`study results --json\` includes per-answer sentiment** (M10).
+  Every \`interview_answers[].answers[]\` row carries \`sentiment\`
+  (the tester's session-level label from \`tester_summary.sentiment\`),
+  and every \`testers[]\` row carries \`sentiment\` + \`comment\`. No
+  \`study tester <id>\` round-trip required.
+- **\`study results --summary\`** is a lean projection: counts +
+  sentiment histogram + per-tester {alias, status, sentiment, comment,
+  error_message}. Drops \`interview_answers\` and per-interaction
+  breakdowns. Cheapest "did this run land?" shape.
+- **\`study results --transcript <tester_id>\`** is the chat-modality
+  projection. Returns \`{tester_id, tester_alias, transcript: [...],
+  unique_bot_replies, tester_summary}\`. Each transcript entry is
+  \`{role, text, turn_index, ...}\` — bot turns add \`failure\`
+  (set when the dispatch crashed); tester turns add \`action_type\`,
+  \`option_label\`, and \`sentiment\`. \`text\` is null on tester
+  turns whose action carries no text (\`select_option\`,
+  \`ignore_offered\`); read intent from \`action_type\` +
+  \`option_label\`. Same shape as the MCP \`get_chat_transcript\`
+  tool. \`unique_bot_replies = 1\` on a multi-turn run means the bot
+  sent the same reply on every turn (the M2 loop signature).
+- **\`study tester --summary\`** drops the action timeline and
+  returns just \`{tester, interaction_count, sentiment, comment,
+  error_message?, error_kind?}\`.
+- **\`study poll\` honors the active study.** Pass no \`--study\`
+  flag and it falls back to the active study (set by
+  \`ish study use\`), parity with \`study results\` /
+  \`study wait\` / \`study run\`.
+- **\`iteration get --json\` testers carry \`alias\` + \`name\`** (M12).
+  Same identifying triple as \`study results --json\`'s tester rows.
+- **\`ask results --json\` keeps \`variant_pick_id\` on every response**
+  (C5-Bug4). It's the load-bearing field for "who picked what" — no
+  \`--verbose\` required. Same logic on \`ask get --json\`.
+- **Every verb's \`--help\` ends with a "Tips:" footer** naming
+  \`--get\` and \`--fields\`. If you're reaching for \`jq -r .x\` you
+  almost certainly wanted \`--get x\`.
+- **\`study run --wait\` returns \`error_code: "wait_timeout"\`**
+  (exit 5, retryable) when the wait timer expires — distinct from
+  the api-client's generic timeout / network / server families. The
+  envelope carries \`progress: {study_id, iteration_id?,
+  timeout_seconds, done, total, pending, rows[]}\` so the agent
+  can resume by polling rather than re-dispatching. Same shape on
+  \`study wait\` (single-tester rows[] has length 1).
+- **\`study run\` accepts \`--dispatch-timeout <s>\`** (default 120)
+  for the per-POST testers/batch + simulation/start budget. On
+  timeout (or any dispatch failure), the error envelope includes
+  \`seeded_but_not_dispatched_ids[]\` + \`seeded_but_not_dispatched_aliases[]\`
+  listing the testers that exist server-side but didn't get
+  dispatched. Resume by polling those instead of re-running
+  \`study run\` (which would create another batch on top).
+- **\`ask run --new\` is non-idempotent and marked \`retryable: false\`**
+  on any failure — agents auto-retrying would create a duplicate
+  ask. The error envelope's \`suggestions\` includes a pointer to
+  \`ish ask list --workspace <id>\` so the agent can confirm
+  whether the resource already exists before retrying manually.
+- **\`ish connect --detach\` blocks until tunnel registration is
+  confirmed** (\`registered: true\` in the lock file). The
+  registration POST retries up to 4 times with exponential backoff
+  (~7s worst case) before giving up; the heartbeat re-registers
+  on a transient 404 instead of burning through the 3-strike
+  countdown. If the heartbeat path persistently 404s even after
+  several successful re-register cycles (D1: backend keeps
+  forgetting the connection between heartbeats), the CLI emits
+  a single stderr Notice and keeps the tunnel up rather than
+  dying — the route is the problem, not the tunnel. Subsequent
+  simulations may still hit \`TunnelInactive\` on dispatch in
+  that case; investigate the backend's /connect route.
+- **The "Could not verify token (network error)…" stderr warning
+  is gone** on green runs. The probe is best-effort; if there's a
+  real auth failure, the subsequent API call surfaces it with a
+  proper exit code 3.
 - **Study responses carry a derived \`runtime_status\` field**
   (\`draft | running | completed | completed_with_errors | cancelled\`).
   Prefer this over the raw \`status\` field — \`runtime_status\` is
@@ -1124,7 +1361,7 @@ The CLI guarantees these contracts so agents can chain safely:
       "picks":   { "A": 3, "B": 0 },
       "ratings": { "A": { "mean": 4.667, "n": 3 },
                    "B": { "mean": 2.000, "n": 3 } },
-      "winner":  { "letter": "A", "count": 3, "tied": false }
+      "winner":  { "label": "A", "count": 3, "tied": false, "n": 3, "confidence": "medium" }
     }
   }
   \`\`\`
@@ -1132,8 +1369,23 @@ The CLI guarantees these contracts so agents can chain safely:
   \`picks\` is present iff \`wants_pick\`; \`ratings\` is present iff
   \`wants_ratings\` and ≥ 1 rating was submitted; \`winner\` is the
   highest pick count (\`tied: true\` if multiple variants share the
-  top). \`mean\` is rounded to 3 decimal places; \`n\` is the rating
-  count for that variant.
+  top). \`winner.n\` is the completed-response sample;
+  \`winner.confidence\` is \`low\` for n<3 / tied / any errors,
+  \`medium\` for clean 3–9, \`high\` for clean 10+. When >50% of
+  dispatched responses errored the winner block is replaced by
+  \`{ refused: true, reason: "error_rate_too_high", errored, total }\` —
+  run \`ish ask retry <ask> --round N\` first. \`mean\` is rounded to 3
+  decimal places; \`n\` (on ratings) is the rating count for that variant.
+- **Errored ask responses carry \`error_message\` + \`error_kind\`.**
+  Each \`responses[]\` entry whose \`status: errored\` exposes the
+  classified failure (e.g. \`first_impression_llm_failed\`,
+  \`interview_llm_failed\`, \`variant_preparation_failed\`) so an agent
+  can branch on retry vs abort without parsing prose. Both fields are
+  \`null\` on \`pending\` and \`completed\` rows.
+- **\`ish ask retry <ask> --round N\` re-dispatches errored responses.**
+  COMPLETED rows are left untouched; only ERRORED responses are reset
+  to PENDING and re-run from scratch. Idempotent: zero-errored is a
+  no-op. Add \`--wait\` to block until the retry settles.
 - **\`ask results --json\` deduplicates tester profile snapshots.** When
   \`tester_profile\` and \`tester_profile_snapshot\` share all
   overlapping fields (the common case — they only diverge if the
@@ -1456,6 +1708,267 @@ upgrade or delete an existing resource to free up headroom.
 - \`concepts/profile\`   — \`maxCustomTesterProfiles\` gates profile creation.
 - \`reference/json-mode\` — full error envelope shape and exit codes.
 `;
+const GUIDE_CHAT = `# guide: chat-modality studies
+Goal: from a customer chatbot endpoint to a finished chat-modality
+study with parsed transcripts, end to end via the CLI. The flow has
+three phases: configure the endpoint, smoke test it, run a study.
+## 1. Configure the endpoint
+Two starting points:
+### From a curl example (recommended for first-time setup)
+The agent has a curl request that talks to the customer's bot. Save
+it to a file and run \`init\`:
+\`\`\`
+ish chat endpoint init \\
+    --from-curl ./bot.curl \\
+    --name my-bot
+\`\`\`
+\`init\` posts the curl to \`/chat/auto-detect-shape\`, infers the
+config (URL, method, headers, body template, response paths,
+mode, async-poll if applicable), and saves it as a chatbot endpoint
+resource. Output JSON shape:
+\`\`\`json
+{
+  "success": true,
+  "saved": true,
+  "endpoint_id": "ep_abc",
+  "alias": "ep-abc",
+  "config": { /* full ChatbotEndpointConfig */ },
+  "tunnel_backed": true,
+  "tunnel_backed_detected": true,
+  "confidence": "high",
+  "explanation": "...",
+  "warnings": []
+}
+\`\`\`
+For local bots (URL host is \`localhost\` / \`127.0.0.1\` /
+\`0.0.0.0\`), \`tunnel_backed\` is auto-set to \`true\`. Override
+explicitly with \`--tunnel-backed\` / \`--no-tunnel-backed\`.
+Pass \`--no-save\` to inspect the inferred config without persisting.
+### From a hand-written config
+\`\`\`
+ish chat endpoint create --endpoint-config ./bot-config.json --name "my-bot"
+\`\`\`
+The file is the bare \`ChatbotEndpointConfig\` shape (or a full
+endpoint envelope with \`id\` / \`name\` / \`config\` keys —
+\`.config\` is extracted automatically). Pipe from stdin via \`-\`.
+### Editing a saved endpoint
+The dialog and the CLI both PUT the full config to
+\`/chatbot-endpoints/{id}\` on save (no patch semantics). The CLI
+exposes that round-trip cleanly:
+\`\`\`
+# Single-field edits via shorthand flags
+ish chat endpoint update ep-abc --name "Production support bot"
+ish chat endpoint update ep-abc --url https://api.example.com/v2/chat
+ish chat endpoint update ep-abc --mode stateless
+ish chat endpoint update ep-abc --tunnel-backed       # or --no-tunnel-backed
+# Richer edits via fetch | jq | replace
+ish chat endpoint get ep-abc --verbose \\
+  | jq '.config.outgoing.headers["X-API-Key"] = "{{secret:KEY}}"' \\
+  | ish chat endpoint update ep-abc --endpoint-config -
+ish chat endpoint get ep-abc --verbose \\
+  | jq '.config.incoming.slotsContainerPaths += ["response.options"]
+        | .config.incoming.slotsKindHints["response.options"] = "alternatives"' \\
+  | ish chat endpoint update ep-abc --endpoint-config -
+\`\`\`
+\`get --verbose\` (or piped) emits the round-trippable envelope
+\`{id, name, isTunnelBacked, config}\` — exactly what
+\`update --endpoint-config -\` accepts. Field-shorthand flags win on
+conflict with \`--endpoint-config\`.
+### Body template placeholders
+The renderer expands these tokens at request time:
+- \`{{action.text}}\`: the persona's outgoing user message this turn.
+- \`{{history}}\`: past turns as \`[{role, content}, ...]\`. Past
+  turns only; current turn is in \`{{action.text}}\`.
+- \`{{history_with_current}}\`: \`{{history}}\` plus a synthetic
+  \`{role: "user", content: action.text}\` at the tail. **Use this for
+  OpenAI-shape bots that take a single \`messages: [...]\` array
+  containing prior turns and the current user message.**
+- \`{{turn.role}}\` / \`{{turn.text}}\`: per-turn expansion. Place
+  one element with these tokens inside an array literal; the
+  renderer expands it to one entry per past turn.
+- \`{{tester.name}}\` / \`{{tester.locale}}\`: persona attributes.
+- \`{{conversation_id}}\`: bot-supplied session id (stateful mode).
+- \`{{secret:KEY}}\`: workspace secret (see below).
+\`{{history_with_current}}\` produces the typical OpenAI/Anthropic/Pollinations request shape:
+\`\`\`json
+{
+  "model": "gpt-4o-mini",
+  "messages": "{{history_with_current}}"
+}
+\`\`\`
+### Auth via workspace secrets
+For bots behind an API key, store the value as a workspace secret
+once and reference it from the endpoint's headers:
+\`\`\`
+printf %s "$GROQ_KEY" | ish secret set GROQ_KEY --value-stdin
+ish chat endpoint get ep-abc --verbose \\
+  | jq '.config.outgoing.headers["Authorization"] = "Bearer {{secret:GROQ_KEY}}"' \\
+  | ish chat endpoint update ep-abc --endpoint-config -
+\`\`\`
+The renderer resolves \`{{secret:GROQ_KEY}}\` from the workspace
+secret store at dispatch time. Missing keys render empty, which
+typically surfaces as a 401 from the bot. That's an actionable signal.
+See \`concepts/secret\` for the full set of input modes
+(\`--value-file\`, \`--value-stdin\`, positional) and the reserved-key
+list.
+## 2. Smoke test the connection
+Before launching a study, verify the bot answers cleanly:
+\`\`\`
+ish chat endpoint test ep-abc -m "Hello"
+\`\`\`
+Output:
+\`\`\`json
+{
+  "success": true,
+  "text": "Hi! How can I help?",
+  "conversation_id": "...",
+  "slots": [...],
+  "references": [...],
+  "bot_latency_ms": 240,
+  "end_of_conversation": false
+}
+\`\`\`
+For tunnel-backed endpoints (\`isTunnelBacked: true\`), the CLI
+runs a tunnel pre-flight against \`/connect/active\` first and
+exits \`5\` with \`error_kind: "TunnelInactive"\` when no tunnel is
+running. Run \`ish connect <port>\` in another shell first, then
+retry.
+For stateful endpoints, thread the conversation across script
+invocations:
+\`\`\`
+CID=$(ish chat endpoint test ep-abc -m "Hi" | jq -r .conversation_id)
+ish chat endpoint test ep-abc -m "Tell me more" --conversation-id "$CID"
+\`\`\`
+For multi-turn validation use \`ish study run --sample 1\` against
+a draft study (next phase).
+## 3. Run a chat-modality study
+Use the existing study flow with the new chat flags. \`study create\`
+fetches the saved endpoint and embeds its config inline at
+\`iteration.details.endpoint\` plus the lineage id at
+\`iteration.details.chatbot_endpoint_id\`:
+\`\`\`
+ish study create \\
+    --modality chat \\
+    --endpoint ep-abc \\
+    --name "Sign-up Q1" \\
+    --assignment "Sign up:Try to sign up"
+\`\`\`
+Or pass an inline config when there's no saved endpoint to reference
+(mutually exclusive with \`--endpoint\`):
+\`\`\`
+cat ./bot-config.json | ish study create \\
+    --modality chat --endpoint-config - \\
+    --name "Sign-up Q1" --assignment "Sign up:Try to sign up"
+\`\`\`
+Optional \`--max-turns <n>\` (default 12) caps the number of chat turns per tester.
+Audience size is set at run time. Use \`--sample <N>\` to pick N
+random simulatable profiles, or \`--all\` for the full pool.
+\`--profile <id>\` is also supported for explicit selection:
+\`\`\`
+ish study run stu-xyz --sample 5 --wait
+\`\`\`
+Pull raw interactions:
+\`\`\`
+ish study results stu-xyz --json | jq '.interactions'
+\`\`\`
+Note: chat is currently excluded from the LLM-analysis route; the
+results call returns raw interactions, not an analyzed summary.
+## Iteration shortcuts
+Add a chat iteration to an existing chat study post-hoc. The
+iteration type is inherited from the parent study's modality —
+no \`--type\` flag is needed:
+\`\`\`
+ish iteration create --study stu-xyz --endpoint ep-abc --max-turns 10
+ish iteration create --study stu-xyz --endpoint-config ./bot.json
+\`\`\`
+Same flag set as \`study create\`'s chat shortcut.
+## Active-endpoint convention
+\`ish chat endpoint use <id>\` writes the endpoint to
+\`~/.ish/config.json\` (\`chat_endpoint\` key). After that, every
+\`chat endpoint *\` verb that takes \`[endpoint-id]\` defaults to the
+active endpoint when the positional is omitted:
+\`\`\`
+ish chat endpoint use ep-abc
+ish chat endpoint test -m "Hello"        # uses ep-abc
+ish chat endpoint get --verbose          # uses ep-abc
+\`\`\`
+Mirrors \`workspace use\` / \`study use\` / \`ask use\`.
+## Common errors
+- \`error_kind: "TunnelInactive"\` (exit 5) — tunnel-backed endpoint
+  but no active tunnel. Run \`ish connect <port>\` first.
+- \`error_code: "validation_error"\` (exit 2) — usage error
+  (mutually exclusive flags both set, missing required input,
+  modality mismatch). The error envelope's \`valid_options\` field
+  surfaces the accepted shape.
+- \`error_kind: "BotInvalidResponseError"\` (exit 1) — the bot
+  responded but the configured \`incoming.*\` paths didn't resolve.
+  Edit the response shape via \`update --endpoint-config\` or rerun
+  \`init\` with a fresher curl sample.
+## Related
+- \`concepts/iteration\` — chat iteration shape (\`details.endpoint\`,
+  \`details.chatbot_endpoint_id\`, \`details.max_turns\`).
+- \`concepts/study\` — modality + assignments + iteration nesting.
+- \`reference/json-mode\` — JSON output, error envelope, exit codes.
+- \`guides/first-study\` — the same pattern for an interactive
+  modality study.
+`;
 const PAGES = [
     {
         slug: "overview",
@@ -1529,6 +2042,12 @@ const PAGES = [
         description: "Credentials for gated URLs (basic auth, cookies, login forms).",
         body: CONCEPT_SITE_ACCESS,
     },
+    {
+        slug: "concepts/secret",
+        title: "concept: secret",
+        description: "Per-workspace KV store for {{secret:KEY}} placeholders in chatbot endpoint headers.",
+        body: CONCEPT_SECRET,
+    },
     {
         slug: "concepts/run-verbs",
         title: "concept: run verbs — study run vs ask run",
@@ -1565,6 +2084,12 @@ const PAGES = [
         description: "Login → workspace → audience → study → iteration → run → results.",
         body: GUIDE_FIRST_STUDY,
     },
+    {
+        slug: "guides/chat",
+        title: "guide: chat-modality studies",
+        description: "Configure a chatbot endpoint, smoke test it, run a chat-modality study.",
+        body: GUIDE_CHAT,
+    },
 ];
 const PAGES_BY_SLUG = new Map(PAGES.map((p) => [p.slug, p]));
 export function listPages() {