npm - @ishlabs/cli - Versions diffs - 0.9.0 → 0.10.0 - Mend

@ishlabs/cli 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/README.md +54 -5
package/dist/commands/ask.d.ts +12 -0
package/dist/commands/ask.js +127 -2
package/dist/commands/chat.d.ts +17 -0
package/dist/commands/chat.js +589 -0
package/dist/commands/iteration.js +134 -14
package/dist/commands/secret.d.ts +20 -0
package/dist/commands/secret.js +246 -0
package/dist/commands/study-run.d.ts +38 -0
package/dist/commands/study-run.js +199 -80
package/dist/commands/study-tester.js +17 -2
package/dist/commands/study.js +309 -37
package/dist/commands/workspace.js +81 -0
package/dist/config.d.ts +3 -0
package/dist/connect.d.ts +3 -0
package/dist/connect.js +346 -22
package/dist/index.js +64 -6
package/dist/lib/alias-hydrate.d.ts +42 -0
package/dist/lib/alias-hydrate.js +175 -0
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +28 -1
package/dist/lib/auth.js +4 -2
package/dist/lib/chat-endpoint-formatters.d.ts +39 -0
package/dist/lib/chat-endpoint-formatters.js +104 -0
package/dist/lib/command-helpers.d.ts +18 -0
package/dist/lib/command-helpers.js +105 -3
package/dist/lib/docs.js +542 -17
package/dist/lib/modality.d.ts +42 -0
package/dist/lib/modality.js +192 -0
package/dist/lib/output.d.ts +41 -0
package/dist/lib/output.js +453 -19
package/dist/lib/paths.d.ts +1 -0
package/dist/lib/paths.js +3 -0
package/dist/lib/skill-content.js +182 -12
package/dist/lib/types.d.ts +15 -0
package/package.json +1 -1

package/dist/lib/skill-content.js CHANGED Viewed

@@ -112,11 +112,28 @@ ish study list
 ish iteration list --study s-b2c
 ish ask list
-# Define / configure
-ish study create --name "..." --modality interactive --assignment "..." --question "..."
-ish iteration create --url https://example.com
+# Define / configure (one-shot — iteration A inline)
+ish study create --modality interactive --name "..." --url https://example.com \
+  --assignment "..." --question "..."
+ish study create --modality image --name "..." \
+  --image-urls "https://cdn.example.com/a.png,https://cdn.example.com/b.png" \
+  --assignment "Compare:Which feels more premium?"
+ish study create --modality video --name "..." \
+  --content-url https://cdn.example.com/ad.mp4 --assignment "Watch:..."
+# Or 2-step (when you want to A/B iterations later, or upload local files)
+ish study create --name "..." --modality interactive --assignment "..."
+ish iteration create --url https://example.com  # auto-uploads local files
 ish profile generate --description "..." --count 5
+# Chat modality (talk to a customer chatbot). Audience size lives on
+# study run; study create defines the persistent shape only.
+ish chat endpoint init --from-curl ./bot.curl --name my-bot
+ish chat endpoint test my-bot -m "Hello"
+ish study create --modality chat --endpoint my-bot --assignment "Sign up:Try to sign up"
+# (then) ish study run --sample 5 --wait
 # Run
 ish study run --sample 5 --country SE --wait
 ish ask run --new --name "..." --prompt "..." --variant text:"A" --variant text:"B" --sample 30 --wants-pick --wait
@@ -223,6 +240,22 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
   Top-level field with per-round picks/winner snapshots and
   \`picks_delta\` (R1 → last). Don't diff two \`ask results\` calls by
   hand.
+- **\`ask retry <ask> --round N\` re-dispatches errored responses.**
+  Use after a partial failure (e.g. 4 of 5 testers errored on round
+  1). Only ERRORED rows are reset to PENDING and re-run; COMPLETED
+  rows are left untouched. Idempotent: zero-errored is a no-op. Add
+  \`--wait\` to block.
+- **Errored ask responses carry \`error_message\` + \`error_kind\`.**
+  Each \`responses[]\` entry with \`status: errored\` exposes the
+  classified failure (e.g. \`first_impression_llm_failed\`,
+  \`interview_llm_failed\`, \`variant_preparation_failed\`). Branch on
+  \`error_kind\` to decide retry vs abort.
+- **\`winner\` carries \`n\` and \`confidence\`.** \`n\` is the completed
+  sample the verdict was elected from; \`confidence\` is \`low\` /
+  \`medium\` / \`high\` based on completion ratio + tied-ness. When
+  errored responses exceed 50%, the winner block is REPLACED by
+  \`{ refused: true, reason: "error_rate_too_high", errored, total }\`
+  — run \`ask retry\` first.
 - **\`--workspace\` works at the program root AND every subcommand.**
   \`ish --workspace w-6ec study list\` and \`ish study list --workspace
   w-6ec\` are equivalent; if both are passed, the subcommand-level
@@ -348,14 +381,17 @@ ish profile generate \\
     --description "Tech-savvy millennials in the US who use mobile banking" \\
     --count 3
-# 4. Define the study
+# 4. Define the study + iteration A in one call (one-shot path).
+#    The same shape works for image (--image-urls), video / audio /
+#    document (--content-url <url>), and chat (--endpoint <id>).
 ish study create --name "Onboarding UX" --modality interactive \\
+    --url https://example.com --screen-format desktop \\
     --assignment "Sign up:Complete the signup flow" \\
     --question "How easy was it?"
 ish study use s-…
-# 5. Configure an iteration with the URL under test
-ish iteration create --url https://example.com
+# (Optional) add a B variant later instead of inline:
+# ish iteration create --url https://example.com/v2
 # 6. Run, blocking until done
 ish study run --all --wait
@@ -379,7 +415,7 @@ ish ask run --new --name "hero shots" \\
 # Read the verdict directly — no comment-parsing required:
 ish ask results --json | jq '.rounds[0].aggregates'
 # → { "picks": { "A": 22, "B": 8 },
-#     "winner": { "letter": "A", "count": 22, "tied": false } }
+#     "winner": { "label": "A", "count": 22, "tied": false, "n": 30, "confidence": "high" } }
 \`\`\`
 For \`--wants-pick\` / \`--wants-ratings\` rounds, \`ask results --json\`
@@ -480,7 +516,87 @@ URL=$(jq -r 'select(.status=="connected") | .tunnel_url' /tmp/ish-tunnel.log | h
 ish iteration create --url "$URL"
 \`\`\`
-## 7. Display-vs-capture: a script that does both
+## 7. Chat-modality study (drive a chatbot endpoint)
+Goal: configure a customer chatbot endpoint, smoke test it, and run
+a chat-modality study end to end. The CLI talks to the endpoint
+through whatever transport it's configured for (sync / async-poll);
+local bots reach ish via \`ish connect\`.
+\`\`\`bash
+# 1. Author the endpoint from a curl example (or a ChatbotEndpointConfig file).
+#    Localhost URLs auto-flag is_tunnel_backed=true.
+ID=$(ish chat endpoint init --from-curl ./bot.curl --name my-bot \\
+       | jq -r .endpoint_id)
+# 2. Smoke test (single turn). Tunnel-backed endpoints need an active
+#    \`ish connect <port>\` first; otherwise this exits 5 with
+#    error_kind="TunnelInactive".
+ish chat endpoint test "$ID" -m "Hello"
+# → { "success": true, "text": "Hi! How can I help?", "conversation_id": "...",
+#     "slots": [...], "bot_latency_ms": 240 }
+# 3. (Optional) iterate on the config — full-replace via stdin or
+#    one-liner shorthand. Mirrors the editor dialog's PUT contract.
+ish chat endpoint update "$ID" --name "Production support bot"
+ish chat endpoint get "$ID" --verbose \\
+  | jq '.config.incoming.slotsContainerPaths += ["response.options"]' \\
+  | ish chat endpoint update "$ID" --endpoint-config -
+# 4. Run a chat-modality study referencing the endpoint. Audience size
+#    is set on study run, not study create (--sample, --all, --profile).
+STUDY=$(ish study create --modality chat --endpoint "$ID" \\
+          --name "Sign-up Q1" --assignment "Sign up:Try to sign up" \\
+        | jq -r .id)
+ish study run --study "$STUDY" --sample 5 --wait
+ish study results "$STUDY" --json | jq '.testers'
+\`\`\`
+For stateful bots, thread \`conversation_id\` across single-turn
+test invocations:
+\`\`\`bash
+CID=$(ish chat endpoint test my-bot -m "Hi" | jq -r .conversation_id)
+ish chat endpoint test my-bot -m "Tell me more" --conversation-id "$CID"
+\`\`\`
+For OpenAI-shape bots that take a single \`messages: [...]\` array
+of prior turns plus the current user message, use the
+\`{{history_with_current}}\` placeholder in the body template
+(\`{ "messages": "{{history_with_current}}" }\`). Auto-detect emits
+this automatically when it sees an OpenAI-shape sample.
+For bots behind an API key, store the key as a workspace secret
+once and reference it from headers:
+\`\`\`bash
+printf %s "$GROQ_KEY" | ish secret set GROQ_KEY --value-stdin
+ish chat endpoint update "$ID" --endpoint-config - <<'EOF'
+{ "config": { "outgoing": { "headers": { "Authorization": "Bearer {{secret:GROQ_KEY}}" } } } }
+EOF
+\`\`\`
+Endpoint editing: \`get --verbose\` emits a round-trippable
+\`{id, name, isTunnelBacked, config}\` envelope that pipes directly
+into \`update --endpoint-config -\`. Field-shorthand flags
+(\`--name\`, \`--url\`, \`--method\`, \`--mode\`,
+\`--tunnel-backed\` / \`--no-tunnel-backed\`) cover one-liner edits
+without round-tripping.
+Failed chat workers surface their error in
+\`study results --json\` under \`testers[].error_message\` and
+also in \`study poll --json\`. Branch on it instead of treating
+\`interaction_count: 0\` as a generic failure.
+Pre-flight tip: \`ish workspace info\` exposes
+\`{studies_used, studies_max, testers_used, testers_max, tier}\` so
+you can branch on plan caps before \`study create\` returns
+\`error_code: usage_limit_reached\`.
+The full reference is at \`ish docs get-page guides/chat\`;
+secrets are documented at \`ish docs get-page concepts/secret\`.
+## 8. Display-vs-capture: a script that does both
 Goal: drive an A/B in a script, capture aliases without \`jq\`, and
 still show the human a readable result table at the end.
@@ -496,8 +612,8 @@ ASK=$(ish ask create --new --name "tagline AB" \\
 # Wait silently — exit code is what matters here.
 ish ask wait "$ASK" --timeout 600 --quiet
-# Capture the winner letter for downstream branching:
-WINNER=$(ish ask results "$ASK" --get rounds.aggregates.winner.letter)
+# Capture the winner label for downstream branching:
+WINNER=$(ish ask results "$ASK" --get rounds.aggregates.winner.label)
 echo "Winning variant: $WINNER"
 # Display mode — show the user the full results table even though
@@ -514,7 +630,33 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
 - Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
 - After \`ish study run --json\`, the testers you just dispatched are at
   \`.tester_aliases[]\` (and \`.tester_ids[]\` for UUIDs). Pass these to
-  \`ish study poll/wait/cancel <tester_id>\`.
+  \`ish study poll/wait/cancel <tester_id>\`. The \`simulations[]\` array
+  is collapsed to one batch entry per study with nested
+  \`tester_ids[]\` / \`tester_aliases[]\` / \`job_ids[]\` so an N-sample
+  batch is a single row, not N near-duplicate rows.
+- \`ish study poll\` honors the active study set by \`ish study use\` —
+  pass no \`--study\` flag and it polls the active study (parity with
+  \`study results\` / \`study wait\` / \`study run\`).
+- \`ish study results --json\` includes per-answer \`sentiment\` (the
+  tester's session-level sentiment label) on every \`interview_answers[]
+  .answers[]\` row, plus \`sentiment\` + \`comment\` on every
+  \`testers[]\` row. No need to fetch \`study tester <id>\` per row.
+- \`ish study results --summary --json\` drops the interview_answers
+  payload and gives you counts + sentiment + per-tester
+  {alias, status, sentiment, comment}. The cheapest "did this run land?"
+  shape.
+- \`ish study results --transcript <tester_id> --json\` is the
+  chat-modality projection: a flat \`transcript[]\` of {role, text,
+  turn_index, action_type?, option_label?, sentiment?, failure?} with a
+  \`unique_bot_replies\` count (1 on a multi-turn run = the M2 loop
+  signature). Same shape as the MCP \`get_chat_transcript\` tool.
+- \`ish study tester <id> --summary --json\` drops the action timeline
+  and returns just {tester, sentiment, comment, error_message}.
+- \`ish ask results --json\` keeps \`variant_pick_id\` on every
+  response without needing \`--verbose\` — it's the load-bearing field
+  for "who picked what". Same logic on \`ask get\`.
+- \`ish iteration get --json\` testers carry \`alias\` + \`name\` (M12
+  parity with \`study results --json\`).
 - Use \`--fields\` to keep JSON tight: \`ish study list --fields alias,name,status\`
 - Always pass \`--wait\` (or \`ish study wait\`) before reading
   \`ish study results\` — without it you may read partial data.
@@ -528,6 +670,27 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
   the JSON body to construct a recovery message. \`profile generate\` /
   \`study generate\` refuse the entire batch when the post-generation
   count would exceed the cap; re-issue with a smaller \`--count\`.
+- Every verb's \`--help\` ends with a "Tips:" footer naming \`--get\`
+  and \`--fields\`. If you're reaching for \`jq -r .x\` you almost
+  certainly wanted \`--get x\`.
+- \`ish study run --wait\` returns \`error_code: "wait_timeout"\`
+  on wait expiry (exit 5, retryable) — distinct from network /
+  server timeouts. The envelope carries \`progress\` so you can
+  resume by polling the listed testers instead of re-dispatching.
+  Same envelope on \`ish study wait\` and per-tester \`study wait\`.
+- \`ish study run\` accepts \`--dispatch-timeout <s>\` (default 120)
+  for the per-POST budget. On dispatch failure the error envelope
+  includes \`seeded_but_not_dispatched_ids[]\` /
+  \`seeded_but_not_dispatched_aliases[]\` — testers exist
+  server-side; resume by polling them, don't re-run \`study run\`.
+- \`ish ask run --new\` is non-idempotent and marked
+  \`retryable: false\` on any failure. If you do see one, run
+  \`ish ask list --workspace <id>\` first to check whether the
+  ask was created server-side before retrying manually.
+- \`ish connect --detach\` blocks until backend registration is
+  confirmed. The orphan-tunnel-on-startup-404 bug is fixed.
+- The \`Warning: Could not verify token (network error). Proceeding
+  anyway.\` stderr line is gone on green runs.
 ## Common reshaping → use the CLI, not jq/python
@@ -543,6 +706,11 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
 | Count responses on a round                | \`--json \\| jq '.rounds[0].responses \\| length'\` | \`ish ask get a-… --fields alias,rounds,responses_complete,responses_total\` |
 | Pick the A/B winner                       | \`--json \\| jq '.rounds[0].responses…'\` | \`ish ask results a-… --json\` then read \`.rounds[].aggregates.winner\` |
 | List of testers from \`study run\`        | \`--json \\| jq '.testers[].id'\`        | \`--get tester_aliases\` (or \`tester_ids\` for UUIDs)                |
+| Per-answer sentiment                      | \`--json \\| jq '...'\` per tester       | \`ish study results <id> --json\` (sentiment is on every answer row) |
+| "Did this run land?" headline             | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\`                          |
+| Chat transcript for one tester            | \`study tester --json\` + jq            | \`ish study results <id> --transcript <tester_id> --json\`           |
+| Tester headline only (no action timeline) | \`study tester --json\` + jq            | \`ish study tester <id> --summary --json\`                           |
+| Variant pick id on an ask response        | \`ask results --json --verbose\`        | \`ish ask results a-… --json\` (variant_pick_id is preserved)        |
 The bias here is intentional: \`ish\` ships shapes designed for agent
 consumption. If you find yourself reaching for \`jq\` or \`python\` to
@@ -564,13 +732,15 @@ ish <command> --help
 | Group       | Purpose                                         | Concept page                |
 |-------------|-------------------------------------------------|-----------------------------|
-| \`workspace\` | Top-level container (= product)                 | concepts/workspace          |
+| \`workspace\` | Top-level container (= product). \`info\` shows usage caps. | concepts/workspace |
 | \`study\`     | Persistent research artifact                    | concepts/study              |
 | \`iteration\` | One configured run of a study (URL or media)    | concepts/iteration          |
 | \`ask\`       | Lightweight reaction artifact                   | concepts/ask                |
 | \`profile\`   | Tester profiles + audience generation           | concepts/profile            |
 | \`source\`    | Upload sources for profile generation           | concepts/source             |
 | \`config\`    | Simulation configs (model, timing, retries)     | (run \`ish config --help\`)   |
+| \`chat\`      | Chat endpoint CRUD + smoke test (chat modality) | guides/chat                 |
+| \`secret\`    | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret           |
 | \`docs\`      | Offline docs for agents                         | (run \`ish docs --help\`)     |
 | \`init\`      | Drop this skill into a Claude Code / Codex /    | (run \`ish init --help\`)     |
 |             | Cursor / Cline / Roo project                    |                             |

package/dist/lib/types.d.ts CHANGED Viewed

@@ -265,12 +265,27 @@ export interface InterviewAnswer {
     answer: unknown;
     rationale?: string;
 }
+/**
+ * Pattern B — drill-in subset for a follow-up ask round.
+ *
+ * Filters the new round's audience to the testers who picked
+ * `picked_variant_id` on the 1-indexed prior `round`. Mirrors the
+ * backend's `AudienceSubset` model. Only valid on follow-up rounds —
+ * round 1 has no prior round to filter against. The backend rejects
+ * unresolvable subsets with a 422 carrying
+ * `error_kind: "audience_subset_invalid"`.
+ */
+export interface AudienceSubset {
+    round: number;
+    picked_variant_id: string;
+}
 export interface AskRoundInput {
     prompt: string;
     variants?: AskVariantInput[];
     wants_pick?: boolean;
     wants_ratings?: boolean;
     questions?: InterviewQuestion[];
+    audience_subset?: AudienceSubset;
 }
 export interface AskCreateInput {
     name: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ishlabs/cli",
-  "version": "0.9.0",
+  "version": "0.10.0",
   "description": "The command-line interface for ish",
   "type": "module",
   "bin": {