npm - @ishlabs/cli - Versions diffs - 0.8.2 → 0.8.4 - Mend

@ishlabs/cli 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +6 -0
package/dist/auth.d.ts +1 -0
package/dist/auth.js +12 -3
package/dist/commands/ask.js +59 -16
package/dist/commands/iteration.js +45 -11
package/dist/commands/profile.js +65 -12
package/dist/commands/study-run.js +49 -0
package/dist/commands/study-tester.js +5 -2
package/dist/commands/study.js +71 -16
package/dist/connect.js +7 -7
package/dist/index.js +119 -2
package/dist/lib/api-client.js +29 -7
package/dist/lib/command-helpers.d.ts +14 -0
package/dist/lib/command-helpers.js +40 -0
package/dist/lib/docs.js +430 -13
package/dist/lib/local-sim/install.d.ts +0 -7
package/dist/lib/local-sim/install.js +20 -13
package/dist/lib/output.js +437 -63
package/dist/lib/skill-content.js +102 -9
package/dist/lib/types.d.ts +3 -1
package/dist/upgrade.js +3 -3
package/package.json +3 -2

package/dist/lib/docs.js CHANGED Viewed

@@ -43,6 +43,8 @@ Two top-level run verbs:
 ## Where to look next
 - New here? \`ish docs get-page concepts/workspace\`, then \`concepts/study\`.
+- **Cold start?** Run \`ish status\` (alias \`ish whoami\`) — confirms login
+  and prints active workspace/study/ask. See \`concepts/active-context\`.
 - Running your first study? \`ish docs get-page guides/first-study\`.
 - Comparing study vs ask? \`ish docs get-page concepts/run-verbs\`.
 - Need machine-readable output? \`ish docs get-page reference/json-mode\`.
@@ -81,6 +83,10 @@ ish workspace use w-6ec        # set as active
 ish workspace get              # show the active workspace
 ish workspace site-access status
 \`\`\`
+## Related
+- \`reference/billing-limits\` — \`maxProducts\` cap on workspace creation.
 `;
 const CONCEPT_STUDY = `# concept: study
@@ -101,17 +107,63 @@ its iterations. Think: a study is the recipe; an iteration is one batch.
 ## Lifecycle
-1. \`ish study create --name "Onboarding UX" --modality interactive --assignment "Sign up:Complete the signup flow" --question "How easy was it?"\`
-2. \`ish iteration create --url https://example.com\` (creates the first iteration)
-3. \`ish study run --sample 5 --country SE\` (dispatches simulations)
+1. \`ish study create --name "Onboarding UX" --modality interactive --assignment "Sign up:Complete the signup flow" --question "How easy was it?"\` — creates the recipe with **zero iterations**.
+2. \`ish iteration create --url https://example.com\` — first iteration becomes label \`A\`.
+3. \`ish study run --sample 5 --country SE\` — dispatches simulations.
 4. \`ish study results\` or \`ish study wait\` to gather outputs.
+### One-shot variant
+\`study create\` now accepts \`--content-text\` (text modality) or
+\`--url\` (interactive modality) inline; iteration A is created in the
+same call. Useful when you have a single test artifact and don't need
+to A/B iterations:
+\`\`\`
+ish study create --modality text --content-type email \\
+  --name "Daily Brief concept" \\
+  --assignment "Read:Read the email and react" \\
+  --question "What stood out?" \\
+  --content-text @./brief.md
+# → study + iteration A in one call, ready for \`study run\`.
+\`\`\`
+Without those flags no iteration is created — agents can no longer
+trip the old "empty A" footgun where \`study run\` silently targeted a
+placeholder.
+## Status fields (read \`runtime_status\`, not \`status\`)
+Every study response carries two status-shaped fields:
+- \`status\` — the raw lifecycle column on the row, values
+  \`draft | running | completed | cancelled\`. Updated lazily; can
+  disagree with what the testers actually did.
+- \`runtime_status\` — derived by aggregating the iteration testers'
+  states. Values: \`draft | running | completed |
+  completed_with_errors | cancelled\`. **Never reports \`failed\` while
+  completed runs exist** (the Bk2 invariant). Prefer this for any
+  agent decision.
+The CLI also surfaces a \`status_inferred\` field + stderr warning when
+it detects raw-vs-derived inconsistencies. See \`reference/json-mode\`.
+## Generate vs create
+\`ish study generate --problem "..."\` runs an LLM-backed flow that
+picks a sensible modality from your brief and returns a
+\`modality_rationale\` field (≤30 words) explaining the choice.
+Override before adding iterations via
+\`ish study update <id> --modality text\` if the rationale shows the
+pick was wrong.
 ## Related
 - \`concepts/iteration\` — the unit of execution within a study.
 - \`concepts/assignment\` — task definition syntax.
 - \`concepts/questionnaire\` — question types and timing.
 - \`concepts/run-verbs\` — when to use \`study run\` vs \`ask run\`.
+- \`reference/billing-limits\` — \`maxStudiesPerProduct\` cap on study creation.
 `;
 const CONCEPT_ITERATION = `# concept: iteration
@@ -157,11 +209,42 @@ ish iteration list --study s-b2c
 ish iteration get i-d4e
 \`\`\`
+## No more auto-empty iteration A
+\`ish study create\` and \`ish study generate\` **do not auto-create
+iteration A** anymore (Pattern E remediation, ish-cli v0.8.x). The
+first explicit \`ish iteration create\` becomes label A, second is B,
+etc. Running \`ish study run\` on a study with zero iterations exits
+2 with a clear error pointing you to \`ish iteration create\`.
+If you do somehow run against an interactive iteration without a URL
+(or a media iteration without content), \`study run\` exits 2 with:
+\`\`\`
+Iteration "A" (i-...) has no URL configured yet. Add a URL with
+\`ish iteration create --study s-... --url <url>\` (or update the
+existing iteration via \`ish iteration update i-... --details-json '{...}'\`),
+then retry.
+\`\`\`
+Treat this as actionable, not transient — re-running won't change anything.
+## Default segmentation for text/image iterations
+For text-modality iterations created with just \`--content-text\` (and
+similarly \`--image-urls\` for image), the worker now synthesises a
+single whole-content section if no \`segmentation\` was supplied. This
+means a minimal \`ish iteration create --study s-XYZ --content-text
+"..."\` actually runs end-to-end without you needing to author a
+SegmentationConfig manually. Author your own segmentation when you
+want section-level reactions; otherwise the default just works.
 ## Related
 - \`concepts/study\` — the parent artifact.
 - \`concepts/run-verbs\` — how \`ish study run\` selects the iteration.
 - \`concepts/audience\` — how testers are picked for a run.
+- \`reference/billing-limits\` — \`maxIterationsPerStudy\` cap on iteration creation.
 `;
 const CONCEPT_ASSIGNMENT = `# concept: assignment
@@ -213,7 +296,7 @@ replaces the full assignment list — additive editing is not supported.
 const CONCEPT_QUESTIONNAIRE = `# concept: questionnaire
 The **questionnaire** is the list of \`interview_questions\` a tester
-answers before, during, or after their assignments. A study has 0..N
+answers before or after their assignments. A study has 0..N
 questions, each with a type and a timing.
 ## Question shape
@@ -221,12 +304,12 @@ questions, each with a type and a timing.
 \`\`\`json
 {
   "question": "How easy was checkout?",
-  "type": "slider",          // text | slider | likert | choice_single |
-                             // choice_multiple | number | …
-  "timing": "after",         // before | during | after
+  "type": "slider",          // text | slider | likert |
+                             // single-choice | multiple-choice | number
+  "timing": "after",         // before | after
   "min": 1, "max": 7, "step": 1,
   "labels": ["Hard", "Easy"],
-  "options": ["A", "B", "C"] // only for choice_*
+  "options": ["A", "B", "C"] // only for single-choice / multiple-choice
 }
 \`\`\`
@@ -289,8 +372,53 @@ ish ask run --prompt "And now which?" \\
 ish ask list
 ish ask get a-6ec --round 2
 ish ask results a-6ec
+ish ask results a-6ec --json | jq '.rounds[0].aggregates'
+\`\`\`
+## Reading the verdict
+For \`--wants-pick\` / \`--wants-ratings\` rounds, \`ask results --json\`
+includes an \`aggregates\` field per round so you don't have to parse
+prose:
+\`\`\`json
+{
+  "picks":   { "A": 3, "B": 0 },
+  "ratings": { "A": { "mean": 4.667, "n": 3 },
+               "B": { "mean": 2.000, "n": 3 } },
+  "winner":  { "letter": "A", "count": 3, "tied": false }
+}
 \`\`\`
+When the ask has 2+ rounds, \`ask results\` also includes a top-level
+\`cross_round_summary\` block with per-round picks/winner and a
+\`picks_delta\` (R1 → last round). Skip the manual diffing of two
+\`ask results\` calls.
+\`\`\`json
+"cross_round_summary": {
+  "rounds": [
+    { "round_number": 1, "picks": {"A": 1, "B": 2}, "winner": {"letter": "B", "count": 2, "tied": false } },
+    { "round_number": 2, "picks": {"A": 3, "B": 0}, "winner": {"letter": "A", "count": 3, "tied": false } }
+  ],
+  "picks_delta": { "A": 2, "B": -2 }
+}
+\`\`\`
+## Adding follow-up questions to a round
+\`ish ask add-questions --round N --questions ./qs.json\` is **additive
+by default**: prior phase-1 outputs (comment, pick, ratings) are
+preserved on every non-errored response, and the worker only answers
+the newly-added questions for each tester. Existing picks stay stable.
+Pass \`--redispatch-all\` for the legacy reset behavior — useful when a
+question is sufficiently different that you want fresh first
+impressions, not augmentation. Without that flag, agents iterating on
+copy can safely append questions without losing prior round results.
+See \`reference/json-mode\` for the full shape.
 ## Variant syntax
 \`--variant <type>:<value>[::label=<label>]\`
@@ -327,6 +455,15 @@ ish ask wait a-6ec --round 2 --timeout 600
 ish ask results a-6ec --round 1
 \`\`\`
+## \`add-questions\` is additive
+Appending questions to a completed round preserves prior data — variant
+comments, picks, ratings, and earlier-question answers all stay. Only
+the new question(s) get dispatched to the existing testers. Cost is
+roughly N phase-2 LLM calls instead of 2N (no phase-1 re-run). Errored
+responses are skipped entirely; completed responses flip to PENDING and
+re-finalize after the new question is answered.
 ## Related
 - \`concepts/ask\` — the parent artifact.
@@ -384,6 +521,7 @@ Expected JSON: \`{ "name": "...", "type": "ai", "gender": "female",
 - \`concepts/source\` — the inputs to \`profile generate\`.
 - \`concepts/audience\` — how profiles get selected into a run.
+- \`reference/billing-limits\` — \`maxCustomTesterProfiles\` cap on profile creation.
 `;
 const CONCEPT_SOURCE = `# concept: source
@@ -613,8 +751,8 @@ mode is **auto-enabled when stdout is piped**, so an agent rarely needs
 - \`--json\`            — force JSON output even on a TTY.
 - \`--fields a,b,c\`    — keep only these fields in JSON output (e.g.
                           \`alias,name,status\`). Filters per item only;
-                          paginated wrappers (\`{items, total, limit,
-                          offset}\`) keep their shape.
+                          list wrappers (\`{items, total, returned,
+                          limit, offset, has_more}\`) keep their shape.
 - \`--verbose\`          — include full UUIDs, timestamps, and (on
                           write paths) the full server payload instead
                           of the compact response.
@@ -625,9 +763,26 @@ mode is **auto-enabled when stdout is piped**, so an agent rarely needs
 The CLI guarantees these contracts so agents can chain safely:
-- **Lists keep their wrapper.** \`--fields\` strips per-item, never the
-  envelope. A paginated list with \`{items, total, limit, offset}\` will
-  always have those four keys.
+- **Every list response is a six-key envelope.** All
+  \`<entity> list --json\` responses (workspace, study, iteration, ask,
+  profile, config) return:
+  \`\`\`json
+  {
+    "items":    [...],
+    "total":    121,    // server-provided when paginated; else items.length
+    "returned": 50,     // items.length, always present
+    "limit":    50,
+    "offset":   0,
+    "has_more": true    // total > offset + returned
+  }
+  \`\`\`
+  When the server doesn't paginate, \`total = returned = limit\`,
+  \`offset = 0\`, \`has_more = false\` (synthesized client-side).
+  \`--fields\` strips per-item, never the envelope — those six keys are
+  always present. Use \`has_more\` to detect truncation rather than
+  counting items yourself.
 - **Write paths always include \`id\` AND \`alias\`.** Even with
   \`--fields\` set, you can identify the affected resource. Default
   write-path JSON is compact (\`{id, alias, name, updated_at,
@@ -635,9 +790,99 @@ The CLI guarantees these contracts so agents can chain safely:
 - **\`profile generate\` trims \`simulation_config\` by default** (~9×
   smaller than the raw response). Pass \`--include-simulation-config\`
   if you need it.
+- **\`<entity> get\` accepts multiple IDs.** \`profile get\`, \`study get\`,
+  \`iteration get\`, and \`ask get\` all take \`<ids...>\` — pass two or
+  more aliases (space- or comma-separated) and the response is a
+  \`{items:[...], total:N}\` envelope. Use this instead of piping
+  \`list --json\` to \`jq\`/\`python\` to filter by alias.
+- **Ask detail JSON includes denormalized counts** so agents don't
+  have to count nested arrays. \`ask get\`, \`ask create --wait\`,
+  \`ask run --wait\`, and \`ask wait --verbose\` all add:
+  \`\`\`json
+  {
+    "testers_count":      3,
+    "responses_total":    9,
+    "responses_complete": 9,
+    "rounds": [
+      { "responses_total": 3, "responses_complete": 3, "...": "..." }
+    ]
+  }
+  \`\`\`
+  \`responses_errored\` only appears when at least one response errored.
+  Use these instead of \`jq '.testers | length'\` /
+  \`jq '.rounds[0].responses | length'\`.
 - **\`study run --json\` exposes tester handles.** The top-level
   \`tester_ids[]\` and \`tester_aliases[]\` arrays are the canonical
   inputs to \`ish study poll/wait/cancel\`.
+- **Study responses carry a derived \`runtime_status\` field**
+  (\`draft | running | completed | completed_with_errors | cancelled\`).
+  Prefer this over the raw \`status\` field — \`runtime_status\` is
+  computed from the iteration testers' actual run state and never
+  reports \`failed\` while completed runs exist. Available on
+  \`study get\`, \`study results\`, and the response from
+  \`study generate\`. The CLI also surfaces a \`status_inferred\` field
+  alongside the raw \`status\` when it detects a partial-failure
+  inconsistency, plus a stderr warning ("Warning: study reports
+  status='failed' but N/M testers completed…").
+- **\`study generate --json\` includes a \`modality_rationale\`** —
+  one short sentence explaining why the LLM picked that modality. Use
+  it to detect mis-classifications (e.g. brief was a static concept doc
+  but rationale says "live UI flow") and override via
+  \`study update <id> --modality text\` before adding iterations.
+- **\`ask add-questions\` is additive by default.** Appending questions
+  preserves variant comments / picks / ratings / prior-question
+  answers; only the new question(s) get dispatched. Cost: roughly N
+  phase-2 LLM calls instead of 2N. Pass \`--redispatch-all\` for the
+  legacy reset behavior when you want fresh first impressions.
+- **\`ask results --json\` includes \`cross_round_summary\` for 2+
+  rounds.** Top-level field with per-round picks/winner snapshots and
+  a \`picks_delta\` (R1 → last round). Replaces hand-rolled diffing of
+  two \`ask results\` calls.
+- **No more auto-empty iteration A.** \`study create\` and
+  \`study generate\` no longer produce a placeholder iteration A. The
+  first explicit \`ish iteration create\` becomes label A.
+  \`study create\` now accepts \`--content-text\` (text) or \`--url\`
+  (interactive) inline so a single call yields a runnable study.
+  Running \`study run\` on a study with zero iterations exits 2 with
+  a suggestion to run \`ish iteration create\` first.
+- **Tester responses include \`error_message\`.** When a tester is
+  \`status: failed\`, the JSON exposes \`error_message: "<reason>"\` so
+  agents can act without drilling into logs. \`study results\` rolls
+  this up: top-level \`failed_count\`, plus per-tester \`error_message\`
+  in the \`testers[]\` array, and a "Failed testers" subsection in
+  human output. Empty when the tester succeeded.
+- **\`profile list\` emits a stderr pagination hint** when
+  \`has_more=true\` and stdout is human (TTY, not piped, not \`--quiet\`).
+  Format: "showing N–M of TOTAL; pass --offset M --limit N for more."
+  JSON consumers read \`has_more\` directly off the envelope.
+- **\`ask results --json\` adds an \`aggregates\` field per round.** For
+  rounds with \`wants_pick\`/\`wants_ratings\`, the CLI computes the
+  verdict locally so agents don't have to parse comment prose:
+  \`\`\`json
+  {
+    "aggregates": {
+      "picks":   { "A": 3, "B": 0 },
+      "ratings": { "A": { "mean": 4.667, "n": 3 },
+                   "B": { "mean": 2.000, "n": 3 } },
+      "winner":  { "letter": "A", "count": 3, "tied": false }
+    }
+  }
+  \`\`\`
+  \`picks\` is present iff \`wants_pick\`; \`ratings\` is present iff
+  \`wants_ratings\` and ≥ 1 rating was submitted; \`winner\` is the
+  variant with the highest pick count (\`tied: true\` if multiple variants share the
+  top). \`mean\` is rounded to 3 decimal places; \`n\` is the rating
+  count for that variant.
+- **\`ask results --json\` deduplicates tester profile snapshots.** When
+  \`tester_profile\` and \`tester_profile_snapshot\` share all
+  overlapping fields (the common case — they only diverge if the
+  profile was edited after dispatch), the snapshot is collapsed to
+  \`{snapshotted_at, snapshot_version, _matches_tester_profile: true}\`.
+  Use \`--verbose\` to keep both copies in full.
 ## Exit codes
@@ -774,6 +1019,166 @@ ish study results --json | jq .
 - Want a quick reaction test instead of an interactive study? Skip to
   \`ish docs get-page concepts/ask\`.
 `;
+const CONCEPT_ACTIVE_CONTEXT = `# concept: active context
+The CLI keeps a small amount of session state in \`~/.ish/config.json\`
+(or wherever \`ISH_HOME\` points) so commands don't need to repeat IDs:
+- \`access_token\` / \`refresh_token\` — the OAuth pair from \`ish login\`.
+- \`workspace\`  — set by \`ish workspace use <id>\`.
+- \`study\`      — set by \`ish study use <id>\`.
+- \`ask\`        — set by \`ish ask use <id>\`.
+Most commands fall back to these when their corresponding flag is
+omitted (\`--workspace\`, \`--study\`, \`--ask\`).
+## Inspecting active context
+\`ish status\` (alias: \`ish whoami\`) is the canonical way to see what's
+configured. **Run it as the first command on a cold start** — it
+confirms login, prints the active workspace/study/ask handles, and
+shows how long the token has left.
+\`\`\`bash
+ish status
+# User:       you@example.com  (token valid, expires in 47m)
+# Workspace:  Onboarding revamp (w-6ec)
+# Study:      —
+# Ask:        a-6ec "tagline AB"
+# Home:       /home/you/.ish
+# API:        https://api.ishlabs.io
+\`\`\`
+JSON shape (\`ish status --json\` or piped):
+\`\`\`json
+{
+  "user":      { "email": "...", "token_valid": true, "expires_in_seconds": 2820 },
+  "workspace": { "id": "...", "alias": "w-6ec", "name": "Onboarding revamp" },
+  "study":     null,
+  "ask":       { "id": "...", "alias": "a-6ec", "name": "tagline AB" },
+  "api_url":   "https://api.ishlabs.io",
+  "home":      "/home/you/.ish"
+}
+\`\`\`
+\`status\` does not error when the user is logged out — it returns
+\`user: null\` plus a \`hint\` field telling the caller to run
+\`ish login\`. Safe to run unconditionally at the start of any
+script or agent session.
+## Setting / clearing active context
+\`\`\`bash
+ish workspace use w-6ec        # set
+ish workspace use --clear      # clear
+ish study use s-b2c
+ish study use --clear
+ish ask use a-6ec
+ish ask use --clear
+\`\`\`
+## Overriding without persisting
+Every read command accepts \`--workspace <id>\`, \`--study <id>\`, or
+\`--ask <id>\` to override the saved active value for one invocation
+without touching the config. Useful for one-off pokes at another
+resource.
+\`--workspace\` is accepted on **every workspace-scoped subcommand**
+(\`ask\`, \`study\`, \`iteration\`, \`profile\`, \`source\` and their
+descendants). When workspace is inferable from the subject ID alias
+(e.g. \`ish ask delete a-6ec\`) the value is silently ignored — agents
+can pass it reflexively without tripping "unknown option" errors. Out
+of scope: \`workspace\`, \`config\`, \`docs\`, \`init\`, \`login\`,
+\`logout\`, \`whoami\`, \`upgrade\` (none of these need a workspace).
+## Related
+- \`reference/aliases\` — the prefix scheme used by every entity.
+- \`reference/json-mode\` — output contracts for piping \`ish status\`.
+`;
+const REFERENCE_BILLING_LIMITS = `# reference: billing tier limits
+Some create operations are gated by your account's billing tier. The
+backend enforces these. The CLI just renders the structured rejection.
+There is no way to bypass enforcement from the CLI; running the same
+\`POST\` with \`curl\` will hit the same gate.
+The web UI reads these caps at runtime from
+\`GET /api/v1/billing/limits\` (cached for one hour) and falls back to
+its build-time snapshot if the endpoint is unreachable. The table below
+is the CLI's own snapshot, intentionally release-pinned for offline
+use; re-pull it after each \`ish-cli\` release. The source of truth at
+request time, for any client, is the backend's \`TIER_LIMITS\` dict in
+\`tier_limits.py\`.
+## Limits enforced
+| Limit                       | Free | Media | Starter | Pro | Enterprise |
+|-----------------------------|------|-------|---------|-----|------------|
+| \`maxProducts\`               | 1    | 1     | ∞       | ∞   | ∞          |
+| \`maxStudiesPerProduct\`      | 3    | ∞     | ∞       | ∞   | ∞          |
+| \`maxIterationsPerStudy\`     | 2    | ∞     | ∞       | ∞   | ∞          |
+| \`maxCustomTesterProfiles\`   | 3    | 10    | 10      | ∞   | ∞          |
+Commands that may hit a limit: \`ish workspace create\`,
+\`ish study create\`, \`ish study generate\`, \`ish iteration create\`,
+\`ish profile create\`, \`ish profile generate\`.
+## What you see when a limit is hit
+Human output (stderr):
+\`\`\`
+Error: Free plan allows 3 studies per workspace. Upgrade to add more.
+  → Upgrade your plan at https://app.ishlabs.io/billing
+  → Run \`ish docs get-page reference/billing-limits\` for the tier table
+\`\`\`
+JSON output (stdout — \`--json\` or piped):
+\`\`\`json
+{
+  "error": "Free plan allows 3 studies per workspace. Upgrade to add more.",
+  "error_code": "usage_limit_reached",
+  "status": 403,
+  "retryable": false,
+  "tier": "free",
+  "limit": "maxStudiesPerProduct",
+  "current": 3,
+  "max": 3,
+  "upgrade_url": "https://app.ishlabs.io/billing",
+  "suggestions": ["Upgrade your plan at https://app.ishlabs.io/billing", "..."]
+}
+\`\`\`
+Exit code: \`1\` (general — non-retryable). Don't retry; the user has to
+upgrade or delete an existing resource to free up headroom.
+## Agent-side handling
+- Branch on \`error_code === "usage_limit_reached"\` (preferred) or
+  \`status === 403\` with that error_code in the body. \`forbidden\`
+  errors that are *not* tier-related keep \`error_code: "forbidden"\`.
+- Use \`limit\`, \`current\`, \`max\`, \`tier\` to construct your own
+  recovery message. The \`limit\` value matches the table above and is
+  stable.
+- The \`generate\` endpoints (\`study generate\`, \`profile generate\`)
+  refuse the entire batch when the post-generation count would exceed
+  the cap, rather than partially fulfilling — re-issue with a smaller
+  \`--count\` after upgrading or pruning.
+## Related
+- \`concepts/workspace\` — \`maxProducts\` is per-account.
+- \`concepts/study\`     — \`maxStudiesPerProduct\` gates study creation.
+- \`concepts/iteration\` — \`maxIterationsPerStudy\` gates iteration creation.
+- \`concepts/profile\`   — \`maxCustomTesterProfiles\` gates profile creation.
+- \`reference/json-mode\` — full error envelope shape and exit codes.
+`;
 const PAGES = [
     {
         slug: "overview",
@@ -853,6 +1258,12 @@ const PAGES = [
         description: "Side-by-side; decision rule for choosing one over the other.",
         body: CONCEPT_RUN_VERBS,
     },
+    {
+        slug: "concepts/active-context",
+        title: "concept: active context",
+        description: "Saved workspace/study/ask state and how to inspect it (ish status).",
+        body: CONCEPT_ACTIVE_CONTEXT,
+    },
     {
         slug: "reference/aliases",
         title: "reference: aliases",
@@ -865,6 +1276,12 @@ const PAGES = [
         description: "JSON, --fields, --verbose, exit codes, pipe behaviour.",
         body: REFERENCE_JSON_MODE,
     },
+    {
+        slug: "reference/billing-limits",
+        title: "reference: billing tier limits",
+        description: "Per-tier caps on workspaces/studies/iterations/profiles; usage_limit_reached error shape.",
+        body: REFERENCE_BILLING_LIMITS,
+    },
     {
         slug: "guides/first-study",
         title: "guide: your first study, end to end",

package/dist/lib/local-sim/install.d.ts CHANGED Viewed

@@ -3,14 +3,7 @@
  * Uses playwright-core to download and manage Chromium in Playwright's
  * default cache (`~/Library/Caches/ms-playwright` on macOS, etc.).
  */
-/**
- * Check if Chromium is installed in Playwright's default cache.
- */
 export declare function isBrowserInstalled(): boolean;
-/**
- * Install Chromium browser for local simulations.
- * Downloads ~120 MB on first use into Playwright's default cache.
- */
 export declare function installBrowser(quiet?: boolean): Promise<void>;
 /**
  * Ensure Chromium is available, installing if needed.

package/dist/lib/local-sim/install.js CHANGED Viewed

@@ -3,12 +3,24 @@
  * Uses playwright-core to download and manage Chromium in Playwright's
  * default cache (`~/Library/Caches/ms-playwright` on macOS, etc.).
  */
-import { execSync } from "node:child_process";
 import { existsSync } from "node:fs";
 import { chromium } from "playwright-core";
-/**
- * Check if Chromium is installed in Playwright's default cache.
- */
+// Deep-import the bundled registry so this works in both the npm-install path
+// and the standalone bun binary (which has no `npx` to spawn).
+import { registry } from "playwright-core/lib/server/registry/index";
+// playwright-core's userAgent module does `require("../../../package.json")`
+// at runtime to read its version. bun's --compile bundler is unreliable about
+// embedding that JSON, which causes install to crash in the standalone binary
+// with "Cannot find module ../../../package.json". Setting PW_VERSION_OVERRIDE
+// makes that code path skip the require entirely.
+//
+// Keep this string in sync with the playwright-core dep in package.json. It
+// only feeds the User-Agent string sent to the download CDN, so a slight mismatch
+// is harmless.
+const PLAYWRIGHT_CORE_VERSION = "1.59.1";
+if (!process.env.PW_VERSION_OVERRIDE) {
+    process.env.PW_VERSION_OVERRIDE = PLAYWRIGHT_CORE_VERSION;
+}
 export function isBrowserInstalled() {
     try {
         const execPath = chromium.executablePath();
@@ -18,23 +30,18 @@ export function isBrowserInstalled() {
         return false;
     }
 }
-/**
- * Install Chromium browser for local simulations.
- * Downloads ~120 MB on first use into Playwright's default cache.
- */
 export async function installBrowser(quiet = false) {
     const log = (msg) => { if (!quiet)
         console.error(msg); };
     log("Installing Chromium for local simulations (~120 MB)...");
     try {
-        execSync("npx playwright-core install chromium", {
-            stdio: quiet ? "ignore" : "inherit",
-        });
+        const executables = registry.resolveBrowsers(["chromium"], {});
+        await registry.install(executables, { force: false });
         log("Chromium installed successfully.");
     }
     catch (err) {
-        throw new Error(`Failed to install Chromium. You can install manually:\n` +
-            `  npx playwright-core install chromium`);
+        const detail = err instanceof Error ? err.message : String(err);
+        throw new Error(`Failed to install Chromium: ${detail}`);
     }
 }
 /**