npm - @ishlabs/cli - Versions diffs - 0.23.1 → 0.24.0 - Mend

@ishlabs/cli 0.23.1 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/commands/ask.js +4 -4
package/dist/commands/iteration.js +25 -3
package/dist/commands/study-share.d.ts +18 -0
package/dist/commands/study-share.js +117 -0
package/dist/commands/study.js +54 -7
package/dist/commands/workspace.js +4 -1
package/dist/connect.d.ts +4 -2
package/dist/connect.js +151 -11
package/dist/index.js +63 -6
package/dist/lib/ask-questions.d.ts +15 -5
package/dist/lib/ask-questions.js +34 -11
package/dist/lib/auth.d.ts +1 -0
package/dist/lib/auth.js +7 -1
package/dist/lib/command-helpers.js +33 -5
package/dist/lib/docs.js +140 -8
package/dist/lib/output.js +8 -1
package/dist/lib/reverse-proxy.d.ts +19 -0
package/dist/lib/reverse-proxy.js +87 -0
package/dist/lib/reverse-proxy.test.d.ts +10 -0
package/dist/lib/reverse-proxy.test.js +149 -0
package/dist/lib/segmentation.d.ts +31 -0
package/dist/lib/segmentation.js +105 -0
package/dist/lib/skill-content.js +76 -4
package/dist/lib/types.d.ts +2 -0
package/package.json +3 -1

package/dist/lib/docs.js CHANGED Viewed

@@ -98,10 +98,20 @@ ish workspace list
 ish workspace create --name "My product" --base-url https://example.com
 ish workspace use w-6ec        # set as active
 ish workspace get              # show the active workspace
+ish workspace update w-6ec --logo https://logo.clearbit.com/acme.com  # brand logo
 ish workspace info             # usage counters + plan caps (see below)
 ish workspace site-access status
 \`\`\`
+## Branding a workspace (\`--logo\`)
+\`ish workspace update <id> --logo <url>\` sets a brand logo from an
+external image URL. The logo shows on the workspace and — importantly —
+on **shared study links** (\`ish study share\`), so a prospect opening the
+public link sees the demo branded with their own logo. There is no
+\`--logo\` on \`workspace create\`; create first, then update. See
+\`concepts/sharing\`.
 ## Checking usage before destructive calls
 \`ish workspace info\` shows usage counters so an agent can branch on
@@ -217,6 +227,15 @@ its iterations. Think: a study is the recipe; an iteration is one batch.
 iteration A inline in the same call. Useful when you have a single
 test artifact and don't need to A/B iterations:
+For text + media, the inline iteration A can also carry
+\`--segmentation-json\` (+ \`--content-config-json\`) and the text
+email-styling flags (\`--content-html\`, \`--sender-name\`,
+\`--sender-email\`, \`--featured-image-url\`). So a single-iteration
+**segmented** study is one \`study create\` call — you do NOT need a
+second \`iteration create\` (which would leave an empty A plus a
+redundant B). Reach for \`iteration create\` only when you genuinely
+want a 2nd iteration to A/B.
 | Modality        | Inline content flag                                  |
 |-----------------|------------------------------------------------------|
 | \`interactive\` | \`--url <url>\` (\`--screen-format desktop\` is the default; pass \`mobile_portrait\` for mobile) |
@@ -402,6 +421,14 @@ Each segment can carry a human-readable **label** ("Intro", "Pricing
 section", "Call to action") that surfaces in the participant UI and in
 results.
+**Segments are semantic sections, not paragraphs.** Group related
+paragraphs into a few coherent sections — a 16-paragraph article is
+usually 3–6 sections (e.g. "Lede", "The argument", "Counterpoints",
+"Conclusion"), not 16. \`paragraph_start\`/\`paragraph_end\` only mark
+where a section begins and ends; the unit you are choosing is the
+*section*. The CLI errors on a missing label and warns when you emit one
+section per paragraph.
 Segments live inside the iteration's \`segmentation\` field — there is
 no separate segments resource. Three discriminated shapes:
@@ -431,6 +458,11 @@ no separate segments resource. Three discriminated shapes:
   }
   \`\`\`
+  The three sections above each group several paragraphs (greeting +
+  context, the body, the call to action) — semantic grouping, not one
+  section per paragraph. Adjust the ranges to your content's logical
+  structure.
 - **page_based** (document): pages are auto-derived from the document.
   No additional fields.
@@ -888,12 +920,16 @@ Two flags, mutually exclusive:
 # --question is repeatable. Defaults to type=text, timing=after.
 ish study create … --question "How easy was it?" --question "Anything confusing?"
-# Richer types from a JSON manifest:
-ish study create … --questionnaire ./questionnaire.json
+# Richer types via --questionnaire. Three interchangeable input forms — no
+# temp file required (mirrors how --assignments takes inline JSON):
+ish study create … --questionnaire '[{"question":"How easy?","type":"slider","min":0,"max":10}]'   # inline JSON
+ish study create … --questionnaire @/tmp/questionnaire.json                                         # @file
+ish study create … --questionnaire ./questionnaire.json                                             # bare path
 \`\`\`
-\`questionnaire.json\` is an array of question objects in the shape above.
-The same shape is accepted by \`ish ask add-questions … --questions …\`.
+The payload is always an array of question objects in the shape above
+(inline JSON must start with \`[\`; an \`@\`-prefixed or bare value is read
+from disk). The same three input forms are accepted by \`ish ask … --questions\`.
 The \`type\` field is hyphenated for the multi-word values (\`single-choice\`,
 \`multiple-choice\`). The CLI normalises the underscored variants
@@ -2130,11 +2166,27 @@ The CLI guarantees these contracts so agents can chain safely:
   \`--fields\` set, you can identify the affected resource. Default
   write-path JSON is compact (\`{id, alias, name, updated_at,
   ...changed_fields}\`); pass \`--verbose\` for the full server payload.
+- **Write-path echoes keep collection arrays even when empty.** On a
+  create/update echo (e.g. \`study create\`/\`study update\`), entity
+  collections like \`assignments\`, \`interview_questions\`, and
+  \`iterations\` are always present — \`[]\` when the resource has none,
+  not dropped. So the echo reflects exactly what was persisted: an empty
+  \`assignments\` means the study genuinely has no assignment and will
+  fail at run with "Study has no assignments" — you don't need a second
+  \`--verbose\` (or \`study get\`) call to tell "zero persisted" from
+  "stripped by lean mode." (Read-path \`list\` responses still drop empty
+  per-item arrays as noise; this guarantee is write-path only.)
 - **\`person generate\` returns \`{job: {id, status, person_ids},
   profiles: [...]}\`** in \`--json\` mode. Each profile is the
   lean \`person\` shape (pass \`--verbose\` for the full record,
   including \`simulation_config\`) with its evidence-grounded
   \`scenarios\` attached; pass \`--no-scenarios\` to omit them.
+- **\`study share\` returns \`{id, token, share_url, expires_at,
+  created_at}\`** in \`--json\` mode (full envelope, not lean-stripped).
+  \`share_url\` is the public no-login URL — use it verbatim. In human
+  mode \`share_url\` goes to stdout, context to stderr. \`study share
+  --list\` returns rows of \`{token, study, expires_at, is_revoked}\`
+  (no \`share_url\` — only create returns it). See \`concepts/sharing\`.
 - **\`<entity> get\` accepts multiple IDs.** \`person get\`, \`study get\`,
   \`iteration get\`, and \`ask get\` all take \`<ids...>\` — pass two or
   more aliases (space- or comma-separated) and the response is a
@@ -2835,10 +2887,16 @@ script or agent session.
 ### \`ish login\` is idempotent
-When you already have a valid saved token, \`ish login\` short-circuits
-with a friendly "Already logged in" message and **does not** open a new
-browser tab or register a fresh OAuth client. Use \`--force\` (or \`-f\`)
-to bypass the guard — typical reason is switching accounts.
+When you already have a saved token that is **both unexpired and still
+accepted by the API**, \`ish login\` short-circuits with a friendly
+"Already logged in" message and **does not** open a new browser tab or
+register a fresh OAuth client. If the saved token is unexpired but the
+server rejects it — a revoked session, a rotated signing key, or a token
+minted against the wrong Supabase project (e.g. a dev-issued token while
+calling the prod api) — the guard falls through and re-runs the browser
+flow instead of falsely reporting "Already logged in". Use \`--force\`
+(or \`-f\`) to bypass the guard unconditionally — typical reason is
+switching accounts.
 \`\`\`bash
 ish login              # no-op when already authenticated
@@ -4187,6 +4245,74 @@ overridden URL.
 - \`reference/json-mode\` — display vs capture vs chain output rules.
 `;
+const CONCEPT_SHARE = `# concept: sharing study results
+A **share link** is a public, no-login URL to one study's results. Anyone
+with the link opens it in a browser — no ish account — and sees the study's
+summary, key insights, participant journeys, interactive frames, and segment
+breakdowns (read-only). This is how you hand a study to someone outside your
+workspace: a prospect, a stakeholder, a teammate without a seat.
+- Created via: \`ish study share [id]\` (defaults to the active study).
+- Revoked via: \`ish study unshare <token>\`.
+- The link host is the **web app frontend**, not the API host. The backend
+  returns the fully-formed \`share_url\` — print/use it verbatim. Do NOT
+  hand-build the URL from the API host or app URL; they differ.
+## Create a link
+\`\`\`
+ish study share                      # share the active study
+ish study share s-b2c                # share a specific study
+ish study share s-b2c --expires 30   # auto-expire 30 days from now
+ish study share s-b2c --json         # { token, share_url, expires_at, created_at, id }
+\`\`\`
+Human mode prints the \`share_url\` to **stdout** (it's the deliverable — a
+URL to paste into an email) and the token / expiry / revoke hint to stderr.
+JSON mode returns the full create envelope:
+\`\`\`json
+{
+  "id": "…",
+  "token": "Hk9_…",            // opaque url-safe token, NOT an alias
+  "share_url": "https://<frontend>/share/study/Hk9_…",
+  "expires_at": null,           // null = never expires
+  "created_at": "…"
+}
+\`\`\`
+## List and revoke
+\`\`\`
+ish study share --list               # every share link you created (all studies)
+ish study unshare Hk9_…              # revoke by raw token; URL stops working immediately
+ish study unshare Hk9_… --yes        # skip the confirmation (required in --json / non-TTY)
+\`\`\`
+The \`--list\` rows carry \`token\`, \`study\` (aliased), \`expires_at\`,
+\`is_revoked\`. The full \`share_url\` only comes back from \`share\` (create) —
+list responses do not reconstruct it. \`study unshare\` takes the **raw token**,
+never a study ID or alias.
+## What a good shareable study looks like
+The viewer is only as good as the run behind it. Before sharing, make sure:
+- The study has **run** with enough participants (\`ish study run … --wait\`;
+  analysis needs ≥5 completed participants) and no broken simulations.
+- An **analysis** has been generated so the summary + key insights render
+  (\`ish study analyze --wait\` → \`ish study insights\`).
+- For media studies, every **segment is labelled** (see \`concepts/iteration\`).
+- The workspace has a **logo** if you want the link branded
+  (\`ish workspace update <id> --logo <url>\`).
+## Related
+- \`concepts/study\` — the artifact a link points at.
+- \`concepts/workspace\` — \`--logo\` branding shown on the shared link.
+- \`concepts/active-context\` — \`ish study share\` defaults to the active study.
+- \`reference/json-mode\` — the \`{ token, share_url, … }\` envelope.
+`;
 const PAGES = [
     {
         slug: "overview",
@@ -4284,6 +4410,12 @@ const PAGES = [
         description: "Saved workspace/study/ask state and how to inspect it (ish status).",
         body: CONCEPT_ACTIVE_CONTEXT,
     },
+    {
+        slug: "concepts/sharing",
+        title: "concept: sharing study results",
+        description: "Public no-login share links for a study: study share / study unshare / --list, --expires, token vs URL, branding with workspace --logo.",
+        body: CONCEPT_SHARE,
+    },
     {
         slug: "reference/aliases",
         title: "reference: aliases",

package/dist/lib/output.js CHANGED Viewed

@@ -196,7 +196,14 @@ function leanJson(data, keepIds = false) {
         // Recurse into objects/arrays
         if (typeof value === "object") {
             const cleaned = leanJson(value, keepIds);
-            if (cleaned !== undefined && !(Array.isArray(cleaned) && cleaned.length === 0)) {
+            // Read paths drop empty arrays as noise. Write-path echoes (keepIds)
+            // must NOT: an empty `assignments`/`interview_questions` is the
+            // "zero persisted" signal the create/update echo exists to surface —
+            // a study with no assignments fails at run with "Study has no
+            // assignments". Dropping it made the echo indistinguishable from a
+            // lean-strip, which is why agents were told not to trust it.
+            const dropEmptyArray = !keepIds && Array.isArray(cleaned) && cleaned.length === 0;
+            if (cleaned !== undefined && !dropEmptyArray) {
                 result[key] = cleaned;
             }
             continue;

package/dist/lib/reverse-proxy.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Local reverse proxy: fan one inbound port out to multiple localhost services
+ * by path prefix. Wired into `ish connect` so a single cloudflared tunnel can
+ * serve a frontend + backend + extras under one origin (no CORS / cookie
+ * cross-origin pain in the cloud browser).
+ */
+export type Route = {
+    prefix: string;
+    target: string;
+};
+export interface ReverseProxyHandle {
+    port: number;
+    close: () => Promise<void>;
+}
+export interface StartReverseProxyOptions {
+    primaryPort: number;
+    routes: Route[];
+}
+export declare function startReverseProxy(opts: StartReverseProxyOptions): Promise<ReverseProxyHandle>;

package/dist/lib/reverse-proxy.js ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Local reverse proxy: fan one inbound port out to multiple localhost services
+ * by path prefix. Wired into `ish connect` so a single cloudflared tunnel can
+ * serve a frontend + backend + extras under one origin (no CORS / cookie
+ * cross-origin pain in the cloud browser).
+ */
+import http from "node:http";
+import httpProxy from "http-proxy";
/**
 * Pick the upstream target for an incoming request URL.
 *
 * A route matches when the request path equals its prefix or continues past
 * it at a segment boundary (`/` or `?`), so `/api` catches `/api/x` and
 * `/api?x=1` but not `/apiary`. Trailing slashes on a configured prefix are
 * ignored: `/api/` behaves exactly like `/api` (it used to miss
 * `/api/health`, because the boundary test looked for `/api//`), and a bare
 * `/` prefix acts as a catch-all, just like an empty prefix.
 *
 * @param {string | undefined} url Raw request URL (`req.url`); a missing value is treated as `/`.
 * @param {{ prefix: string, target: string }[]} sortedRoutes Routes in priority order; the first match wins.
 * @param {string} fallback Target returned when no route matches.
 * @returns {string} The matching route's target, or `fallback`.
 */
function resolveRoute(url, sortedRoutes, fallback) {
    const path = url ?? "/";
    for (const route of sortedRoutes) {
        // Drop trailing slashes so the boundary checks below see `/api`, not `/api/`.
        let end = route.prefix.length;
        while (end > 0 && route.prefix[end - 1] === "/") {
            end -= 1;
        }
        const prefix = route.prefix.slice(0, end);
        // Match the prefix at a segment boundary so `/api` doesn't catch `/apiary`.
        if (path === prefix || path.startsWith(`${prefix}/`) || path.startsWith(`${prefix}?`)) {
            return route.target;
        }
    }
    return fallback;
}
/**
 * Start the path-prefix reverse proxy on an ephemeral loopback port.
 *
 * HTTP requests and WebSocket upgrades whose path matches a configured route
 * are forwarded to that route's target; everything else goes to
 * `http://127.0.0.1:<primaryPort>`.
 *
 * @param {{ primaryPort: number, routes: { prefix: string, target: string }[] }} opts
 * @returns {Promise<{ port: number, close: () => Promise<void> }>} Resolves once
 *   the server is listening; rejects if the server emits `error` or the bound
 *   address cannot be read.
 */
export function startReverseProxy(opts) {
    return new Promise((resolve, reject) => {
        const primaryTarget = `http://127.0.0.1:${opts.primaryPort}`;
        // Most specific prefix first: with routes `[/api, /api/v1]`, a request
        // to `/api/v1/x` must land on `/api/v1`.
        const sortedRoutes = [...opts.routes]
            .map(({ prefix, target }) => ({ prefix, target }))
            .sort((left, right) => right.prefix.length - left.prefix.length);
        const targetFor = (req) => resolveRoute(req.url, sortedRoutes, primaryTarget);

        // The request path is forwarded as-is (no `ignorePath`, and the targets
        // carry no path of their own). `changeOrigin: false` forwards the
        // client's original Host header untouched; http-proxy only rewrites
        // Host to the target's host when `changeOrigin` is true.
        const proxy = httpProxy.createProxyServer({
            xfwd: true,
            ws: true,
            changeOrigin: false,
        });

        const onProxyError = (err, _req, res) => {
            if (!res) {
                return;
            }
            // `res` is an HTTP response on the request path and a raw socket on
            // the WebSocket-upgrade path.
            if ("writeHead" in res && typeof res.writeHead === "function") {
                if (!res.headersSent) {
                    res.writeHead(502, { "Content-Type": "text/plain; charset=utf-8" });
                }
                res.end(`Bad gateway: upstream not reachable (${err.message})`);
                return;
            }
            if ("destroy" in res && typeof res.destroy === "function") {
                res.destroy();
            }
        };
        proxy.on("error", onProxyError);

        // Remember every open socket so close() can destroy them explicitly:
        // server.close() on its own waits for keep-alive sockets to drain,
        // which would hang the CLI on SIGINT.
        const sockets = new Set();
        const server = http.createServer((req, res) => {
            const target = targetFor(req);
            proxy.web(req, res, { target });
        });
        server.on("upgrade", (req, socket, head) => {
            const target = targetFor(req);
            proxy.ws(req, socket, head, { target });
        });
        server.on("connection", (socket) => {
            sockets.add(socket);
            socket.on("close", () => sockets.delete(socket));
        });
        server.on("error", reject);

        // Stop accepting new connections, then tear down whatever is still
        // open (closeAllConnections where available, plus the manual sweep).
        const close = () => new Promise((done) => {
            server.close(() => done());
            server.closeAllConnections?.();
            sockets.forEach((socket) => socket.destroy());
            sockets.clear();
            proxy.close();
        });

        server.listen(0, "127.0.0.1", () => {
            const addr = server.address();
            if (!addr || typeof addr === "string") {
                reject(new Error("Failed to bind reverse proxy"));
                return;
            }
            resolve({ port: addr.port, close });
        });
    });
}

package/dist/lib/reverse-proxy.test.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Smoke test for the reverse-proxy module. Spins up two mock HTTP servers,
+ * routes through the proxy, and asserts paths land on the right upstream
+ * with the full path preserved. Also verifies a raw WebSocket upgrade
+ * routes via the prefix rules.
+ *
+ * Compiled to dist/lib/reverse-proxy.test.js and runnable with:
+ *   node --test dist/lib/reverse-proxy.test.js
+ */
+export {};

package/dist/lib/reverse-proxy.test.js ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Smoke test for the reverse-proxy module. Spins up two mock HTTP servers,
+ * routes through the proxy, and asserts paths land on the right upstream
+ * with the full path preserved. Also verifies a raw WebSocket upgrade
+ * routes via the prefix rules.
+ *
+ * Compiled to dist/lib/reverse-proxy.test.js and runnable with:
+ *   node --test dist/lib/reverse-proxy.test.js
+ */
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import http from "node:http";
+import { startReverseProxy } from "./reverse-proxy.js";
/**
 * Start a throwaway HTTP server on an ephemeral loopback port.
 *
 * Every plain request is answered with `200` and the body `<name>:<url>`;
 * every upgrade request gets a minimal `101 Switching Protocols` reply. Both
 * kinds are recorded in `hits`, and both carry an `X-Mock-Name` header so a
 * test can tell which mock answered.
 *
 * @param {string} name Label echoed in the response body and `X-Mock-Name`.
 * @returns {Promise<{ port: number, hits: { url: string, upgrade: boolean }[], close: () => Promise<void> }>}
 */
function startMockServer(name) {
    const hits = [];
    const sockets = new Set();
    const track = (socket) => {
        sockets.add(socket);
        socket.on("close", () => sockets.delete(socket));
    };
    const record = (req, upgrade) => {
        hits.push({ url: req.url ?? "", upgrade });
    };
    return new Promise((resolve, reject) => {
        const server = http.createServer((req, res) => {
            record(req, false);
            res.writeHead(200, { "Content-Type": "text/plain", "X-Mock-Name": name });
            res.end(`${name}:${req.url}`);
        });
        server.on("connection", track);
        server.on("upgrade", (req, socket) => {
            record(req, true);
            track(socket);
            // Bare-bones handshake with a static accept token (the request key
            // is echoed back as-is) so the test needs no `ws` dependency.
            const acceptKey = req.headers["sec-websocket-key"];
            const handshake = [
                "HTTP/1.1 101 Switching Protocols",
                "Upgrade: websocket",
                "Connection: Upgrade",
                `Sec-WebSocket-Accept: ${acceptKey ?? "x"}`,
                `X-Mock-Name: ${name}`,
            ];
            socket.write(`${handshake.join("\r\n")}\r\n\r\n`);
        });
        server.on("error", reject);
        const close = () => new Promise((done) => {
            server.closeAllConnections?.();
            sockets.forEach((socket) => socket.destroy());
            sockets.clear();
            server.close(() => done());
            server.unref();
        });
        server.listen(0, "127.0.0.1", () => {
            const { port } = server.address();
            resolve({ port, hits, close });
        });
    });
}
// Plain HTTP routing: `/api…` goes to the api mock, everything else to the
// primary mock, and the upstream always sees the path exactly as requested.
test("reverse-proxy routes by prefix and preserves the full path", async () => {
    const primary = await startMockServer("primary");
    const api = await startMockServer("api");
    const proxy = await startReverseProxy({
        primaryPort: primary.port,
        routes: [{ prefix: "/api", target: `http://127.0.0.1:${api.port}` }],
    });
    const get = (path) => fetch(`http://127.0.0.1:${proxy.port}${path}`);
    const servedBy = (res) => res.headers.get("x-mock-name");
    try {
        const rootRes = await get("/");
        assert.equal(rootRes.status, 200);
        assert.equal(servedBy(rootRes), "primary");
        assert.equal(await rootRes.text(), "primary:/");
        assert.equal(primary.hits.at(-1)?.url, "/");

        const healthRes = await get("/api/health");
        assert.equal(healthRes.status, 200);
        assert.equal(servedBy(healthRes), "api");
        // The prefix is not stripped: upstream receives `/api/health`, not `/health`.
        assert.equal(await healthRes.text(), "api:/api/health");
        assert.equal(api.hits.at(-1)?.url, "/api/health");

        // `/apiary` merely shares leading characters with `/api`; matching is
        // per path segment, so it must fall through to the primary.
        const apiaryRes = await get("/apiary");
        assert.equal(servedBy(apiaryRes), "primary");

        const deepRes = await get("/api/v1/users");
        assert.equal(servedBy(deepRes), "api");
        assert.equal(await deepRes.text(), "api:/api/v1/users");
    }
    finally {
        await proxy.close();
        await primary.close();
        await api.close();
    }
});
// WebSocket routing: a raw upgrade request to `/api/ws` must be answered by
// the api mock, with the path intact.
test("reverse-proxy routes WebSocket upgrades by prefix", async () => {
    const primary = await startMockServer("primary");
    const api = await startMockServer("api");
    const proxy = await startReverseProxy({
        primaryPort: primary.port,
        routes: [{ prefix: "/api", target: `http://127.0.0.1:${api.port}` }],
    });
    // Send a hand-rolled upgrade request and report how the handshake was answered.
    const requestUpgrade = (path) => new Promise((resolve, reject) => {
        const req = http.request({
            host: "127.0.0.1",
            port: proxy.port,
            path,
            method: "GET",
            headers: {
                Connection: "Upgrade",
                Upgrade: "websocket",
                "Sec-WebSocket-Key": "dGhlIHNhbXBsZSBub25jZQ==",
                "Sec-WebSocket-Version": "13",
            },
        });
        req.on("upgrade", (res, socket) => {
            const mockHeader = res.headers["x-mock-name"];
            resolve({
                statusLine: `HTTP/1.1 ${res.statusCode} ${res.statusMessage}`,
                mockName: typeof mockHeader === "string" ? mockHeader : undefined,
            });
            socket.destroy();
        });
        req.on("error", reject);
        req.end();
    });
    try {
        const status = await requestUpgrade("/api/ws");
        assert.match(status.statusLine, /^HTTP\/1\.1 101/);
        assert.equal(status.mockName, "api");
        assert.ok(api.hits.some((hit) => hit.upgrade && hit.url === "/api/ws"));
    }
    finally {
        await proxy.close();
        await primary.close();
        await api.close();
    }
});
// Upstream failure: with nothing listening on the primary port, the proxy
// must answer 502 with a "Bad gateway" body.
test("reverse-proxy returns 502 when upstream is down", async () => {
    // No mock is started here. Port 1 is a privileged port that this process
    // never binds, so it stands in for an unreachable upstream.
    const proxy = await startReverseProxy({
        primaryPort: 1,
        routes: [],
    });
    try {
        const response = await fetch(`http://127.0.0.1:${proxy.port}/whatever`);
        assert.equal(response.status, 502);
        const text = await response.text();
        assert.match(text, /Bad gateway/i);
    }
    finally {
        await proxy.close();
    }
});

package/dist/lib/segmentation.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * Validation + nudge for media/text `segmentation` (the parsed value of
+ * `--segmentation-json` on `study create` / `iteration create`).
+ *
+ * THE PRINCIPLE these guard: **segments are semantic sections, not
+ * paragraphs.** Group related paragraphs into a few coherent sections
+ * (intro → argument → conclusion). A long article is usually 3–6 sections,
+ * not one per paragraph; `paragraph_start`/`paragraph_end` only mark where a
+ * section begins and ends — the unit is the *section*.
+ *
+ * - `validateSegmentation` is FATAL (throws ValidationError → exit 2) on a
+ *   malformed `section_based` shape — most importantly a missing/empty label,
+ *   which the backend would otherwise reject after a network round-trip.
+ * - `warnIfOverSegmented` is NON-FATAL: an agent that ignores the docs and
+ *   emits one section per paragraph gets a stderr nudge, but is never blocked
+ *   (over-segmenting can be intentional).
+ *
+ * Both take the already-JSON-parsed object; `undefined` is a no-op.
+ */
+/** Throw on a malformed segmentation shape. No-op for undefined / unknown types. */
+export declare function validateSegmentation(seg: unknown): void;
+/**
+ * Non-fatal nudge toward semantic sections. Conservative on purpose: only
+ * fires for `section_based` with >= 5 sections that EACH span a single
+ * paragraph — the signature of one-section-per-paragraph — so a genuine
+ * 3-section piece never trips it. stderr only (keeps --json stdout clean);
+ * suppressed under --quiet.
+ */
+export declare function warnIfOverSegmented(seg: unknown, opts?: {
+    quiet?: boolean;
+}): void;

package/dist/lib/segmentation.js ADDED Viewed

@@ -0,0 +1,105 @@
+/**
+ * Validation + nudge for media/text `segmentation` (the parsed value of
+ * `--segmentation-json` on `study create` / `iteration create`).
+ *
+ * THE PRINCIPLE these guard: **segments are semantic sections, not
+ * paragraphs.** Group related paragraphs into a few coherent sections
+ * (intro → argument → conclusion). A long article is usually 3–6 sections,
+ * not one per paragraph; `paragraph_start`/`paragraph_end` only mark where a
+ * section begins and ends — the unit is the *section*.
+ *
+ * - `validateSegmentation` is FATAL (throws ValidationError → exit 2) on a
+ *   malformed `section_based` shape — most importantly a missing/empty label,
+ *   which the backend would otherwise reject after a network round-trip.
+ * - `warnIfOverSegmented` is NON-FATAL: an agent that ignores the docs and
+ *   emits one section per paragraph gets a stderr nudge, but is never blocked
+ *   (over-segmenting can be intentional).
+ *
+ * Both take the already-JSON-parsed object; `undefined` is a no-op.
+ */
+import { writeSync } from "node:fs";
+import { c } from "./colors.js";
+import { ValidationError } from "./output.js";
/**
 * Throw on a malformed segmentation shape. No-op for undefined / non-object
 * input and for unknown `type` values.
 *
 * - `section_based`: `sections` must be a non-empty array; every section needs
 *   a non-empty `name` and `label`; when either paragraph bound is present,
 *   both must be numbers with `paragraph_start >= 0` and
 *   `paragraph_end > paragraph_start`.
 * - `time_based`: when `intervals_seconds` is an array, every entry must be a
 *   number and the values strictly ascending. The first entry is checked too,
 *   so a one-element array such as `["10"]` no longer slips through.
 *
 * @param {unknown} seg The already-JSON-parsed segmentation value.
 * @returns {void}
 * @throws {ValidationError} When the shape violates one of the rules above.
 */
export function validateSegmentation(seg) {
    if (!seg || typeof seg !== "object")
        return;
    const s = seg;
    if (s.type === "section_based") {
        validateSections(s.sections);
        return;
    }
    if (s.type === "time_based") {
        validateIntervals(s.intervals_seconds);
    }
}
/** Check the `sections` array of a `section_based` segmentation; throws ValidationError. */
function validateSections(sections) {
    if (!Array.isArray(sections) || sections.length === 0) {
        throw new ValidationError("section_based segmentation needs a non-empty `sections` array.", [], "Group related paragraphs into a few semantic sections (intro, argument, conclusion) — not one per paragraph.");
    }
    sections.forEach((raw, i) => {
        const sec = (raw ?? {});
        const name = typeof sec.name === "string" ? sec.name.trim() : "";
        const label = typeof sec.label === "string" ? sec.label.trim() : "";
        if (!name) {
            throw new ValidationError(`section_based sections[${i}] is missing a non-empty \`name\`.`, []);
        }
        if (!label) {
            throw new ValidationError(`section_based sections[${i}] ("${name}") is missing a non-empty \`label\`. ` +
                "Every section needs a human-readable label — it surfaces in the participant UI and in results.", []);
        }
        // Paragraph-bounded sections: validate the range when present. (A
        // marker-bounded section_based variant may omit these — don't require.)
        const start = sec.paragraph_start;
        const end = sec.paragraph_end;
        if (start !== undefined || end !== undefined) {
            if (typeof start !== "number" || typeof end !== "number" || start < 0 || end <= start) {
                throw new ValidationError(`section_based sections[${i}] ("${name}") has an invalid paragraph range ` +
                    `(paragraph_start=${String(start)}, paragraph_end=${String(end)}). ` +
                    "Need paragraph_start >= 0 and paragraph_end > paragraph_start.", []);
            }
        }
    });
}
/** Check `intervals_seconds` of a `time_based` segmentation; a non-array value is left alone. */
function validateIntervals(iv) {
    if (!Array.isArray(iv))
        return;
    // The pairwise loop below starts at index 1, so the first entry needs its
    // own type check; otherwise a single non-numeric value is never inspected.
    if (iv.length > 0 && typeof iv[0] !== "number") {
        throw new ValidationError(`time_based intervals_seconds must be strictly ascending numbers ` +
            `(problem at index 0: ${String(iv[0])} is not a number).`, []);
    }
    for (let i = 1; i < iv.length; i++) {
        const prev = iv[i - 1];
        const cur = iv[i];
        if (typeof prev !== "number" || typeof cur !== "number" || cur <= prev) {
            throw new ValidationError(`time_based intervals_seconds must be strictly ascending numbers ` +
                `(problem at index ${i}: ${String(prev)} → ${String(cur)}).`, []);
        }
    }
}
/**
 * Print a non-fatal stderr nudge when a segmentation looks like
 * one-section-per-paragraph.
 *
 * Deliberately conservative: it only fires for a `section_based` value with at
 * least 5 sections where EVERY section has numeric paragraph bounds that are
 * at most 1 apart. Anything else — including `opts.quiet` — is a silent no-op,
 * and nothing is ever written to stdout.
 *
 * @param {unknown} seg The already-JSON-parsed segmentation value.
 * @param {{ quiet?: boolean }} [opts] Pass `quiet: true` to suppress the nudge.
 * @returns {void}
 */
export function warnIfOverSegmented(seg, opts = {}) {
    if (opts.quiet || !seg || typeof seg !== "object")
        return;
    const s = seg;
    if (s.type !== "section_based")
        return;
    const sections = s.sections;
    if (!Array.isArray(sections) || sections.length < 5)
        return;
    const spansOneParagraph = (sec) => {
        const start = sec?.paragraph_start;
        const end = sec?.paragraph_end;
        if (typeof start !== "number" || typeof end !== "number")
            return false;
        return end - start <= 1;
    };
    // One multi-paragraph (or unbounded) section is enough to stay quiet.
    if (sections.some((sec) => !spansOneParagraph(sec)))
        return;
    const headline = `${c.yellow}⚠ ${sections.length} single-paragraph sections.${c.reset} `;
    const advice = "Segments are meant to be semantic sections — group related paragraphs into a few " +
        "coherent sections (e.g. intro → argument → conclusion), not one per paragraph. " +
        "A long article is usually 3–6 sections. Proceeding as-is.\n";
    // Write to fd 2 synchronously rather than via console.error: the command
    // prints its own output and exits right after this, and an async-buffered
    // stderr write to a pipe/file could be cut off by that exit.
    writeSync(2, headline + advice);
}