npm - gipity - Versions diffs - 1.0.384 → 1.0.386 - Mend

gipity 1.0.384 → 1.0.386

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (20)

package/dist/auth.js +10 -9
package/dist/commands/db.js +4 -1
package/dist/commands/deploy.js +14 -2
package/dist/commands/init.js +21 -8
package/dist/commands/page-eval.js +135 -9
package/dist/commands/page-inspect.js +37 -6
package/dist/commands/page-screenshot.js +13 -5
package/dist/commands/page-test.js +58 -9
package/dist/commands/sandbox.js +67 -14
package/dist/commands/status.js +8 -4
package/dist/commands/test.js +12 -3
package/dist/commands/text.js +1 -1
package/dist/commands/workflow.js +78 -19
package/dist/helpers/text-analysis.js +9 -5
package/dist/index.js +25 -13
package/dist/knowledge.js +8 -1
package/dist/project-setup.js +2 -2
package/dist/setup.js +71 -3
package/dist/sync.js +43 -5
package/package.json +2 -2

package/dist/auth.js CHANGED Viewed

@@ -52,17 +52,18 @@ export function isExpired() {
     const buffer = 5 * 60 * 1000; // 5 minute buffer
     return Date.now() > expiresAt - buffer;
 }
-export function getTimeRemaining() {
+/** True only when re-login is genuinely required: the refresh token itself
+ *  has expired. Access-token expiry (`expiresAt` / isExpired) is invisible
+ *  to users — every API call renews it via refreshTokenIfNeeded() — so it
+ *  must never be surfaced as a session warning. */
+export function sessionExpired() {
     const auth = getAuth();
     if (!auth)
-        return 'not authenticated';
-    const ms = new Date(auth.expiresAt).getTime() - Date.now();
-    if (ms <= 0)
-        return 'expired';
-    const mins = Math.floor(ms / 60000);
-    if (mins < 60)
-        return `${mins}m remaining`;
-    return `${Math.floor(mins / 60)}h ${mins % 60}m remaining`;
+        return true;
+    const exp = decodeJwtExp(auth.refreshToken);
+    if (!exp)
+        return false; // undecodable - let the refresh path decide
+    return Date.now() > exp * 1000;
 }
 export async function refreshTokenIfNeeded() {
     if (!isExpired())

package/dist/commands/db.js CHANGED Viewed

@@ -91,7 +91,10 @@ dbCommand
     else {
         const config = requireConfig();
         const res = await get(`/projects/${config.projectGuid}/databases`);
-        printList(res.data, opts, 'No databases. Create one: gipity db create <name>', db => db.friendlyName);
+        // This list is project-scoped; the account-wide database cap counts
+        // databases across ALL projects. An empty project can still be at the
+        // cap, so point at `--all` rather than implying nothing exists.
+        printList(res.data, opts, 'No databases in this project. Run `gipity db list --all` to see every database counting toward your account cap, or create one: gipity db create <name>', db => db.friendlyName);
     }
 }));
 dbCommand

package/dist/commands/deploy.js CHANGED Viewed

@@ -73,8 +73,20 @@ export const deployCommand = new Command('deploy')
         }
     }
     console.log(muted('─'.repeat(40)));
-    const hasFailed = d.phases?.some(p => p.status === 'failed');
-    if (hasFailed) {
+    const failedPhases = d.phases?.filter(p => p.status === 'failed') ?? [];
+    if (failedPhases.length > 0) {
+        // The database phase can fail on the account-wide database cap, whose
+        // server message ("Maximum of N databases reached. Drop one first.")
+        // names no command. The droppable databases live in OTHER projects, so
+        // the default project-scoped `gipity db list` shows nothing — point the
+        // caller straight at the account-wide list + drop path so they don't
+        // dead-end (or reach for raw DB access) to free a slot.
+        if (failedPhases.some(p => /databases? reached|database (cap|limit)/i.test(p.summary))) {
+            console.log('');
+            console.log(muted('Free a slot under the account database cap:'));
+            console.log(`  ${brand('gipity db list --all')}            ${muted('# every database counting toward the cap, by project')}`);
+            console.log(`  ${brand('gipity db drop <name> --project <slug>')} ${muted('# drop one from another project')}`);
+        }
         console.log(clrError(`Deploy failed`) + muted(` (${d.elapsedMs}ms)`));
         process.exit(1);
     }

package/dist/commands/init.js CHANGED Viewed

@@ -2,36 +2,38 @@ import { Command } from 'commander';
 import { basename, resolve, dirname } from 'path';
 import { existsSync } from 'fs';
 import { getAccountSlug } from '../api.js';
-import { getConfig, getConfigPath } from '../config.js';
+import { getConfig, getConfigPath, saveConfigAt } from '../config.js';
 import { getAuth } from '../auth.js';
-import { slugify, setupClaudeHooks, setupGitignore, SUPPORTED_TOOLS } from '../setup.js';
+import { slugify, setupClaudeHooks, setupGitignore, SUPPORTED_TOOLS, DEFAULT_TOOLS, DEFAULT_SYNC_IGNORE } from '../setup.js';
 import { success, error as clrError, info, muted, bold } from '../colors.js';
 import { confirm } from '../utils.js';
 import { scanForAdoption, adoptCurrentDir, canAdoptCwd, formatBytes, formatCwdLabel, ADOPT_THRESHOLDS, } from '../adopt-cwd.js';
 const TOOL_KEYS = SUPPORTED_TOOLS.map(t => t.key);
 function resolveTools(forFlag) {
-    if (!forFlag || forFlag === 'all')
-        return SUPPORTED_TOOLS;
+    if (!forFlag)
+        return DEFAULT_TOOLS;
     const requested = forFlag.split(',').map(s => s.trim().toLowerCase()).filter(Boolean);
     const unknown = requested.filter(k => !TOOL_KEYS.includes(k) && k !== 'all');
     if (unknown.length) {
         throw new Error(`Unknown --for value(s): ${unknown.join(', ')}. Valid: ${TOOL_KEYS.join(', ')}, all`);
     }
-    if (requested.includes('all'))
-        return SUPPORTED_TOOLS;
-    return SUPPORTED_TOOLS.filter(t => requested.includes(t.key));
+    // `all` expands to the default set; an opt-in tool still joins when named
+    // alongside it (`--for all,aider`).
+    return SUPPORTED_TOOLS.filter(t => requested.includes(t.key) || (!t.optIn && requested.includes('all')));
 }
 export const initCommand = new Command('init')
     .description('Link this directory to a Gipity project (writes primer files so your AI coding tool understands Gipity)')
     .argument('[name]', 'Project name/slug (defaults to current directory name)')
     .option('--agent <guid>', 'Agent GUID to use')
-    .option('--for <tools>', `Which AI tool primer files to write (comma-separated). Default: all. Choices: ${TOOL_KEYS.join(', ')}, all`)
+    .option('--for <tools>', `Which AI tool primer files to write (comma-separated). Default: all except aider (opt-in - it also writes .aider.conf.yml). Choices: ${TOOL_KEYS.join(', ')}, all`)
     .addHelpText('after', `
 Examples:
   $ gipity init                          Link cwd as a new project (slug = dir name).
   $ gipity init my-app                   Link cwd with an explicit slug.
   $ gipity init --for codex              Write only AGENTS.md (skip Claude/Cursor/etc).
   $ gipity init --for cursor,gemini      Write only the Cursor + Gemini primers.
+  $ gipity init --for aider              AGENTS.md + a read: entry in .aider.conf.yml
+                                         (aider auto-reads nothing, so it's opt-in).
 Working with an existing Gipity project:
   - If cwd's name matches the remote project's slug, init auto-adopts it.
@@ -79,6 +81,17 @@ Working with an existing Gipity project:
                 setupClaudeHooks();
             writeAllPrimers();
             setupGitignore();
+            // The config's ignore list was frozen at link time, so a workstation
+            // artifact introduced by a newer CLI (e.g. aider's .aider.conf.yml)
+            // would sync up as project content. Union in the current defaults.
+            if (existing) {
+                const cur = existing.ignore ?? (existing.ignore = []);
+                const missing = DEFAULT_SYNC_IGNORE.filter(e => !cur.includes(e));
+                if (missing.length) {
+                    cur.push(...missing);
+                    saveConfigAt(cwd, existing);
+                }
+            }
             console.log(success(`Refreshed primer files: ${primerSummary}.`));
             return;
         }

package/dist/commands/page-eval.js CHANGED Viewed

@@ -1,8 +1,64 @@
+import { readFileSync } from 'node:fs';
 import { Command } from 'commander';
 import { post, get, ApiError } from '../api.js';
 import { brand, bold, muted, warning } from '../colors.js';
 import { run } from '../helpers/index.js';
+// Shown when an eval runs cleanly but returns nothing serializable. Turns a
+// bare/opaque `null` into a deterministic, actionable nudge so the agent shapes
+// a returnable value instead of guessing and retrying.
+export const EVAL_NO_VALUE_HINT = 'The eval ran but returned no JSON-serializable value. A statement body with no `return`, an assignment, a void call, or a DOM node/function all serialize to null. ' +
+    'End the script with an expression — or an explicit `return` — that yields plain data, e.g. `return { label: input.value, count: items.length }` or `return JSON.stringify(payload)`.';
+/** Normalize a raw eval result for display. The eval can come back as a useful
+ *  serialized value, the literal `null`/`undefined`/empty string, or — when the
+ *  script returns undefined — agent-browser's raw envelope leaking through
+ *  (`{"success":true,"data":{"origin":…,"result":null},"error":null}`). The last
+ *  two mean the same thing to the agent: no value came back. Unwrap the leaked
+ *  envelope so it never reaches the agent as an opaque blob, and flag the
+ *  no-value cases so the caller can attach EVAL_NO_VALUE_HINT. */
+export function normalizeEvalResult(raw) {
+    const trimmed = (raw ?? '').trim();
+    if (trimmed === '' || trimmed === 'null' || trimmed === 'undefined') {
+        return { result: trimmed, noValue: true };
+    }
+    // A leaked agent-browser eval envelope (only emitted when the eval returns
+    // undefined): unwrap to the inner value. Strict shape match — exact key set
+    // plus a string origin — so a genuine user object never trips this.
+    if (trimmed.startsWith('{') && trimmed.includes('"result"')) {
+        try {
+            const env = JSON.parse(trimmed);
+            const isEnvelope = env && typeof env === 'object'
+                && Object.keys(env).every((k) => k === 'success' || k === 'data' || k === 'error')
+                && env.data && typeof env.data === 'object'
+                && typeof env.data.origin === 'string' && 'result' in env.data;
+            if (isEnvelope) {
+                const inner = env.data.result;
+                if (inner == null)
+                    return { result: 'null', noValue: true };
+                return { result: typeof inner === 'string' ? inner : JSON.stringify(inner), noValue: false };
+            }
+        }
+        catch { /* not the envelope — fall through and show the raw value */ }
+    }
+    return { result: raw, noValue: false };
+}
 const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+// A single browser session is held open synchronously for the whole --wait, so
+// the server caps it at the gateway idle timeout. Longer is impossible in one
+// shot; watching an app past 30s means several windows, not one big wait.
+export const MAX_WAIT_MS = 30_000;
+/** Parse --wait (defaulting to 500ms), clamping to the per-call cap. When the
+ *  caller asks for more than the cap, clamp and explain — to stderr, so --json
+ *  stdout stays clean — and point at the windowed watch primitive instead of
+ *  leaking the server's raw "Too big" validation error. */
+export function capWaitMs(rawWait, url) {
+    const parsed = parseInt(rawWait, 10);
+    const wait = Number.isFinite(parsed) && parsed >= 0 ? parsed : 500;
+    if (wait <= MAX_WAIT_MS)
+        return wait;
+    console.error(warning(`--wait ${wait}ms exceeds the ${MAX_WAIT_MS}ms cap (one browser session is held open synchronously; longer trips the gateway timeout) — using ${MAX_WAIT_MS}ms. ` +
+        `To watch an app that keeps changing past 30s, cover the span with staggered windows in one command: gipity page test "${url}" --clients N --stagger S.`));
+    return MAX_WAIT_MS;
+}
 /** Poll the async eval job until it finishes. Eval runs server-side as a
  *  short-lived job (so a long --wait can't trip the gateway idle timeout);
  *  we submit, then poll the result out of the job store. `expectedWorkMs` is
@@ -35,21 +91,74 @@ export async function pollEvalResult(evalJobId, expectedWorkMs) {
     }
     throw new ApiError(504, 'EVAL_TIMEOUT', 'Eval did not finish in time; narrow the expression or lower --wait');
 }
+// The in-page execution budget for an eval body's OWN runtime (its `await`/
+// `setTimeout` pauses), enforced by agent-browser's per-command CDP timeout
+// (AGENT_BROWSER_DEFAULT_TIMEOUT) — distinct from --wait, which only sleeps
+// BEFORE the eval. Used to translate the opaque timeout envelope into guidance.
+const EVAL_EXEC_BUDGET_MS = 20_000;
+/** When the eval body's own runtime overruns the in-page execution budget,
+ *  agent-browser aborts the `Runtime.evaluate` CDP call and the failure comes
+ *  back as a `{success:false, error:"CDP command timed out: Runtime.evaluate"}`
+ *  envelope that the server surfaces verbatim as the eval `result` — opaque to
+ *  the caller (no timeout named, no distinction from the page or --wait). Detect
+ *  exactly that envelope and return an actionable message; null otherwise. */
+export function evalExecTimeoutMessage(result) {
+    let parsed;
+    try {
+        parsed = JSON.parse(result);
+    }
+    catch {
+        return null;
+    }
+    if (!parsed || parsed.success !== false || typeof parsed.error !== 'string')
+        return null;
+    if (!/CDP command timed out:\s*Runtime\.evaluate/i.test(parsed.error))
+        return null;
+    return (`the expression hit the ~${EVAL_EXEC_BUDGET_MS / 1000}s in-page execution budget — the eval body ` +
+        `(including its own await/setTimeout pauses) ran longer than that. This budget is the time the ` +
+        `expression itself is allowed to run; it is separate from --wait, which only sleeps BEFORE the eval ` +
+        `and cannot extend it. Split a long interactive check into several shorter 'page eval' calls (e.g. ` +
+        `one per state to verify), keeping each body's in-page waits well under ${EVAL_EXEC_BUDGET_MS / 1000}s.`);
+}
 // The long-tail escape hatch alongside `page inspect`'s fixed bundle: when the
 // curated metrics don't cover what you need (computed styles, element rects,
 // visibility, z-index stacks), eval an expression in page context and get the
 // serialized result back. Runs in the same browser sandbox as inspect.
+//
+// The body runs as an async function, so it can be an inline expression OR a
+// multi-statement script with `return`/`await`. Pass a saved script with
+// --file to functionally exercise a page's own code paths headlessly (drive
+// tools, undo/redo, transforms) and `return` a JSON-serializable result —
+// no /tmp + shell command-substitution harness needed.
 export const pageEvalCommand = new Command('eval')
-    .description('Evaluate a JS expression in a real browser on a page (DOM, computed styles, element rects)')
+    .description('Evaluate JS in a real browser on a page (DOM, computed styles, element rects; inline expr or --file script)')
     .argument('<url>', 'URL to load')
-    .argument('<expr>', 'JavaScript expression to evaluate in page context (result is JSON-serialized)')
-    .option('--wait <ms>', 'Sleep this many ms after DOMContentLoaded before evaluating (lets late async work settle)', '500')
+    .argument('[expr]', 'JavaScript to evaluate in page context (inline expression or statement body with return/await; result is JSON-serialized). Omit when using --file.')
+    .option('--file <path>', 'Read the script body from a file instead of the inline <expr> arg (mutually exclusive). Runs as an async function body, so top-level return/await work.')
+    .option('--wait <ms>', 'Sleep this many ms after DOMContentLoaded before evaluating (lets late async work settle; max 30000)', '500')
     .option('--wait-for <selector>', 'Wait until this CSS selector appears before evaluating (deterministic; replaces --wait)')
     .option('--wait-timeout <ms>', 'Max ms to wait for --wait-for before giving up', '5000')
     .option('--json', 'Output as JSON')
-    .action((url, expr, opts) => run('Page eval', async () => {
-    const parsedWait = parseInt(opts.wait, 10);
-    const waitMs = Number.isFinite(parsedWait) && parsedWait >= 0 ? parsedWait : 500;
+    .action((url, exprArg, opts) => run('Page eval', async () => {
+    // Arg-shape errors go through commander's error() so the enableHelpAfterError
+    // hook renders this command's help inline with the one-line error LAST
+    // (survives `| tail`), same as commander-detected errors like a missing url.
+    if (exprArg !== undefined && opts.file) {
+        pageEvalCommand.error('error: Pass either an inline <expr> arg or --file <path>, not both');
+    }
+    if (exprArg === undefined && !opts.file) {
+        pageEvalCommand.error('error: Provide an inline <expr> arg or --file <path>');
+    }
+    let expr = exprArg;
+    if (opts.file) {
+        try {
+            expr = readFileSync(opts.file, 'utf8');
+        }
+        catch {
+            pageEvalCommand.error(`error: Cannot read file: ${opts.file}`);
+        }
+    }
+    const waitMs = capWaitMs(opts.wait, url);
     const parsedTimeout = parseInt(opts.waitTimeout, 10);
     const waitForTimeoutMs = Number.isFinite(parsedTimeout) && parsedTimeout >= 0 ? parsedTimeout : 5000;
     const kickoff = await post('/tools/browser/eval', {
@@ -58,16 +167,22 @@ export const pageEvalCommand = new Command('eval')
         waitForTimeoutMs: opts.waitFor ? waitForTimeoutMs : undefined,
     });
     const d = await pollEvalResult(kickoff.data.evalJobId, waitMs);
+    const { result, noValue } = normalizeEvalResult(d.result);
+    const execTimeout = evalExecTimeoutMessage(d.result);
+    if (execTimeout)
+        throw new Error(execTimeout);
     if (opts.json) {
-        console.log(JSON.stringify(d));
+        console.log(JSON.stringify(noValue ? { ...d, result, hint: EVAL_NO_VALUE_HINT } : { ...d, result }));
         return;
     }
     console.log(`${brand('Eval')} ${bold(d.url || url)}`);
     if (d.navigationIncomplete) {
         console.log(`${warning('⚠ Navigation incomplete:')} ${d.note || 'page did not reach full load'}`);
     }
-    console.log(`${muted('Expression:')} ${expr}`);
-    console.log(`\n${d.result || muted('(empty result)')}`);
+    console.log(opts.file ? `${muted('Script:')} ${opts.file}` : `${muted('Expression:')} ${expr}`);
+    console.log(`\n${result.trim() ? result : muted('(empty result)')}`);
+    if (noValue)
+        console.log(muted(`\n${EVAL_NO_VALUE_HINT}`));
     if (d.truncated)
         console.log(muted('\n(result truncated to fit context - narrow the expression for the full value)'));
 }));
@@ -77,6 +192,17 @@ export const pageEvalCommand = new Command('eval')
 // concurrent `page test --observe` instead, which overlaps N clients and reports
 // whether they actually ran together.
 pageEvalCommand.addHelpText('after', `
+Examples:
+  gipity page eval "https://dev.gipity.ai/me/app/" "document.title"
+  # Functionally test a page's own code paths: save a script that drives the UI
+  # and returns a JSON-serializable result, then run it (no /tmp + shell quoting):
+  gipity page eval "https://dev.gipity.ai/me/app/" --file ./tests/draw-flow.js --json
+The eval body runs under a ~20s in-page execution budget (its own await/setTimeout
+pauses count; --wait only sleeps BEFORE the eval and does not extend it). For a long
+interactive sequence, split it into several shorter evals (one per state to verify)
+rather than one body with many long waits.
 Testing realtime/shared state across clients?
   Separate 'page eval' calls run sequentially (one finishes before the next
   starts), so they never overlap and will each see only themselves - a false

package/dist/commands/page-inspect.js CHANGED Viewed

@@ -3,6 +3,9 @@ import { post } from '../api.js';
 import { formatSize } from '../utils.js';
 import { brand, bold, error as clrError, warning, muted, info } from '../colors.js';
 import { run } from '../helpers/index.js';
+import { capWaitMs } from './page-eval.js';
+/** A console line is an error-level entry (page error or console.error). */
+const isErrorLine = (line) => /^error:/i.test(line);
 function shortUrl(url, truncate = true, maxLen = 100) {
     let result;
     try {
@@ -22,7 +25,7 @@ function shortUrl(url, truncate = true, maxLen = 100) {
 export const pageInspectCommand = new Command('inspect')
     .description('Inspect a web page (console, failed resources, timing, layout overflow)')
     .argument('<url>', 'URL to inspect')
-    .option('--wait <ms>', 'Sleep this many ms after DOMContentLoaded before capturing (lets late async/LCP work settle)', '500')
+    .option('--wait <ms>', 'Sleep this many ms after DOMContentLoaded before capturing (lets late async/LCP work settle; max 30000)', '500')
     .option('--wait-for <selector>', 'Wait until this CSS selector appears before capturing (deterministic; replaces --wait)')
     .option('--wait-timeout <ms>', 'Max ms to wait for --wait-for before giving up', '5000')
     .option('--json', 'Output as JSON')
@@ -39,21 +42,41 @@ export const pageInspectCommand = new Command('inspect')
         process.exit(1);
     }
     return run('Page inspect', async () => {
-        const parsedWait = parseInt(opts.wait, 10);
-        const waitMs = Number.isFinite(parsedWait) && parsedWait >= 0 ? parsedWait : 500;
+        const waitMs = capWaitMs(opts.wait, url);
         const parsedTimeout = parseInt(opts.waitTimeout, 10);
         const waitForTimeoutMs = Number.isFinite(parsedTimeout) && parsedTimeout >= 0 ? parsedTimeout : 5000;
         const truncate = opts.truncate !== false;
         const showAll = opts.all === true;
-        const res = await post(`/tools/browser/inspect`, {
+        const inspectBody = {
             url, waitMs,
             waitForSelector: opts.waitFor || undefined,
             waitForTimeoutMs: opts.waitFor ? waitForTimeoutMs : undefined,
             fakeMedia: opts.fakeMedia || undefined,
-        });
+        };
+        const res = await post(`/tools/browser/inspect`, inspectBody);
         const b = res.data;
+        // Self-verify console errors before flagging them. A freshly-deployed page's
+        // first hit can throw a one-time, non-reproducible error — typically a
+        // cross-origin "Script error." with no message/stack from a CDN asset still
+        // propagating — and reporting it as a real defect sends agents chasing a
+        // phantom. So when the first probe reports error-level console lines, re-probe
+        // once (the sticky session is now warm) and keep only the errors that recur;
+        // errors seen on a single probe are surfaced separately as transient noise.
+        let transientErrors = [];
+        if ((b.console || []).some(isErrorLine)) {
+            try {
+                const verify = await post(`/tools/browser/inspect`, inspectBody);
+                const recurring = new Set((verify.data.console || []).filter(isErrorLine));
+                transientErrors = (b.console || []).filter((l) => isErrorLine(l) && !recurring.has(l));
+                b.console = (b.console || []).filter((l) => !isErrorLine(l) || recurring.has(l));
+            }
+            catch {
+                // Re-probe failed (timeout / browser error) — report the first probe's
+                // console as-is rather than hiding anything.
+            }
+        }
         if (opts.json) {
-            console.log(JSON.stringify(b));
+            console.log(JSON.stringify(transientErrors.length ? { ...b, transientConsole: transientErrors } : b));
             return;
         }
         const timing = b.timing || { ttfb: 0, domReady: 0, load: 0 };
@@ -83,6 +106,14 @@ export const pageInspectCommand = new Command('inspect')
         else {
             console.log(`\n${bold('Console:')} ${muted('(clean)')}`);
         }
+        // ── Transient console errors (seen on first probe, gone on re-probe) ──
+        if (transientErrors.length > 0) {
+            console.log(`\n${bold('Transient console errors')} ${muted(`(${transientErrors.length}, not reproduced on re-probe)`)}:`);
+            for (const line of transientErrors) {
+                console.log(muted(line));
+            }
+            console.log(muted('One-time cold-load artifact (first hit of freshly-deployed assets, or a cross-origin script) — not reproducible, not in your app code. Ignore unless it recurs.'));
+        }
         // ── Failed Resources ──
         // Browsers auto-request /favicon.ico at the site root for every page, so a
         // 404 there isn't a resource the page actually links — it's noise on any

package/dist/commands/page-screenshot.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { Command, Option } from 'commander';
 import { mkdirSync, writeFileSync } from 'fs';
-import { join, resolve as resolvePath } from 'path';
+import { dirname, join, resolve as resolvePath } from 'path';
 import { postForTarEntries } from '../api.js';
 import { getProjectRoot } from '../config.js';
 import { brand, bold, muted, success } from '../colors.js';
@@ -96,7 +96,10 @@ function appendOption(value, previous = []) {
 export const pageScreenshotCommand = new Command('screenshot')
     .description('Screenshot a web page')
     .argument('<url>', 'URL to screenshot')
-    .option('--post-load-delay <ms>', 'Delay after DOMContentLoaded before capture, in ms', '1000')
+    // No commander default: a default here makes opts.postLoadDelay always set,
+    // so the `?? opts.wait` merge below would never see the --wait alias. Default
+    // is applied in the merge instead.
+    .option('--post-load-delay <ms>', 'Delay after DOMContentLoaded before capture, in ms (default: 1000)')
     .option('--full', 'Capture the full scrollable page (default: viewport only)')
     .option('-o, --output <file>', 'Output path (single viewport only; default .gipity/screenshots/ss-<host>-<timestamp>.png)')
     .option('--device <names>', `Viewport preset(s): ${Object.keys(DEVICE_PRESETS).join(', ')} (comma-separated or repeat flag)`, appendOption, [])
@@ -110,7 +113,10 @@ export const pageScreenshotCommand = new Command('screenshot')
     // rather than reject it as an unknown option and send them on a --help detour.
     .addOption(new Option('--full-page', 'Alias for --full').hideHelp())
     .action((url, opts) => run('Page screenshot', async () => {
-    const delayRaw = opts.postLoadDelay ?? opts.wait;
+    // --wait is a hidden alias for --post-load-delay (agents reach for it because
+    // sibling `page inspect`/`eval` name the flag --wait). Canonical name wins if
+    // both given; fall back to the 1000ms default when neither is set.
+    const delayRaw = opts.postLoadDelay ?? opts.wait ?? '1000';
     const postLoadDelayMs = delayRaw !== undefined ? parseInt(String(delayRaw), 10) : undefined;
     if (postLoadDelayMs !== undefined && (!Number.isFinite(postLoadDelayMs) || postLoadDelayMs < 0)) {
         throw new Error('--post-load-delay must be a non-negative integer (ms)');
@@ -147,8 +153,6 @@ export const pageScreenshotCommand = new Command('screenshot')
     const slug = slugFromUrl(url);
     const ts = timestampSlug();
     const dir = defaultScreenshotDir();
-    if (!opts.output)
-        mkdirSync(dir, { recursive: true });
     const savedFiles = [];
     for (let i = 0; i < pngs.length; i++) {
         const shot = meta.screenshots[i];
@@ -156,6 +160,10 @@ export const pageScreenshotCommand = new Command('screenshot')
         const target = opts.output
             ? opts.output
             : join(dir, defaultFilename(slug, ts, suffix));
+        // Create the target's parent dir so a `-o` path under a not-yet-existing
+        // directory (e.g. .gipity/screenshots/home.png) writes cleanly instead of
+        // failing with a raw ENOENT and forcing a manual `mkdir -p`.
+        mkdirSync(dirname(target), { recursive: true });
         writeFileSync(target, pngs[i].buffer);
         // Absolute path so the agent knows exactly where the file landed.
         savedFiles.push(resolvePath(target));

package/dist/commands/page-test.js CHANGED Viewed

@@ -18,12 +18,33 @@ async function inspectClient(url, waitMs, i) {
 }
 const MAX_HOLD_MS = 15_000; // keep each in-page await under the ~20s browser action timeout
 const MIN_HOLD_MS = 1_000;
-/** Splice per-client values into a user expression. `{{label}}` → the client's
- *  label, `{{i}}` → its 0-based index. Plain string replace (no regex) so the
- *  expression's own characters are never treated as patterns. */
+/** Splice per-client values into a user string (URL, --action, or --observe).
+ *  `{{label}}` → the client's label, `{{i}}` → its 0-based index. Plain string
+ *  replace (no regex) so the string's own characters are never treated as
+ *  patterns. */
 function subst(expr, label, i) {
     return expr.split('{{label}}').join(label).split('{{i}}').join(String(i));
 }
+/** Collect any `{{...}}` placeholders the runner does NOT recognize, so an
+ *  invented token (e.g. `{{name}}`) is flagged instead of passing through
+ *  verbatim into every client's URL/expression. */
+function unknownTokens(...strings) {
+    const out = new Set();
+    for (const s of strings) {
+        for (const m of (s ?? '').match(/\{\{[^}]*\}\}/g) ?? []) {
+            if (m !== '{{i}}' && m !== '{{label}}')
+                out.add(m);
+        }
+    }
+    return [...out];
+}
+/** One-time warning (to stderr, so --json stdout stays clean) for
+ *  unrecognized placeholders left as-is. */
+function warnUnknownTokens(unknown) {
+    if (unknown.length === 0)
+        return;
+    console.error(warning(`⚠ Unrecognized placeholder ${unknown.join(', ')} left as-is — only {{i}} (0-based client index) and {{label}} are substituted per client. Set per-client values with --labels and reference them as {{label}}.`));
+}
 /** Build the statement-body script one client runs: do the one-time action,
  *  then sample `observe` `samples` times across `holdMs`, stamping in-page
  *  start/end so the caller can confirm the clients overlapped. */
@@ -98,11 +119,22 @@ function fmtSamples(samples) {
 async function runInteractive(url, observe, opts) {
     const clients = Math.max(1, parseInt(opts.clients, 10) || 2);
     const stagger = opts.stagger != null ? Math.max(0, parseInt(opts.stagger, 10) || 0) : 0;
-    const hold = Math.min(MAX_HOLD_MS, Math.max(MIN_HOLD_MS, parseInt(opts.hold, 10) || 8000));
+    const rawHold = parseInt(opts.hold, 10) || 8000;
+    const hold = Math.min(MAX_HOLD_MS, Math.max(MIN_HOLD_MS, rawHold));
+    if (rawHold > MAX_HOLD_MS) {
+        // Surface the clamp (to stderr, so --json stdout stays clean) instead of
+        // leaving the agent to infer it from the printed "hold Nms" line.
+        console.error(warning(`--hold ${rawHold}ms exceeds the ${MAX_HOLD_MS}ms per-client cap (each client samples inside one browser eval, bounded by the server's eval budget) — using ${MAX_HOLD_MS}ms. ` +
+            `Co-launch every role in this one command (put {{label}}/{{i}} in the URL) so all clients overlap for the whole window; a separately-started background client overlaps only the sliver of its window that lines up.`));
+    }
     const samples = Math.min(30, Math.max(2, parseInt(opts.samples, 10) || 6));
     const settle = opts.waitFor ? 200 : 1000;
     const labels = (opts.labels ? String(opts.labels).split(',').map((s) => s.trim()) : []).filter(Boolean);
     const labelFor = (i) => labels[i] ?? `client-${i}`;
+    // Only {{label}} and {{i}} are substituted. Warn once on any other {{token}}
+    // (a natural guess like {{name}} or {{index}}) so it isn't sent literally to
+    // every client — the silent wrong-behavior trap of identical clients.
+    warnUnknownTokens(unknownTokens(url, opts.action, observe));
     if (!opts.json) {
         console.log(`${brand('Page test')} ${muted('(interactive)')} ${bold(url)}`);
         console.log(muted(`${clients} client(s), stagger ${stagger}s, hold ${hold}ms, ${samples} samples each`));
@@ -113,8 +145,12 @@ async function runInteractive(url, observe, opts) {
             await sleep(i * stagger * 1000);
             if (!opts.json)
                 console.log(muted(`client ${i} (${labelFor(i)}) joining`));
+            // {{label}}/{{i}} substitute into the URL too, so one invocation can launch
+            // asymmetric roles concurrently (e.g. ?role={{label}} with --labels host,join)
+            // and the overlap check still confirms they coexisted.
+            const clientUrl = subst(url, labelFor(i), i);
             const expr = buildHarness(opts.action ? subst(opts.action, labelFor(i), i) : undefined, subst(observe, labelFor(i), i), labelFor(i), hold, samples);
-            return observeClient(url, expr, i, labelFor(i), settle, hold, opts.waitFor);
+            return observeClient(clientUrl, expr, i, labelFor(i), settle, hold, opts.waitFor);
         })());
     }
     const results = (await Promise.all(runs)).sort((a, b) => a.i - b.i);
@@ -159,6 +195,9 @@ async function runPassive(url, opts) {
     const clients = Math.max(1, parseInt(opts.clients, 10) || 2);
     const stagger = opts.stagger != null ? Math.max(0, parseInt(opts.stagger, 10) || 0) : 12;
     const wait = Math.min(30000, Math.max(2000, parseInt(opts.wait, 10) || 24000));
+    const labels = (opts.labels ? String(opts.labels).split(',').map((s) => s.trim()) : []).filter(Boolean);
+    const labelFor = (i) => labels[i] ?? `client-${i}`;
+    warnUnknownTokens(unknownTokens(url));
     if (!opts.json) {
         console.log(`${brand('Page test')} ${bold(url)}`);
         console.log(`${muted(`${clients} client(s), stagger ${stagger}s, ${wait}ms open each`)}`);
@@ -169,7 +208,7 @@ async function runPassive(url, opts) {
             await sleep(i * stagger * 1000);
             if (!opts.json)
                 console.log(`${muted(`client ${i}${i === 0 ? ' (first)' : ''} starting`)}`);
-            return inspectClient(url, wait, i);
+            return inspectClient(subst(url, labelFor(i), i), wait, i);
         })());
     }
     const results = (await Promise.all(runs)).sort((a, b) => a.i - b.i);
@@ -222,14 +261,14 @@ async function runPassive(url, opts) {
 //  just because the clients never actually ran together.
 export const pageTestCommand = new Command('test')
     .description('Multi-client realtime check: load a URL in N concurrent headless clients; flag console errors, or drive an action and observe shared state (--observe)')
-    .argument('<url>', 'Deployed URL to load in every client')
+    .argument('<url>', 'Deployed URL to load in every client. {{label}}/{{i}} substitute per client in both modes (e.g. ?name=Bot{{i}}, or ?role={{label}} with --labels host,join), so one invocation can give each client a distinct role.')
     .option('--clients <n>', 'Number of headless clients to launch', '2')
     .option('--stagger <s>', 'Seconds between client starts (passive default 12; interactive default 0)')
     .option('--wait <ms>', 'Passive mode: ms each client stays open after load (max 30000)', '24000')
     // Interactive mode (--observe drives it):
     .option('--observe <expr>', 'Interactive: JS expression sampled in each client to read shared state (e.g. presence count). Switches on interactive mode.')
     .option('--action <expr>', 'Interactive: one-time JS run in each client before observing (e.g. fill a name + submit). {{label}}/{{i}} are substituted per client.')
-    .option('--labels <csv>', 'Interactive: per-client labels substituted for {{label}} (default client-0, client-1, …)')
+    .option('--labels <csv>', 'Per-client labels substituted for {{label}} in the URL/--action/--observe (default client-0, client-1, …)')
     .option('--hold <ms>', `Interactive: total observe window per client (${MIN_HOLD_MS}-${MAX_HOLD_MS}ms)`, '8000')
     .option('--samples <k>', 'Interactive: number of observations across the hold window (2-30)', '6')
     .option('--wait-for <selector>', 'Interactive: wait for this CSS selector before running --action (deterministic readiness gate)')
@@ -239,12 +278,22 @@ Examples:
   # Passive: load in 3 staggered clients, flag console errors
   gipity page test "https://dev.gipity.ai/me/app/" --clients 3 --stagger 8
+  # Per-client URL params: each client joins under a distinct name (Bot0, Bot1, …)
+  gipity page test "https://dev.gipity.ai/me/app/?name=Bot{{i}}" --clients 2
   # Interactive: two concurrent clients each join with a name, then watch the
   # live presence count. The command confirms the clients actually overlapped.
   gipity page test "https://dev.gipity.ai/me/app/" --clients 2 \\
     --action "document.querySelector('#name').value='{{label}}'; document.querySelector('form').requestSubmit();" \\
     --observe "document.querySelectorAll('.present').length" \\
-    --labels Alice,Bob`)
+    --labels Alice,Bob
+  # Asymmetric roles in ONE invocation: {{label}} in the URL routes client 0 to
+  # host and client 1 to join. They overlap in time (verified), so the joiner
+  # observes the live state the host is driving — no background-process dance.
+  gipity page test "https://dev.gipity.ai/me/app/?test-action={{label}}" --clients 2 \\
+    --labels host,join \\
+    --observe "document.querySelector('[data-screen]')?.dataset.screen"`)
     .action((url, opts) => run('Page test', async () => {
     if (opts.observe) {
         await runInteractive(url, opts.observe, opts);