npm - llm-cli-gateway - Versions diffs - 1.17.0 → 1.17.2 - Mend

llm-cli-gateway 1.17.0 → 1.17.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (25) hide show

package/CHANGELOG.md +28 -0
package/README.md +16 -19
package/dist/cache-stats.d.ts +47 -0
package/dist/cache-stats.js +85 -2
package/dist/config.js +1 -1
package/dist/doctor.d.ts +22 -1
package/dist/doctor.js +35 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +123 -39
package/dist/process-monitor.d.ts +1 -2
package/dist/process-monitor.js +7 -7
package/dist/prompt-parts.d.ts +1 -1
package/dist/prompt-parts.js +1 -1
package/dist/provider-login-guidance.js +5 -5
package/dist/provider-status.js +0 -4
package/dist/request-helpers.d.ts +28 -26
package/dist/request-helpers.js +50 -43
package/dist/session-manager.js +1 -1
package/dist/stream-json-parser.js +30 -15
package/dist/upstream-contracts.d.ts +24 -0
package/dist/upstream-contracts.js +213 -18
package/dist/validation-tools.js +1 -1
package/package.json +11 -8
package/setup/status.schema.json +31 -0
package/socket.yml +8 -8

package/dist/request-helpers.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { existsSync, unlinkSync, writeFileSync } from "fs";
 import { tmpdir } from "os";
 import { join, isAbsolute } from "path";
 import { randomUUID } from "crypto";
-import { z } from "zod";
+import { z } from "zod/v3";
 /** Prefix for gateway-generated session IDs. Enforces provenance structurally. */
 export const GATEWAY_SESSION_PREFIX = "gw-";
 /**
@@ -262,57 +262,54 @@ export const GEMINI_APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
  */
 export const CODEX_SANDBOX_MODES = ["read-only", "workspace-write", "danger-full-access"];
 /**
- * Codex approval modes (for `--ask-for-approval <mode>`).
+ * Deprecated Codex approval modes. Current Codex no longer exposes an
+ * `--ask-for-approval` flag; the MCP input is temporarily retained so older
+ * callers do not fail schema validation, but it emits no CLI argv.
  */
 export const CODEX_ASK_FOR_APPROVAL_MODES = ["untrusted", "on-request", "never"];
 /**
- * Resolve Codex `--sandbox` / `--ask-for-approval` args from the modern
- * params + legacy `fullAuto` shorthand.
+ * Resolve current Codex sandbox args from the modern params + legacy
+ * `fullAuto` shorthand. Current Codex exposes `--sandbox`, but no longer
+ * exposes `--ask-for-approval` or `--full-auto`.
  *
  * Precedence:
- *   1. If `useLegacyFullAutoFlag && fullAuto`, emit `--full-auto` directly
- *      (escape hatch; deprecated).
- *   2. Else explicit `sandboxMode` / `askForApproval` always emit their
- *      flags. If `fullAuto: true` is set alongside, a warning is attached
- *      and the explicit values win.
- *   3. Else if `fullAuto: true`, expand to
- *      `--sandbox workspace-write --ask-for-approval never`.
+ *   1. Explicit `sandboxMode` emits `--sandbox <mode>`.
+ *   2. Else if `fullAuto: true`, expand to `--sandbox workspace-write`.
+ *   3. Deprecated `askForApproval` and `useLegacyFullAutoFlag` emit no argv
+ *      and return warnings for callers to surface/log.
  *   4. Else emit nothing.
  */
 export function resolveCodexSandboxFlags(input) {
     const { sandboxMode, askForApproval, fullAuto, useLegacyFullAutoFlag } = input;
-    // deprecated: prefer sandboxMode + askForApproval; will be removed after Mistral GA.
-    if (useLegacyFullAutoFlag && fullAuto) {
-        return { args: ["--full-auto"] };
-    }
-    const explicit = Boolean(sandboxMode || askForApproval);
-    if (explicit) {
-        const args = [];
-        if (sandboxMode)
-            args.push("--sandbox", sandboxMode);
-        if (askForApproval)
-            args.push("--ask-for-approval", askForApproval);
-        const warning = fullAuto
-            ? "fullAuto was set alongside explicit sandboxMode/askForApproval; explicit values win. fullAuto is deprecated."
-            : undefined;
-        return { args, warning };
-    }
-    if (fullAuto) {
-        return {
-            args: ["--sandbox", "workspace-write", "--ask-for-approval", "never"],
-        };
+    const args = [];
+    const warnings = [];
+    if (useLegacyFullAutoFlag) {
+        warnings.push("useLegacyFullAutoFlag is deprecated and ignored because current Codex no longer accepts --full-auto.");
     }
-    return { args: [] };
+    if (askForApproval) {
+        warnings.push("askForApproval is deprecated and ignored because current Codex no longer accepts --ask-for-approval.");
+    }
+    if (sandboxMode) {
+        args.push("--sandbox", sandboxMode);
+        if (fullAuto) {
+            warnings.push("fullAuto was set alongside explicit sandboxMode; sandboxMode wins. fullAuto is deprecated.");
+        }
+    }
+    else if (fullAuto) {
+        args.push("--sandbox", "workspace-write");
+    }
+    return { args, warning: warnings.length > 0 ? warnings.join(" ") : undefined };
 }
 /**
  * Flags that `codex exec resume` rejects (the original session's policy is
  * inherited). Callers must drop these when building resume argv.
  *
- * Verified against `codex exec resume --help` (codex-cli 0.133.0):
- * `--full-auto`, `--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`, and
- * `--search` are rejected. `--output-schema` and `-c key=value` ARE accepted
- * on resume and therefore are NOT in this filter (Phase 4 slice α restored
- * the previously-silent drop of those two).
+ * Verified against `codex exec resume --help` (codex-cli 0.135.0):
+ * `--sandbox`, `--add-dir`, `-C`, `--cd`, `--profile`, and `--search` are rejected.
+ * Deprecated `--full-auto` / `--ask-for-approval` are kept here defensively so
+ * legacy pre-filtered segments are stripped instead of reaching spawn.
+ * `--output-schema` and `-c key=value` ARE accepted on resume and therefore are
+ * NOT in this filter (Phase 4 slice α restored the previously-silent drop of those two).
  */
 export const CODEX_RESUME_FILTERED_FLAGS = new Set([
     "--full-auto",
@@ -320,6 +317,8 @@ export const CODEX_RESUME_FILTERED_FLAGS = new Set([
     "--ask-for-approval",
     "--add-dir",
     "-C",
+    "--cd",
+    "--profile",
     "--search",
 ]);
 /**
@@ -331,13 +330,15 @@ const CODEX_RESUME_FILTERED_FLAGS_WITH_VALUE = new Set([
     "--ask-for-approval",
     "--add-dir",
     "-C",
+    "--cd",
+    "--profile",
 ]);
 /**
  * Strip resume-incompatible flag/value pairs from a Codex argv segment.
  *
  * Bare flags (`--full-auto`, `--search`) drop without consuming a value.
- * Value-taking flags (`--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`,
- * `--output-schema`) drop together with their immediately-following value.
+ * Value-taking flags (`--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`, `--cd`,
+ * `--profile`) drop together with their immediately-following value.
  */
 export function filterCodexResumeFlags(args) {
     const out = [];
@@ -371,7 +372,7 @@ export const CLAUDE_EFFORT_LEVELS = ["low", "medium", "high", "xhigh", "max"];
 export const CLAUDE_HIGH_IMPACT_PARAMS_SCHEMA = z
     .object({
     agent: z.string().optional(),
-    agents: z.record(z.record(z.unknown())).optional(),
+    agents: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
     forkSession: z.boolean().optional(),
     systemPrompt: z.string().optional(),
     appendSystemPrompt: z.string().optional(),
@@ -549,7 +550,7 @@ export function findMissingImagePath(images) {
  * params before they reach `prepareCodexRequest`.
  */
 export const CODEX_HIGH_IMPACT_PARAMS_SCHEMA = z.object({
-    outputSchema: z.union([z.string(), z.record(z.unknown())]).optional(),
+    outputSchema: z.union([z.string(), z.record(z.string(), z.unknown())]).optional(),
     search: z.boolean().optional(),
     profile: z.string().optional(),
     configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA,
@@ -578,8 +579,9 @@ export function prepareCodexHighImpactFlags(input) {
         args.push("--output-schema", schema.path);
         cleanup = schema.cleanup;
     }
+    const warnings = [];
     if (input.search) {
-        args.push("--search");
+        warnings.push("search is deprecated and ignored because current Codex exec no longer accepts --search.");
     }
     if (input.profile) {
         args.push("--profile", input.profile);
@@ -599,7 +601,12 @@ export function prepareCodexHighImpactFlags(input) {
     if (input.ignoreRules) {
         args.push("--ignore-rules");
     }
-    return { args, cleanup, missingImagePath: null };
+    return {
+        args,
+        cleanup,
+        missingImagePath: null,
+        warning: warnings.length > 0 ? warnings.join(" ") : undefined,
+    };
 }
 export function prepareCodexForkRequest(input) {
     const { prompt, sessionId, forkLast } = input;

package/dist/session-manager.js CHANGED Viewed

@@ -76,7 +76,7 @@ export class FileSessionManager {
                 const data = readFileSync(this.storagePath, "utf-8");
                 this.storage = JSON.parse(data);
             }
-            catch (error) {
+            catch {
                 // If file is corrupted, start fresh
                 this.storage = { sessions: {}, activeSession: createEmptyActiveSessions() };
             }

package/dist/stream-json-parser.js CHANGED Viewed

@@ -4,6 +4,15 @@
  * Each line of stdout is a complete JSON object. This parser extracts the
  * final result text, cost, usage, and metadata from the stream.
  */
+function stringOrNull(value) {
+    return typeof value === "string" ? value : null;
+}
+function numberOrNull(value) {
+    return typeof value === "number" && Number.isFinite(value) ? value : null;
+}
+function numberOrZero(value) {
+    return typeof value === "number" && Number.isFinite(value) ? value : 0;
+}
 /**
  * Parse completed NDJSON stdout from `claude --output-format stream-json --include-partial-messages`.
  *
@@ -30,6 +39,9 @@ export function parseStreamJson(stdout) {
             // Skip malformed lines
             continue;
         }
+        if (!parsed || typeof parsed !== "object") {
+            continue;
+        }
         if (parsed.type === "result") {
             resultEvent = parsed;
         }
@@ -44,21 +56,21 @@ export function parseStreamJson(stdout) {
     if (resultEvent) {
         const usage = resultEvent.usage
             ? {
-                inputTokens: resultEvent.usage.input_tokens ?? 0,
-                outputTokens: resultEvent.usage.output_tokens ?? 0,
-                cacheReadInputTokens: resultEvent.usage.cache_read_input_tokens ?? 0,
-                cacheCreationInputTokens: resultEvent.usage.cache_creation_input_tokens ?? 0,
+                inputTokens: numberOrZero(resultEvent.usage.input_tokens),
+                outputTokens: numberOrZero(resultEvent.usage.output_tokens),
+                cacheReadInputTokens: numberOrZero(resultEvent.usage.cache_read_input_tokens),
+                cacheCreationInputTokens: numberOrZero(resultEvent.usage.cache_creation_input_tokens),
             }
             : null;
         return {
-            text: resultEvent.result ?? "",
-            costUsd: resultEvent.total_cost_usd ?? null,
+            text: typeof resultEvent.result === "string" ? resultEvent.result : "",
+            costUsd: numberOrNull(resultEvent.total_cost_usd),
             usage,
-            sessionId: resultEvent.session_id ?? systemEvent?.session_id ?? null,
-            model: systemEvent?.model ?? resultEvent.model ?? null,
-            durationApiMs: resultEvent.duration_api_ms ?? null,
+            sessionId: stringOrNull(resultEvent.session_id) ?? stringOrNull(systemEvent?.session_id),
+            model: stringOrNull(systemEvent?.model) ?? stringOrNull(resultEvent.model),
+            durationApiMs: numberOrNull(resultEvent.duration_api_ms),
             isError: resultEvent.is_error === true,
-            numTurns: resultEvent.num_turns ?? null,
+            numTurns: numberOrNull(resultEvent.num_turns),
         };
     }
     // Fallback: extract text from assistant event
@@ -67,7 +79,10 @@ export function parseStreamJson(stdout) {
         let text = "";
         if (message?.content && Array.isArray(message.content)) {
             text = message.content
-                .filter((block) => block.type === "text")
+                .filter((block) => block &&
+                typeof block === "object" &&
+                block.type === "text" &&
+                typeof block.text === "string")
                 .map((block) => block.text)
                 .join("");
         }
@@ -75,8 +90,8 @@ export function parseStreamJson(stdout) {
             text,
             costUsd: null,
             usage: null,
-            sessionId: systemEvent?.session_id ?? null,
-            model: systemEvent?.model ?? message?.model ?? null,
+            sessionId: stringOrNull(systemEvent?.session_id),
+            model: stringOrNull(systemEvent?.model) ?? stringOrNull(message?.model),
             durationApiMs: null,
             isError: false,
             numTurns: null,
@@ -87,8 +102,8 @@ export function parseStreamJson(stdout) {
         text: "",
         costUsd: null,
         usage: null,
-        sessionId: systemEvent?.session_id ?? null,
-        model: systemEvent?.model ?? null,
+        sessionId: stringOrNull(systemEvent?.session_id),
+        model: stringOrNull(systemEvent?.model),
         durationApiMs: null,
         isError: false,
         numTurns: null,

package/dist/upstream-contracts.d.ts CHANGED Viewed

@@ -93,6 +93,20 @@ export declare function validateUpstreamCliArgs(cli: CliType, args: readonly str
 export declare function assertUpstreamCliArgs(cli: CliType, args: readonly string[]): void;
 export declare function validateUpstreamCliEnv(cli: CliType, env: Record<string, string> | undefined): ContractValidationResult;
 export declare function assertUpstreamCliEnv(cli: CliType, env: Record<string, string> | undefined): void;
+/**
+ * Best-effort, advisory-only extraction of long-form flags from raw --help text.
+ * Returns a sorted array of unique `--foo-bar` style flags discovered in the output.
+ *
+ * Heuristics:
+ * - Matches common option declaration lines emitted by clap, yargs, commander, custom TUIs, etc.
+ * - Lowercases for stable comparison against our contract keys.
+ * - Intentionally conservative: ignores obvious noise (URLs, prose in descriptions).
+ *
+ * This powers the bidirectional drift detector (extra flags the installed binary
+ * advertises that our contract does not yet allow). It is NEVER used for argv
+ * validation — only for the upstream scanner and `upstream_contracts` probe reports.
+ */
+export declare function extractDiscoveredFlags(helpText: string): readonly string[];
 export interface InstalledCliContractProbe {
     cli: CliType;
     executable: string;
@@ -101,6 +115,16 @@ export interface InstalledCliContractProbe {
     available: boolean;
     checkedHelpCommands: string[][];
     missingFlags: string[];
+    /** Flags present in the installed binary's --help but absent from the declared contract. */
+    extraFlags: readonly string[];
+    /** Sorted list of long flags discovered in the help text (for snapshot diffing). */
+    discoveredFlags: readonly string[];
+    /** Stable hash of the concatenated help output (detects subtle text changes even if flag set is stable). */
+    helpHash?: string;
+    /** Best-effort version string scraped from the help/version output (if present). */
+    versionHint?: string;
+    /** ISO timestamp when this probe was performed. */
+    probedAt: string;
     warnings: string[];
 }
 export declare function probeInstalledCliContract(cli: CliType, timeoutMs?: number): InstalledCliContractProbe;

package/dist/upstream-contracts.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { spawnSync } from "node:child_process";
+import { createHash } from "node:crypto";
 import { envWithExtendedPath, getExtendedPath, resolveCommandForSpawn } from "./executor.js";
 const PERMISSION_MODES = [
     "default",
@@ -255,12 +256,12 @@ export const UPSTREAM_CLI_CONTRACTS = {
             "workingDir",
             "addDir",
         ],
-        resumeOnlyFlags: ["--last"],
+        resumeOnlyFlags: ["--last", "--all"],
         // Phase 4 slice α (v1.8.0) verified that `codex exec resume` accepts
         // `--output-schema` and `-c` (codex-cli 0.133.0 `exec resume --help`),
-        // so they're no longer forbidden. `--search` stays forbidden (resume
-        // inherits the original session's web-search state).
-        resumeForbiddenFlags: ["--sandbox", "--ask-for-approval", "--full-auto", "--search"],
+        // so they're no longer forbidden. Current resume help does not accept
+        // session-profile or working-directory policy flags.
+        resumeForbiddenFlags: ["--sandbox", "-C", "--cd", "--add-dir", "--profile"],
         flags: {
             "--last": { arity: "none", description: "Resume latest session" },
             "--model": { arity: "one", description: "Model selector" },
@@ -269,12 +270,6 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 values: ["read-only", "workspace-write", "danger-full-access"],
                 description: "Sandbox policy",
             },
-            "--ask-for-approval": {
-                arity: "one",
-                values: ["untrusted", "on-request", "never"],
-                description: "Approval policy",
-            },
-            "--full-auto": { arity: "none", description: "Legacy full-auto shortcut" },
             "--dangerously-bypass-approvals-and-sandbox": {
                 arity: "none",
                 description: "Disable approvals and sandbox",
@@ -282,25 +277,62 @@ export const UPSTREAM_CLI_CONTRACTS = {
             "--json": { arity: "none", description: "JSONL event stream" },
             "--skip-git-repo-check": { arity: "none", description: "Allow non-git cwd" },
             "--output-schema": { arity: "one", description: "Structured output JSON schema path" },
-            "--search": { arity: "none", description: "Enable web search" },
             "--profile": { arity: "one", description: "Config profile" },
             "-c": {
                 arity: "one",
                 pattern: /^[a-zA-Z0-9._]+=([^\r\n]*)$/,
                 description: "Config override key=value",
             },
+            "--config": {
+                arity: "one",
+                pattern: /^[a-zA-Z0-9._]+=([^\r\n]*)$/,
+                description: "Config override key=value",
+            },
+            "--enable": { arity: "one", description: "Enable a Codex feature flag" },
+            "--disable": { arity: "one", description: "Disable a Codex feature flag" },
+            "--strict-config": {
+                arity: "none",
+                description: "Reject unrecognized config.toml fields",
+            },
             "--ephemeral": { arity: "none", description: "Do not persist session" },
             "-i": { arity: "one", description: "Image path" },
+            "--image": { arity: "one", description: "Image path" },
             "--ignore-user-config": { arity: "none", description: "Ignore user config" },
             "--ignore-rules": { arity: "none", description: "Ignore rule files" },
-            // The gateway only ever emits the short form `-C` (codex 0.134.0 accepts
-            // both `-C` and `--cd` as aliases). The contract registers exactly what
-            // we emit; if a future code path emits `--cd` instead, the contract
-            // check will fail loudly — which is the intended catch.
+            "--oss": { arity: "none", description: "Use open-source provider" },
+            "--local-provider": {
+                arity: "one",
+                values: ["lmstudio", "ollama"],
+                description: "Local open-source provider",
+            },
+            "--color": {
+                arity: "one",
+                values: ["always", "never", "auto"],
+                description: "Output color mode",
+            },
+            "--output-last-message": {
+                arity: "one",
+                description: "Write the final agent message to a file",
+            },
+            "--dangerously-bypass-hook-trust": {
+                arity: "none",
+                description: "Run enabled hooks without persisted hook trust",
+            },
+            "--version": { arity: "none", description: "Print version" },
+            "--all": {
+                arity: "none",
+                description: "Resume picker: show all sessions without cwd filtering",
+            },
+            // The gateway emits the short form `-C`, and the advisory contract also
+            // tracks the long `--cd` alias advertised by current Codex exec help.
             "-C": {
                 arity: "one",
                 description: "Working root for the session (Phase 4 slice ζ; new sessions only)",
             },
+            "--cd": {
+                arity: "one",
+                description: "Working root for the session",
+            },
             "--add-dir": {
                 arity: "one",
                 description: "Additional writable workspace directory (Phase 4 slice ζ; repeat once per directory; new sessions only)",
@@ -320,6 +352,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 args: ["exec", "--sandbox", "workspace", "hello"],
                 expect: "fail",
             },
+            {
+                id: "codex-ask-for-approval-unsupported",
+                description: "Current Codex CLI no longer accepts --ask-for-approval",
+                args: ["exec", "--ask-for-approval", "never", "hello"],
+                expect: "fail",
+            },
+            {
+                id: "codex-full-auto-unsupported",
+                description: "Current Codex CLI no longer accepts --full-auto",
+                args: ["exec", "--full-auto", "hello"],
+                expect: "fail",
+            },
             {
                 // Phase 4 slice α: --output-schema IS accepted on resume per
                 // codex-cli 0.133.0; this fixture pins the new behaviour so future
@@ -336,9 +380,9 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 expect: "pass",
             },
             {
-                id: "codex-resume-search-still-forbidden",
-                description: "Phase 4 slice α: --search remains forbidden on resume",
-                args: ["exec", "resume", "--search", "session-id", "hello"],
+                id: "codex-search-unsupported",
+                description: "Current Codex exec no longer accepts --search",
+                args: ["exec", "--search", "hello"],
                 expect: "fail",
             },
             {
@@ -361,6 +405,41 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 ],
                 expect: "pass",
             },
+            {
+                id: "codex-current-exec-help-surface",
+                description: "Current Codex exec advertises additional config, output, provider, and safety flags",
+                args: [
+                    "exec",
+                    "--config",
+                    "features.foo=true",
+                    "--enable",
+                    "foo",
+                    "--disable",
+                    "bar",
+                    "--strict-config",
+                    "--image",
+                    "/tmp/a.png",
+                    "--oss",
+                    "--local-provider",
+                    "ollama",
+                    "--color",
+                    "auto",
+                    "--cd",
+                    "/tmp/work",
+                    "--output-last-message",
+                    "/tmp/out.txt",
+                    "--dangerously-bypass-hook-trust",
+                    "--version",
+                    "hello",
+                ],
+                expect: "pass",
+            },
+            {
+                id: "codex-current-resume-help-surface",
+                description: "Current Codex resume advertises --all for disabling cwd filtering",
+                args: ["exec", "resume", "--all", "session-id", "hello"],
+                expect: "pass",
+            },
         ],
     },
     gemini: {
@@ -554,6 +633,38 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 arity: "one",
                 description: "Permission deny rule (Phase 4 slice θ; repeat once per rule per `grok --help`)",
             },
+            "--agent": { arity: "one", description: "Agent name or definition file path" },
+            "--agents": { arity: "one", description: "Inline subagent definitions JSON" },
+            "--best-of-n": {
+                arity: "one",
+                pattern: /^[1-9][0-9]*$/,
+                description: "Run the task N ways in parallel and pick the best",
+            },
+            "--check": { arity: "none", description: "Append a self-verification loop" },
+            "--disable-web-search": {
+                arity: "none",
+                description: "Disable web search and web fetch tools",
+            },
+            "--experimental-memory": { arity: "none", description: "Enable cross-session memory" },
+            "--no-alt-screen": { arity: "none", description: "Run inline without alt screen" },
+            "--no-memory": { arity: "none", description: "Disable cross-session memory" },
+            "--no-plan": { arity: "none", description: "Disable plan mode" },
+            "--no-subagents": { arity: "none", description: "Disable subagent spawning" },
+            "--oauth": { arity: "none", description: "Use OAuth during authentication" },
+            "--prompt-file": { arity: "one", description: "Single-turn prompt from a file" },
+            "--prompt-json": { arity: "one", description: "Single-turn prompt JSON blocks" },
+            "--restore-code": {
+                arity: "none",
+                description: "Check out the original session commit when resuming",
+            },
+            "--single": { arity: "one", description: "Single-turn prompt" },
+            "--todo-gate": { arity: "none", description: "Enable runtime turn-end TodoGate" },
+            "--verbatim": { arity: "none", description: "Send prompt exactly as given" },
+            "--version": { arity: "none", description: "Print version" },
+            "--worktree": {
+                arity: "optional",
+                description: "Start the session in a new git worktree, optionally named",
+            },
         },
         env: {},
         conformanceFixtures: [
@@ -617,6 +728,40 @@ export const UPSTREAM_CLI_CONTRACTS = {
                 args: ["-p", "hello", "--deny", "write", "--deny", "kill"],
                 expect: "pass",
             },
+            {
+                id: "grok-current-help-surface",
+                description: "Current Grok Build help advertises agent, prompt, memory, web, and worktree flags",
+                args: [
+                    "-p",
+                    "hello",
+                    "--agent",
+                    "reviewer",
+                    "--agents",
+                    "{}",
+                    "--best-of-n",
+                    "2",
+                    "--check",
+                    "--disable-web-search",
+                    "--experimental-memory",
+                    "--no-alt-screen",
+                    "--no-memory",
+                    "--no-plan",
+                    "--no-subagents",
+                    "--oauth",
+                    "--prompt-file",
+                    "/tmp/prompt.md",
+                    "--prompt-json",
+                    "[]",
+                    "--restore-code",
+                    "--single",
+                    "single prompt",
+                    "--todo-gate",
+                    "--verbatim",
+                    "--version",
+                    "--worktree",
+                ],
+                expect: "pass",
+            },
         ],
     },
     mistral: {
@@ -948,6 +1093,39 @@ function validateFlagValue(cli, arg, flag, value, index, violations) {
         });
     }
 }
+/**
+ * Best-effort, advisory-only extraction of long-form flags from raw --help text.
+ * Returns a sorted array of unique `--foo-bar` style flags discovered in the output.
+ *
+ * Heuristics:
+ * - Matches common option declaration lines emitted by clap, yargs, commander, custom TUIs, etc.
+ * - Lowercases for stable comparison against our contract keys.
+ * - Intentionally conservative: ignores obvious noise (URLs, prose in descriptions).
+ *
+ * This powers the bidirectional drift detector (extra flags the installed binary
+ * advertises that our contract does not yet allow). It is NEVER used for argv
+ * validation — only for the upstream scanner and `upstream_contracts` probe reports.
+ */
+export function extractDiscoveredFlags(helpText) {
+    const discovered = new Set();
+    // Long flags: --foo, --foo-bar, --foo_bar (some CLIs normalize _ to - in display).
+    // Only inspect option declaration lines so prose such as
+    // "(Claude Code: --allowedTools)" does not create false drift.
+    const longRe = /--([a-z0-9][a-z0-9_-]{1,}[a-z0-9]?)/g;
+    for (const line of helpText.split(/\r?\n/)) {
+        const trimmed = line.trimStart();
+        if (!trimmed.startsWith("-"))
+            continue;
+        const declaration = trimmed.split(/\s{2,}/, 1)[0] ?? "";
+        for (const match of declaration.matchAll(longRe)) {
+            const name = `--${match[1].toLowerCase().replace(/_/g, "-")}`;
+            if (name === "--help")
+                continue;
+            discovered.add(name);
+        }
+    }
+    return Array.from(discovered).sort();
+}
 export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
     const contract = UPSTREAM_CLI_CONTRACTS[cli];
     const outputs = [];
@@ -979,6 +1157,11 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
                 available: false,
                 checkedHelpCommands: contract.helpArgs,
                 missingFlags: [],
+                extraFlags: [],
+                discoveredFlags: [],
+                helpHash: undefined,
+                versionHint: undefined,
+                probedAt: new Date().toISOString(),
                 warnings: [result.error.message],
             };
         }
@@ -989,6 +1172,13 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
     }
     const helpText = outputs.join("\n");
     const missingFlags = Object.keys(contract.flags).filter(flag => !helpText.includes(flag));
+    const discoveredFlags = extractDiscoveredFlags(helpText);
+    const contractFlagSet = new Set(Object.keys(contract.flags));
+    const extraFlags = discoveredFlags.filter(f => !contractFlagSet.has(f));
+    // Cheap version hint: first line that looks like a version banner
+    const versionMatch = helpText.match(/^\s*(?:[A-Za-z][\w .-]+)?v?\d+\.\d+\S*/m);
+    const versionHint = versionMatch ? versionMatch[0].trim().slice(0, 80) : undefined;
+    const helpHash = createHash("sha256").update(helpText).digest("hex");
     return {
         cli,
         executable: contract.executable,
@@ -997,6 +1187,11 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
         available: true,
         checkedHelpCommands: contract.helpArgs,
         missingFlags,
+        extraFlags,
+        discoveredFlags,
+        helpHash,
+        versionHint,
+        probedAt: new Date().toISOString(),
         warnings,
     };
 }

package/dist/validation-tools.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { z } from "zod";
+import { z } from "zod/v3";
 import { getAvailableCliInfo } from "./model-registry.js";
 import { collectValidationJobResult, startJudgeSynthesis, startValidationRun, } from "./validation-orchestrator.js";
 const providerSchema = z.enum(["claude", "codex", "gemini", "grok", "mistral"]);