llm-cli-gateway 1.17.0 → 1.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +16 -19
- package/dist/cache-stats.d.ts +47 -0
- package/dist/cache-stats.js +85 -2
- package/dist/config.js +1 -1
- package/dist/doctor.d.ts +22 -1
- package/dist/doctor.js +35 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +123 -39
- package/dist/process-monitor.d.ts +1 -2
- package/dist/process-monitor.js +7 -7
- package/dist/prompt-parts.d.ts +1 -1
- package/dist/prompt-parts.js +1 -1
- package/dist/provider-login-guidance.js +5 -5
- package/dist/provider-status.js +0 -4
- package/dist/request-helpers.d.ts +28 -26
- package/dist/request-helpers.js +50 -43
- package/dist/session-manager.js +1 -1
- package/dist/stream-json-parser.js +30 -15
- package/dist/upstream-contracts.d.ts +24 -0
- package/dist/upstream-contracts.js +213 -18
- package/dist/validation-tools.js +1 -1
- package/package.json +11 -8
- package/setup/status.schema.json +31 -0
- package/socket.yml +8 -8
package/dist/request-helpers.js
CHANGED
|
@@ -6,7 +6,7 @@ import { existsSync, unlinkSync, writeFileSync } from "fs";
|
|
|
6
6
|
import { tmpdir } from "os";
|
|
7
7
|
import { join, isAbsolute } from "path";
|
|
8
8
|
import { randomUUID } from "crypto";
|
|
9
|
-
import { z } from "zod";
|
|
9
|
+
import { z } from "zod/v3";
|
|
10
10
|
/** Prefix for gateway-generated session IDs. Enforces provenance structurally. */
|
|
11
11
|
export const GATEWAY_SESSION_PREFIX = "gw-";
|
|
12
12
|
/**
|
|
@@ -262,57 +262,54 @@ export const GEMINI_APPROVAL_MODES = ["default", "auto_edit", "yolo", "plan"];
|
|
|
262
262
|
*/
|
|
263
263
|
export const CODEX_SANDBOX_MODES = ["read-only", "workspace-write", "danger-full-access"];
|
|
264
264
|
/**
|
|
265
|
-
* Codex approval modes
|
|
265
|
+
* Deprecated Codex approval modes. Current Codex no longer exposes an
|
|
266
|
+
* `--ask-for-approval` flag; the MCP input is temporarily retained so older
|
|
267
|
+
* callers do not fail schema validation, but it emits no CLI argv.
|
|
266
268
|
*/
|
|
267
269
|
export const CODEX_ASK_FOR_APPROVAL_MODES = ["untrusted", "on-request", "never"];
|
|
268
270
|
/**
|
|
269
|
-
* Resolve Codex
|
|
270
|
-
*
|
|
271
|
+
* Resolve current Codex sandbox args from the modern params + legacy
|
|
272
|
+
* `fullAuto` shorthand. Current Codex exposes `--sandbox`, but no longer
|
|
273
|
+
* exposes `--ask-for-approval` or `--full-auto`.
|
|
271
274
|
*
|
|
272
275
|
* Precedence:
|
|
273
|
-
* 1.
|
|
274
|
-
*
|
|
275
|
-
*
|
|
276
|
-
*
|
|
277
|
-
* and the explicit values win.
|
|
278
|
-
* 3. Else if `fullAuto: true`, expand to
|
|
279
|
-
* `--sandbox workspace-write --ask-for-approval never`.
|
|
276
|
+
* 1. Explicit `sandboxMode` emits `--sandbox <mode>`.
|
|
277
|
+
* 2. Else if `fullAuto: true`, expand to `--sandbox workspace-write`.
|
|
278
|
+
* 3. Deprecated `askForApproval` and `useLegacyFullAutoFlag` emit no argv
|
|
279
|
+
* and return warnings for callers to surface/log.
|
|
280
280
|
* 4. Else emit nothing.
|
|
281
281
|
*/
|
|
282
282
|
export function resolveCodexSandboxFlags(input) {
|
|
283
283
|
const { sandboxMode, askForApproval, fullAuto, useLegacyFullAutoFlag } = input;
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
const explicit = Boolean(sandboxMode || askForApproval);
|
|
289
|
-
if (explicit) {
|
|
290
|
-
const args = [];
|
|
291
|
-
if (sandboxMode)
|
|
292
|
-
args.push("--sandbox", sandboxMode);
|
|
293
|
-
if (askForApproval)
|
|
294
|
-
args.push("--ask-for-approval", askForApproval);
|
|
295
|
-
const warning = fullAuto
|
|
296
|
-
? "fullAuto was set alongside explicit sandboxMode/askForApproval; explicit values win. fullAuto is deprecated."
|
|
297
|
-
: undefined;
|
|
298
|
-
return { args, warning };
|
|
299
|
-
}
|
|
300
|
-
if (fullAuto) {
|
|
301
|
-
return {
|
|
302
|
-
args: ["--sandbox", "workspace-write", "--ask-for-approval", "never"],
|
|
303
|
-
};
|
|
284
|
+
const args = [];
|
|
285
|
+
const warnings = [];
|
|
286
|
+
if (useLegacyFullAutoFlag) {
|
|
287
|
+
warnings.push("useLegacyFullAutoFlag is deprecated and ignored because current Codex no longer accepts --full-auto.");
|
|
304
288
|
}
|
|
305
|
-
|
|
289
|
+
if (askForApproval) {
|
|
290
|
+
warnings.push("askForApproval is deprecated and ignored because current Codex no longer accepts --ask-for-approval.");
|
|
291
|
+
}
|
|
292
|
+
if (sandboxMode) {
|
|
293
|
+
args.push("--sandbox", sandboxMode);
|
|
294
|
+
if (fullAuto) {
|
|
295
|
+
warnings.push("fullAuto was set alongside explicit sandboxMode; sandboxMode wins. fullAuto is deprecated.");
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
else if (fullAuto) {
|
|
299
|
+
args.push("--sandbox", "workspace-write");
|
|
300
|
+
}
|
|
301
|
+
return { args, warning: warnings.length > 0 ? warnings.join(" ") : undefined };
|
|
306
302
|
}
|
|
307
303
|
/**
|
|
308
304
|
* Flags that `codex exec resume` rejects (the original session's policy is
|
|
309
305
|
* inherited). Callers must drop these when building resume argv.
|
|
310
306
|
*
|
|
311
|
-
* Verified against `codex exec resume --help` (codex-cli 0.
|
|
312
|
-
* `--
|
|
313
|
-
* `--
|
|
314
|
-
*
|
|
315
|
-
*
|
|
307
|
+
* Verified against `codex exec resume --help` (codex-cli 0.135.0):
|
|
308
|
+
* `--sandbox`, `--add-dir`, `-C`, `--cd`, `--profile`, and `--search` are rejected.
|
|
309
|
+
* Deprecated `--full-auto` / `--ask-for-approval` are kept here defensively so
|
|
310
|
+
* legacy pre-filtered segments are stripped instead of reaching spawn.
|
|
311
|
+
* `--output-schema` and `-c key=value` ARE accepted on resume and therefore are
|
|
312
|
+
* NOT in this filter (Phase 4 slice α stopped silently dropping those two).
|
|
316
313
|
*/
|
|
317
314
|
export const CODEX_RESUME_FILTERED_FLAGS = new Set([
|
|
318
315
|
"--full-auto",
|
|
@@ -320,6 +317,8 @@ export const CODEX_RESUME_FILTERED_FLAGS = new Set([
|
|
|
320
317
|
"--ask-for-approval",
|
|
321
318
|
"--add-dir",
|
|
322
319
|
"-C",
|
|
320
|
+
"--cd",
|
|
321
|
+
"--profile",
|
|
323
322
|
"--search",
|
|
324
323
|
]);
|
|
325
324
|
/**
|
|
@@ -331,13 +330,15 @@ const CODEX_RESUME_FILTERED_FLAGS_WITH_VALUE = new Set([
|
|
|
331
330
|
"--ask-for-approval",
|
|
332
331
|
"--add-dir",
|
|
333
332
|
"-C",
|
|
333
|
+
"--cd",
|
|
334
|
+
"--profile",
|
|
334
335
|
]);
|
|
335
336
|
/**
|
|
336
337
|
* Strip resume-incompatible flag/value pairs from a Codex argv segment.
|
|
337
338
|
*
|
|
338
339
|
* Bare flags (`--full-auto`, `--search`) drop without consuming a value.
|
|
339
|
-
* Value-taking flags (`--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`,
|
|
340
|
-
* `--
|
|
340
|
+
* Value-taking flags (`--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`, `--cd`,
|
|
341
|
+
* `--profile`) drop together with their immediately-following value.
|
|
341
342
|
*/
|
|
342
343
|
export function filterCodexResumeFlags(args) {
|
|
343
344
|
const out = [];
|
|
@@ -371,7 +372,7 @@ export const CLAUDE_EFFORT_LEVELS = ["low", "medium", "high", "xhigh", "max"];
|
|
|
371
372
|
export const CLAUDE_HIGH_IMPACT_PARAMS_SCHEMA = z
|
|
372
373
|
.object({
|
|
373
374
|
agent: z.string().optional(),
|
|
374
|
-
agents: z.record(z.record(z.unknown())).optional(),
|
|
375
|
+
agents: z.record(z.string(), z.record(z.string(), z.unknown())).optional(),
|
|
375
376
|
forkSession: z.boolean().optional(),
|
|
376
377
|
systemPrompt: z.string().optional(),
|
|
377
378
|
appendSystemPrompt: z.string().optional(),
|
|
@@ -549,7 +550,7 @@ export function findMissingImagePath(images) {
|
|
|
549
550
|
* params before they reach `prepareCodexRequest`.
|
|
550
551
|
*/
|
|
551
552
|
export const CODEX_HIGH_IMPACT_PARAMS_SCHEMA = z.object({
|
|
552
|
-
outputSchema: z.union([z.string(), z.record(z.unknown())]).optional(),
|
|
553
|
+
outputSchema: z.union([z.string(), z.record(z.string(), z.unknown())]).optional(),
|
|
553
554
|
search: z.boolean().optional(),
|
|
554
555
|
profile: z.string().optional(),
|
|
555
556
|
configOverrides: CODEX_CONFIG_OVERRIDES_SCHEMA,
|
|
@@ -578,8 +579,9 @@ export function prepareCodexHighImpactFlags(input) {
|
|
|
578
579
|
args.push("--output-schema", schema.path);
|
|
579
580
|
cleanup = schema.cleanup;
|
|
580
581
|
}
|
|
582
|
+
const warnings = [];
|
|
581
583
|
if (input.search) {
|
|
582
|
-
|
|
584
|
+
warnings.push("search is deprecated and ignored because current Codex exec no longer accepts --search.");
|
|
583
585
|
}
|
|
584
586
|
if (input.profile) {
|
|
585
587
|
args.push("--profile", input.profile);
|
|
@@ -599,7 +601,12 @@ export function prepareCodexHighImpactFlags(input) {
|
|
|
599
601
|
if (input.ignoreRules) {
|
|
600
602
|
args.push("--ignore-rules");
|
|
601
603
|
}
|
|
602
|
-
return {
|
|
604
|
+
return {
|
|
605
|
+
args,
|
|
606
|
+
cleanup,
|
|
607
|
+
missingImagePath: null,
|
|
608
|
+
warning: warnings.length > 0 ? warnings.join(" ") : undefined,
|
|
609
|
+
};
|
|
603
610
|
}
|
|
604
611
|
export function prepareCodexForkRequest(input) {
|
|
605
612
|
const { prompt, sessionId, forkLast } = input;
|
package/dist/session-manager.js
CHANGED
|
@@ -76,7 +76,7 @@ export class FileSessionManager {
|
|
|
76
76
|
const data = readFileSync(this.storagePath, "utf-8");
|
|
77
77
|
this.storage = JSON.parse(data);
|
|
78
78
|
}
|
|
79
|
-
catch
|
|
79
|
+
catch {
|
|
80
80
|
// If file is corrupted, start fresh
|
|
81
81
|
this.storage = { sessions: {}, activeSession: createEmptyActiveSessions() };
|
|
82
82
|
}
|
|
@@ -4,6 +4,15 @@
|
|
|
4
4
|
* Each line of stdout is a complete JSON object. This parser extracts the
|
|
5
5
|
* final result text, cost, usage, and metadata from the stream.
|
|
6
6
|
*/
|
|
7
|
+
/** Pass a string through unchanged; every non-string value (including undefined) becomes null. */
function stringOrNull(value) {
|
|
8
|
+
return typeof value === "string" ? value : null;
|
|
9
|
+
}
|
|
10
|
+
/** Pass a finite number through unchanged; NaN, +/-Infinity and non-numbers become null. */
function numberOrNull(value) {
|
|
11
|
+
return typeof value === "number" && Number.isFinite(value) ? value : null;
|
|
12
|
+
}
|
|
13
|
+
/** Pass a finite number through unchanged; NaN, +/-Infinity and non-numbers become 0. */
function numberOrZero(value) {
|
|
14
|
+
return typeof value === "number" && Number.isFinite(value) ? value : 0;
|
|
15
|
+
}
|
|
7
16
|
/**
|
|
8
17
|
* Parse completed NDJSON stdout from `claude --output-format stream-json --include-partial-messages`.
|
|
9
18
|
*
|
|
@@ -30,6 +39,9 @@ export function parseStreamJson(stdout) {
|
|
|
30
39
|
// Skip malformed lines
|
|
31
40
|
continue;
|
|
32
41
|
}
|
|
42
|
+
if (!parsed || typeof parsed !== "object") {
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
33
45
|
if (parsed.type === "result") {
|
|
34
46
|
resultEvent = parsed;
|
|
35
47
|
}
|
|
@@ -44,21 +56,21 @@ export function parseStreamJson(stdout) {
|
|
|
44
56
|
if (resultEvent) {
|
|
45
57
|
const usage = resultEvent.usage
|
|
46
58
|
? {
|
|
47
|
-
inputTokens: resultEvent.usage.input_tokens
|
|
48
|
-
outputTokens: resultEvent.usage.output_tokens
|
|
49
|
-
cacheReadInputTokens: resultEvent.usage.cache_read_input_tokens
|
|
50
|
-
cacheCreationInputTokens: resultEvent.usage.cache_creation_input_tokens
|
|
59
|
+
inputTokens: numberOrZero(resultEvent.usage.input_tokens),
|
|
60
|
+
outputTokens: numberOrZero(resultEvent.usage.output_tokens),
|
|
61
|
+
cacheReadInputTokens: numberOrZero(resultEvent.usage.cache_read_input_tokens),
|
|
62
|
+
cacheCreationInputTokens: numberOrZero(resultEvent.usage.cache_creation_input_tokens),
|
|
51
63
|
}
|
|
52
64
|
: null;
|
|
53
65
|
return {
|
|
54
|
-
text: resultEvent.result
|
|
55
|
-
costUsd: resultEvent.total_cost_usd
|
|
66
|
+
text: typeof resultEvent.result === "string" ? resultEvent.result : "",
|
|
67
|
+
costUsd: numberOrNull(resultEvent.total_cost_usd),
|
|
56
68
|
usage,
|
|
57
|
-
sessionId: resultEvent.session_id ?? systemEvent?.session_id
|
|
58
|
-
model: systemEvent?.model ?? resultEvent.model
|
|
59
|
-
durationApiMs: resultEvent.duration_api_ms
|
|
69
|
+
sessionId: stringOrNull(resultEvent.session_id) ?? stringOrNull(systemEvent?.session_id),
|
|
70
|
+
model: stringOrNull(systemEvent?.model) ?? stringOrNull(resultEvent.model),
|
|
71
|
+
durationApiMs: numberOrNull(resultEvent.duration_api_ms),
|
|
60
72
|
isError: resultEvent.is_error === true,
|
|
61
|
-
numTurns: resultEvent.num_turns
|
|
73
|
+
numTurns: numberOrNull(resultEvent.num_turns),
|
|
62
74
|
};
|
|
63
75
|
}
|
|
64
76
|
// Fallback: extract text from assistant event
|
|
@@ -67,7 +79,10 @@ export function parseStreamJson(stdout) {
|
|
|
67
79
|
let text = "";
|
|
68
80
|
if (message?.content && Array.isArray(message.content)) {
|
|
69
81
|
text = message.content
|
|
70
|
-
.filter((block) => block
|
|
82
|
+
.filter((block) => block &&
|
|
83
|
+
typeof block === "object" &&
|
|
84
|
+
block.type === "text" &&
|
|
85
|
+
typeof block.text === "string")
|
|
71
86
|
.map((block) => block.text)
|
|
72
87
|
.join("");
|
|
73
88
|
}
|
|
@@ -75,8 +90,8 @@ export function parseStreamJson(stdout) {
|
|
|
75
90
|
text,
|
|
76
91
|
costUsd: null,
|
|
77
92
|
usage: null,
|
|
78
|
-
sessionId: systemEvent?.session_id
|
|
79
|
-
model: systemEvent?.model ?? message?.model
|
|
93
|
+
sessionId: stringOrNull(systemEvent?.session_id),
|
|
94
|
+
model: stringOrNull(systemEvent?.model) ?? stringOrNull(message?.model),
|
|
80
95
|
durationApiMs: null,
|
|
81
96
|
isError: false,
|
|
82
97
|
numTurns: null,
|
|
@@ -87,8 +102,8 @@ export function parseStreamJson(stdout) {
|
|
|
87
102
|
text: "",
|
|
88
103
|
costUsd: null,
|
|
89
104
|
usage: null,
|
|
90
|
-
sessionId: systemEvent?.session_id
|
|
91
|
-
model: systemEvent?.model
|
|
105
|
+
sessionId: stringOrNull(systemEvent?.session_id),
|
|
106
|
+
model: stringOrNull(systemEvent?.model),
|
|
92
107
|
durationApiMs: null,
|
|
93
108
|
isError: false,
|
|
94
109
|
numTurns: null,
|
|
@@ -93,6 +93,20 @@ export declare function validateUpstreamCliArgs(cli: CliType, args: readonly str
|
|
|
93
93
|
export declare function assertUpstreamCliArgs(cli: CliType, args: readonly string[]): void;
|
|
94
94
|
export declare function validateUpstreamCliEnv(cli: CliType, env: Record<string, string> | undefined): ContractValidationResult;
|
|
95
95
|
export declare function assertUpstreamCliEnv(cli: CliType, env: Record<string, string> | undefined): void;
|
|
96
|
+
/**
|
|
97
|
+
* Best-effort, advisory-only extraction of long-form flags from raw --help text.
|
|
98
|
+
* Returns a sorted array of unique `--foo-bar` style flags discovered in the output.
|
|
99
|
+
*
|
|
100
|
+
* Heuristics:
|
|
101
|
+
* - Matches common option declaration lines emitted by clap, yargs, commander, custom TUIs, etc.
|
|
102
|
+
* - Lowercases for stable comparison against our contract keys.
|
|
103
|
+
* - Intentionally conservative: ignores obvious noise (URLs, prose in descriptions).
|
|
104
|
+
*
|
|
105
|
+
* This powers the bidirectional drift detector (extra flags the installed binary
|
|
106
|
+
* advertises that our contract does not yet allow). It is NEVER used for argv
|
|
107
|
+
* validation — only for the upstream scanner and `upstream_contracts` probe reports.
|
|
108
|
+
*/
|
|
109
|
+
export declare function extractDiscoveredFlags(helpText: string): readonly string[];
|
|
96
110
|
export interface InstalledCliContractProbe {
|
|
97
111
|
cli: CliType;
|
|
98
112
|
executable: string;
|
|
@@ -101,6 +115,16 @@ export interface InstalledCliContractProbe {
|
|
|
101
115
|
available: boolean;
|
|
102
116
|
checkedHelpCommands: string[][];
|
|
103
117
|
missingFlags: string[];
|
|
118
|
+
/** Flags present in the installed binary's --help but absent from the declared contract. */
|
|
119
|
+
extraFlags: readonly string[];
|
|
120
|
+
/** Sorted list of long flags discovered in the help text (for snapshot diffing). */
|
|
121
|
+
discoveredFlags: readonly string[];
|
|
122
|
+
/** Stable hash of the concatenated help output (detects subtle text changes even if flag set is stable). */
|
|
123
|
+
helpHash?: string;
|
|
124
|
+
/** Best-effort version string scraped from the help/version output (if present). */
|
|
125
|
+
versionHint?: string;
|
|
126
|
+
/** ISO timestamp when this probe was performed. */
|
|
127
|
+
probedAt: string;
|
|
104
128
|
warnings: string[];
|
|
105
129
|
}
|
|
106
130
|
export declare function probeInstalledCliContract(cli: CliType, timeoutMs?: number): InstalledCliContractProbe;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
2
3
|
import { envWithExtendedPath, getExtendedPath, resolveCommandForSpawn } from "./executor.js";
|
|
3
4
|
const PERMISSION_MODES = [
|
|
4
5
|
"default",
|
|
@@ -255,12 +256,12 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
255
256
|
"workingDir",
|
|
256
257
|
"addDir",
|
|
257
258
|
],
|
|
258
|
-
resumeOnlyFlags: ["--last"],
|
|
259
|
+
resumeOnlyFlags: ["--last", "--all"],
|
|
259
260
|
// Phase 4 slice α (v1.8.0) verified that `codex exec resume` accepts
|
|
260
261
|
// `--output-schema` and `-c` (codex-cli 0.133.0 `exec resume --help`),
|
|
261
|
-
// so they're no longer forbidden.
|
|
262
|
-
//
|
|
263
|
-
resumeForbiddenFlags: ["--sandbox", "
|
|
262
|
+
// so they're no longer forbidden. Current resume help does not accept
|
|
263
|
+
// session-profile or working-directory policy flags.
|
|
264
|
+
resumeForbiddenFlags: ["--sandbox", "-C", "--cd", "--add-dir", "--profile"],
|
|
264
265
|
flags: {
|
|
265
266
|
"--last": { arity: "none", description: "Resume latest session" },
|
|
266
267
|
"--model": { arity: "one", description: "Model selector" },
|
|
@@ -269,12 +270,6 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
269
270
|
values: ["read-only", "workspace-write", "danger-full-access"],
|
|
270
271
|
description: "Sandbox policy",
|
|
271
272
|
},
|
|
272
|
-
"--ask-for-approval": {
|
|
273
|
-
arity: "one",
|
|
274
|
-
values: ["untrusted", "on-request", "never"],
|
|
275
|
-
description: "Approval policy",
|
|
276
|
-
},
|
|
277
|
-
"--full-auto": { arity: "none", description: "Legacy full-auto shortcut" },
|
|
278
273
|
"--dangerously-bypass-approvals-and-sandbox": {
|
|
279
274
|
arity: "none",
|
|
280
275
|
description: "Disable approvals and sandbox",
|
|
@@ -282,25 +277,62 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
282
277
|
"--json": { arity: "none", description: "JSONL event stream" },
|
|
283
278
|
"--skip-git-repo-check": { arity: "none", description: "Allow non-git cwd" },
|
|
284
279
|
"--output-schema": { arity: "one", description: "Structured output JSON schema path" },
|
|
285
|
-
"--search": { arity: "none", description: "Enable web search" },
|
|
286
280
|
"--profile": { arity: "one", description: "Config profile" },
|
|
287
281
|
"-c": {
|
|
288
282
|
arity: "one",
|
|
289
283
|
pattern: /^[a-zA-Z0-9._]+=([^\r\n]*)$/,
|
|
290
284
|
description: "Config override key=value",
|
|
291
285
|
},
|
|
286
|
+
"--config": {
|
|
287
|
+
arity: "one",
|
|
288
|
+
pattern: /^[a-zA-Z0-9._]+=([^\r\n]*)$/,
|
|
289
|
+
description: "Config override key=value",
|
|
290
|
+
},
|
|
291
|
+
"--enable": { arity: "one", description: "Enable a Codex feature flag" },
|
|
292
|
+
"--disable": { arity: "one", description: "Disable a Codex feature flag" },
|
|
293
|
+
"--strict-config": {
|
|
294
|
+
arity: "none",
|
|
295
|
+
description: "Reject unrecognized config.toml fields",
|
|
296
|
+
},
|
|
292
297
|
"--ephemeral": { arity: "none", description: "Do not persist session" },
|
|
293
298
|
"-i": { arity: "one", description: "Image path" },
|
|
299
|
+
"--image": { arity: "one", description: "Image path" },
|
|
294
300
|
"--ignore-user-config": { arity: "none", description: "Ignore user config" },
|
|
295
301
|
"--ignore-rules": { arity: "none", description: "Ignore rule files" },
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
302
|
+
"--oss": { arity: "none", description: "Use open-source provider" },
|
|
303
|
+
"--local-provider": {
|
|
304
|
+
arity: "one",
|
|
305
|
+
values: ["lmstudio", "ollama"],
|
|
306
|
+
description: "Local open-source provider",
|
|
307
|
+
},
|
|
308
|
+
"--color": {
|
|
309
|
+
arity: "one",
|
|
310
|
+
values: ["always", "never", "auto"],
|
|
311
|
+
description: "Output color mode",
|
|
312
|
+
},
|
|
313
|
+
"--output-last-message": {
|
|
314
|
+
arity: "one",
|
|
315
|
+
description: "Write the final agent message to a file",
|
|
316
|
+
},
|
|
317
|
+
"--dangerously-bypass-hook-trust": {
|
|
318
|
+
arity: "none",
|
|
319
|
+
description: "Run enabled hooks without persisted hook trust",
|
|
320
|
+
},
|
|
321
|
+
"--version": { arity: "none", description: "Print version" },
|
|
322
|
+
"--all": {
|
|
323
|
+
arity: "none",
|
|
324
|
+
description: "Resume picker: show all sessions without cwd filtering",
|
|
325
|
+
},
|
|
326
|
+
// The gateway emits the short form `-C`, and the advisory contract also
|
|
327
|
+
// tracks the long `--cd` alias advertised by current Codex exec help.
|
|
300
328
|
"-C": {
|
|
301
329
|
arity: "one",
|
|
302
330
|
description: "Working root for the session (Phase 4 slice ζ; new sessions only)",
|
|
303
331
|
},
|
|
332
|
+
"--cd": {
|
|
333
|
+
arity: "one",
|
|
334
|
+
description: "Working root for the session",
|
|
335
|
+
},
|
|
304
336
|
"--add-dir": {
|
|
305
337
|
arity: "one",
|
|
306
338
|
description: "Additional writable workspace directory (Phase 4 slice ζ; repeat once per directory; new sessions only)",
|
|
@@ -320,6 +352,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
320
352
|
args: ["exec", "--sandbox", "workspace", "hello"],
|
|
321
353
|
expect: "fail",
|
|
322
354
|
},
|
|
355
|
+
{
|
|
356
|
+
id: "codex-ask-for-approval-unsupported",
|
|
357
|
+
description: "Current Codex CLI no longer accepts --ask-for-approval",
|
|
358
|
+
args: ["exec", "--ask-for-approval", "never", "hello"],
|
|
359
|
+
expect: "fail",
|
|
360
|
+
},
|
|
361
|
+
{
|
|
362
|
+
id: "codex-full-auto-unsupported",
|
|
363
|
+
description: "Current Codex CLI no longer accepts --full-auto",
|
|
364
|
+
args: ["exec", "--full-auto", "hello"],
|
|
365
|
+
expect: "fail",
|
|
366
|
+
},
|
|
323
367
|
{
|
|
324
368
|
// Phase 4 slice α: --output-schema IS accepted on resume per
|
|
325
369
|
// codex-cli 0.133.0; this fixture pins the new behaviour so future
|
|
@@ -336,9 +380,9 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
336
380
|
expect: "pass",
|
|
337
381
|
},
|
|
338
382
|
{
|
|
339
|
-
id: "codex-
|
|
340
|
-
description: "
|
|
341
|
-
args: ["exec", "
|
|
383
|
+
id: "codex-search-unsupported",
|
|
384
|
+
description: "Current Codex exec no longer accepts --search",
|
|
385
|
+
args: ["exec", "--search", "hello"],
|
|
342
386
|
expect: "fail",
|
|
343
387
|
},
|
|
344
388
|
{
|
|
@@ -361,6 +405,41 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
361
405
|
],
|
|
362
406
|
expect: "pass",
|
|
363
407
|
},
|
|
408
|
+
{
|
|
409
|
+
id: "codex-current-exec-help-surface",
|
|
410
|
+
description: "Current Codex exec advertises additional config, output, provider, and safety flags",
|
|
411
|
+
args: [
|
|
412
|
+
"exec",
|
|
413
|
+
"--config",
|
|
414
|
+
"features.foo=true",
|
|
415
|
+
"--enable",
|
|
416
|
+
"foo",
|
|
417
|
+
"--disable",
|
|
418
|
+
"bar",
|
|
419
|
+
"--strict-config",
|
|
420
|
+
"--image",
|
|
421
|
+
"/tmp/a.png",
|
|
422
|
+
"--oss",
|
|
423
|
+
"--local-provider",
|
|
424
|
+
"ollama",
|
|
425
|
+
"--color",
|
|
426
|
+
"auto",
|
|
427
|
+
"--cd",
|
|
428
|
+
"/tmp/work",
|
|
429
|
+
"--output-last-message",
|
|
430
|
+
"/tmp/out.txt",
|
|
431
|
+
"--dangerously-bypass-hook-trust",
|
|
432
|
+
"--version",
|
|
433
|
+
"hello",
|
|
434
|
+
],
|
|
435
|
+
expect: "pass",
|
|
436
|
+
},
|
|
437
|
+
{
|
|
438
|
+
id: "codex-current-resume-help-surface",
|
|
439
|
+
description: "Current Codex resume advertises --all for disabling cwd filtering",
|
|
440
|
+
args: ["exec", "resume", "--all", "session-id", "hello"],
|
|
441
|
+
expect: "pass",
|
|
442
|
+
},
|
|
364
443
|
],
|
|
365
444
|
},
|
|
366
445
|
gemini: {
|
|
@@ -554,6 +633,38 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
554
633
|
arity: "one",
|
|
555
634
|
description: "Permission deny rule (Phase 4 slice θ; repeat once per rule per `grok --help`)",
|
|
556
635
|
},
|
|
636
|
+
"--agent": { arity: "one", description: "Agent name or definition file path" },
|
|
637
|
+
"--agents": { arity: "one", description: "Inline subagent definitions JSON" },
|
|
638
|
+
"--best-of-n": {
|
|
639
|
+
arity: "one",
|
|
640
|
+
pattern: /^[1-9][0-9]*$/,
|
|
641
|
+
description: "Run the task N ways in parallel and pick the best",
|
|
642
|
+
},
|
|
643
|
+
"--check": { arity: "none", description: "Append a self-verification loop" },
|
|
644
|
+
"--disable-web-search": {
|
|
645
|
+
arity: "none",
|
|
646
|
+
description: "Disable web search and web fetch tools",
|
|
647
|
+
},
|
|
648
|
+
"--experimental-memory": { arity: "none", description: "Enable cross-session memory" },
|
|
649
|
+
"--no-alt-screen": { arity: "none", description: "Run inline without alt screen" },
|
|
650
|
+
"--no-memory": { arity: "none", description: "Disable cross-session memory" },
|
|
651
|
+
"--no-plan": { arity: "none", description: "Disable plan mode" },
|
|
652
|
+
"--no-subagents": { arity: "none", description: "Disable subagent spawning" },
|
|
653
|
+
"--oauth": { arity: "none", description: "Use OAuth during authentication" },
|
|
654
|
+
"--prompt-file": { arity: "one", description: "Single-turn prompt from a file" },
|
|
655
|
+
"--prompt-json": { arity: "one", description: "Single-turn prompt JSON blocks" },
|
|
656
|
+
"--restore-code": {
|
|
657
|
+
arity: "none",
|
|
658
|
+
description: "Check out the original session commit when resuming",
|
|
659
|
+
},
|
|
660
|
+
"--single": { arity: "one", description: "Single-turn prompt" },
|
|
661
|
+
"--todo-gate": { arity: "none", description: "Enable runtime turn-end TodoGate" },
|
|
662
|
+
"--verbatim": { arity: "none", description: "Send prompt exactly as given" },
|
|
663
|
+
"--version": { arity: "none", description: "Print version" },
|
|
664
|
+
"--worktree": {
|
|
665
|
+
arity: "optional",
|
|
666
|
+
description: "Start the session in a new git worktree, optionally named",
|
|
667
|
+
},
|
|
557
668
|
},
|
|
558
669
|
env: {},
|
|
559
670
|
conformanceFixtures: [
|
|
@@ -617,6 +728,40 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
617
728
|
args: ["-p", "hello", "--deny", "write", "--deny", "kill"],
|
|
618
729
|
expect: "pass",
|
|
619
730
|
},
|
|
731
|
+
{
|
|
732
|
+
id: "grok-current-help-surface",
|
|
733
|
+
description: "Current Grok Build help advertises agent, prompt, memory, web, and worktree flags",
|
|
734
|
+
args: [
|
|
735
|
+
"-p",
|
|
736
|
+
"hello",
|
|
737
|
+
"--agent",
|
|
738
|
+
"reviewer",
|
|
739
|
+
"--agents",
|
|
740
|
+
"{}",
|
|
741
|
+
"--best-of-n",
|
|
742
|
+
"2",
|
|
743
|
+
"--check",
|
|
744
|
+
"--disable-web-search",
|
|
745
|
+
"--experimental-memory",
|
|
746
|
+
"--no-alt-screen",
|
|
747
|
+
"--no-memory",
|
|
748
|
+
"--no-plan",
|
|
749
|
+
"--no-subagents",
|
|
750
|
+
"--oauth",
|
|
751
|
+
"--prompt-file",
|
|
752
|
+
"/tmp/prompt.md",
|
|
753
|
+
"--prompt-json",
|
|
754
|
+
"[]",
|
|
755
|
+
"--restore-code",
|
|
756
|
+
"--single",
|
|
757
|
+
"single prompt",
|
|
758
|
+
"--todo-gate",
|
|
759
|
+
"--verbatim",
|
|
760
|
+
"--version",
|
|
761
|
+
"--worktree",
|
|
762
|
+
],
|
|
763
|
+
expect: "pass",
|
|
764
|
+
},
|
|
620
765
|
],
|
|
621
766
|
},
|
|
622
767
|
mistral: {
|
|
@@ -948,6 +1093,39 @@ function validateFlagValue(cli, arg, flag, value, index, violations) {
|
|
|
948
1093
|
});
|
|
949
1094
|
}
|
|
950
1095
|
}
|
|
1096
|
+
/**
 * Best-effort, advisory-only extraction of long-form flags from raw --help text.
 * Returns a sorted array of unique `--foo-bar` style flags discovered in the output.
 *
 * Heuristics:
 * - Only option declaration lines are inspected: after leading whitespace the
 *   line must start with `-`, and only the text before the first run of two or
 *   more whitespace characters is scanned. Flags mentioned in description
 *   prose (e.g. "(Claude Code: --allowedTools)") therefore create no false drift.
 * - Matching is case-insensitive so camelCase flags such as `--allowedTools`
 *   are captured whole rather than truncated at the first uppercase letter.
 * - Names are lowercased and `_` is normalized to `-` for stable comparison.
 * - `--help` is always skipped.
 *
 * This powers the bidirectional drift detector (extra flags the installed binary
 * advertises that our contract does not yet allow). It is NEVER used for argv
 * validation — only for the upstream scanner and `upstream_contracts` probe reports.
 *
 * @param {string} helpText Raw help output. A non-string value yields an empty list.
 * @returns {string[]} Sorted, de-duplicated long flags.
 */
export function extractDiscoveredFlags(helpText) {
    // Advisory helper: degrade to "nothing discovered" instead of throwing.
    if (typeof helpText !== "string") {
        return [];
    }
    const discovered = new Set();
    // Long flags: --foo, --foo-bar, --foo_bar, --fooBar. The `i` flag is what
    // makes the later toLowerCase() meaningful; without it `--fooBar` would be
    // cut down to `--foo`.
    const longRe = /--([a-z0-9][a-z0-9_-]+)/gi;
    for (const line of helpText.split(/\r?\n/)) {
        const trimmed = line.trimStart();
        if (!trimmed.startsWith("-")) {
            continue;
        }
        // Keep only the declaration column; the description follows a wide gap.
        const declaration = trimmed.split(/\s{2,}/, 1)[0] ?? "";
        for (const match of declaration.matchAll(longRe)) {
            const name = `--${match[1].toLowerCase().replace(/_/g, "-")}`;
            if (name === "--help") {
                continue;
            }
            discovered.add(name);
        }
    }
    return Array.from(discovered).sort();
}
|
|
951
1129
|
export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
|
|
952
1130
|
const contract = UPSTREAM_CLI_CONTRACTS[cli];
|
|
953
1131
|
const outputs = [];
|
|
@@ -979,6 +1157,11 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
|
|
|
979
1157
|
available: false,
|
|
980
1158
|
checkedHelpCommands: contract.helpArgs,
|
|
981
1159
|
missingFlags: [],
|
|
1160
|
+
extraFlags: [],
|
|
1161
|
+
discoveredFlags: [],
|
|
1162
|
+
helpHash: undefined,
|
|
1163
|
+
versionHint: undefined,
|
|
1164
|
+
probedAt: new Date().toISOString(),
|
|
982
1165
|
warnings: [result.error.message],
|
|
983
1166
|
};
|
|
984
1167
|
}
|
|
@@ -989,6 +1172,13 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
|
|
|
989
1172
|
}
|
|
990
1173
|
const helpText = outputs.join("\n");
|
|
991
1174
|
const missingFlags = Object.keys(contract.flags).filter(flag => !helpText.includes(flag));
|
|
1175
|
+
const discoveredFlags = extractDiscoveredFlags(helpText);
|
|
1176
|
+
const contractFlagSet = new Set(Object.keys(contract.flags));
|
|
1177
|
+
const extraFlags = discoveredFlags.filter(f => !contractFlagSet.has(f));
|
|
1178
|
+
// Cheap version hint: first line that looks like a version banner
|
|
1179
|
+
const versionMatch = helpText.match(/^\s*(?:[A-Za-z][\w .-]+)?v?\d+\.\d+\S*/m);
|
|
1180
|
+
const versionHint = versionMatch ? versionMatch[0].trim().slice(0, 80) : undefined;
|
|
1181
|
+
const helpHash = createHash("sha256").update(helpText).digest("hex");
|
|
992
1182
|
return {
|
|
993
1183
|
cli,
|
|
994
1184
|
executable: contract.executable,
|
|
@@ -997,6 +1187,11 @@ export function probeInstalledCliContract(cli, timeoutMs = 5_000) {
|
|
|
997
1187
|
available: true,
|
|
998
1188
|
checkedHelpCommands: contract.helpArgs,
|
|
999
1189
|
missingFlags,
|
|
1190
|
+
extraFlags,
|
|
1191
|
+
discoveredFlags,
|
|
1192
|
+
helpHash,
|
|
1193
|
+
versionHint,
|
|
1194
|
+
probedAt: new Date().toISOString(),
|
|
1000
1195
|
warnings,
|
|
1001
1196
|
};
|
|
1002
1197
|
}
|
package/dist/validation-tools.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
1
|
+
import { z } from "zod/v3";
|
|
2
2
|
import { getAvailableCliInfo } from "./model-registry.js";
|
|
3
3
|
import { collectValidationJobResult, startJudgeSynthesis, startValidationRun, } from "./validation-orchestrator.js";
|
|
4
4
|
const providerSchema = z.enum(["claude", "codex", "gemini", "grok", "mistral"]);
|