llm-cli-gateway 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +170 -0
- package/dist/index.d.ts +94 -0
- package/dist/index.js +139 -35
- package/dist/mistral-meta-json-parser.d.ts +6 -0
- package/dist/mistral-meta-json-parser.js +175 -0
- package/dist/request-helpers.d.ts +25 -5
- package/dist/request-helpers.js +14 -5
- package/dist/upstream-contracts.js +94 -9
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,176 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the llm-cli-gateway project.
|
|
4
4
|
|
|
5
|
+
## [1.9.0] - 2026-05-27 — Phase 4 slice δ (budget/max-turns parity) + retroactive α/γ contract closure
|
|
6
|
+
|
|
7
|
+
Ships the fourth Phase 4 slice (budget/max-turns parity for Grok and Mistral),
|
|
8
|
+
and retroactively closes three latent contract gaps that shipped silently in
|
|
9
|
+
v1.8.0 (slices α and γ). Five commits land together: the slice δ feature,
|
|
10
|
+
two bounds-tightening fixes, a contract-table closure, and a test-veracity
|
|
11
|
+
hardening pass driven by an iterative multi-LLM audit.
|
|
12
|
+
|
|
13
|
+
### Added — `maxTurns` / `maxPrice` budget caps (slice δ)
|
|
14
|
+
|
|
15
|
+
- `grok_request` and `grok_request_async` gain optional `maxTurns?: number`
|
|
16
|
+
→ emits `grok --max-turns N`. Grok exposes no per-request budget flag,
|
|
17
|
+
so `--max-price` is Mistral-only.
|
|
18
|
+
- `mistral_request` and `mistral_request_async` gain optional
|
|
19
|
+
`maxTurns?: number` → `vibe --max-turns N` AND `maxPrice?: number` →
|
|
20
|
+
`vibe --max-price DOLLARS`. Both apply only in programmatic mode (`-p`),
|
|
21
|
+
matching Vibe's documented constraint.
|
|
22
|
+
- The Mistral stale-model recovery retry path (extracted into a pure
|
|
23
|
+
`buildMistralRetryPrep` helper) preserves all three slice-γ/δ flags
|
|
24
|
+
(`trust`, `maxTurns`, `maxPrice`) on the second attempt.
|
|
25
|
+
- Defaults: undefined for all three new fields → no flag emitted →
|
|
26
|
+
existing callers see no behavioural change.
|
|
27
|
+
|
|
28
|
+
### Fixed — Bounded numeric schemas for lossless argv stringification
|
|
29
|
+
|
|
30
|
+
- Extracted two shared, exported Zod constants:
|
|
31
|
+
- `MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000)`
|
|
32
|
+
- `MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000)`
|
|
33
|
+
- The `.min(1e-6)` lower bound on price is exactly the boundary where
|
|
34
|
+
`String(N)` switches from decimal to scientific notation
|
|
35
|
+
(`String(1e-6) === "0.000001"` but `String(1e-7) === "1e-7"`); both
|
|
36
|
+
upstream CLIs reject scientific-notation values.
|
|
37
|
+
- Reused across all four slice-δ tool registrations so bounds stay
|
|
38
|
+
consistent if they ever need to change.
|
|
39
|
+
|
|
40
|
+
### Fixed — Upstream contract table closes 5 latent flag gaps
|
|
41
|
+
|
|
42
|
+
`assertUpstreamCliArgs` consults `UPSTREAM_CLI_CONTRACTS` on every real
|
|
43
|
+
`*_request` call. The following flags / mcpParameters were never registered
|
|
44
|
+
there before this release, so production calls setting any of them threw
|
|
45
|
+
"Upstream contract violation" at runtime even though the prepare-function
|
|
46
|
+
unit tests passed:
|
|
47
|
+
|
|
48
|
+
- **Gemini** (slice γ retroactive): `skipTrust` + `--skip-trust`.
|
|
49
|
+
- **Mistral** (slice γ + δ retroactive): `trust` + `--trust`; `maxTurns` +
|
|
50
|
+
`--max-turns`; `maxPrice` + `--max-price` (with a strict decimal-only
|
|
51
|
+
regex matching `MAX_PRICE_SCHEMA`'s lower bound).
|
|
52
|
+
- **Grok** (slice δ): `maxTurns` + `--max-turns`.
|
|
53
|
+
- **Codex** (slice α retroactive): `--output-schema` and `-c` removed
|
|
54
|
+
from `resumeForbiddenFlags` — verified accepted on `codex exec resume`
|
|
55
|
+
per codex-cli 0.133.0.
|
|
56
|
+
|
|
57
|
+
Conformance fixtures pin each new flag's argv shape, including a
|
|
58
|
+
`mistral-max-price-scientific-notation` fixture that locks the `1e-7`
|
|
59
|
+
rejection at the contract layer.
|
|
60
|
+
|
|
61
|
+
### Hardened — Test veracity (multi-LLM audit follow-up)
|
|
62
|
+
|
|
63
|
+
Codex + Grok ran iterative test-veracity audits with mutation probes per
|
|
64
|
+
`docs/plans/test-veracity-audit.spec.md`. They proved several added tests
|
|
65
|
+
were not falsifiable on the dimensions their commit messages claimed.
|
|
66
|
+
New file `src/__tests__/test-veracity-regressions.test.ts` closes those
|
|
67
|
+
gaps with six describe blocks:
|
|
68
|
+
|
|
69
|
+
- **REGRESSIONS A** — probes registered tool `inputSchema` bounds
|
|
70
|
+
directly (not the bare schema constants), so schema-drift in any of
|
|
71
|
+
the four sync/async registrations is caught.
|
|
72
|
+
- **REGRESSIONS B** — tests the pure `buildMistralRetryPrep` helper
|
|
73
|
+
across all combinations of `trust × maxTurns × maxPrice`.
|
|
74
|
+
Self-validated: dropping any of the three forwards on retry goes red.
|
|
75
|
+
- **REGRESSIONS C** — positive allowlist asserting slice α/γ/δ
|
|
76
|
+
parameters live in the matching contract's `mcpParameters` (closes
|
|
77
|
+
the self-oracle gap where removing a param from BOTH the contract
|
|
78
|
+
AND the schema previously stayed green).
|
|
79
|
+
- **REGRESSIONS D** — threads `prepare*Request` output into
|
|
80
|
+
`validateUpstreamCliArgs` end-to-end; the exact consistency check
|
|
81
|
+
that the latent v1.8.0 contract breaks would have failed.
|
|
82
|
+
- **REGRESSIONS E** — `it.each` over sync AND async variants of every
|
|
83
|
+
slice-touched tool; the existing C4 was sync-only.
|
|
84
|
+
- **REGRESSIONS F** — flag-fixture coverage map: every flag in each
|
|
85
|
+
contract `flags` table must be exercised by a passing fixture (with
|
|
86
|
+
a grandfathered pre-audit baseline). Forces future slice authors to
|
|
87
|
+
add a fixture alongside any new flag entry.
|
|
88
|
+
|
|
89
|
+
The existing C4 (`MCP request schemas expose the provider contract
|
|
90
|
+
parameters`) now walks `_async` tools too.
|
|
91
|
+
|
|
92
|
+
### Notes
|
|
93
|
+
|
|
94
|
+
Multi-LLM review across multiple iterative rounds, ending with a
|
|
95
|
+
dedicated test-veracity audit per Werner's strict-evidence protocol
|
|
96
|
+
(documented in `docs/plans/test-veracity-audit.spec.md`). Round 2 of the
|
|
97
|
+
audit landed UNCONDITIONAL APPROVE from Codex, Grok, Claude, and Mistral
|
|
98
|
+
with full mutation-probe evidence — every documented counterexample
|
|
99
|
+
mutation went red as predicted; tests are falsifiable by exactly the
|
|
100
|
+
regressions they claim to guard against. Gemini was quota-exhausted
|
|
101
|
+
during the audit window (~6h reset) and did not participate in round 2.
|
|
102
|
+
|
|
103
|
+
## [1.8.0] - 2026-05-27 — Phase 4 openers (codex resume fix, mistral telemetry, headless trust flags)
|
|
104
|
+
|
|
105
|
+
Ships the first three slices of the Phase 4 provider-modernisation
|
|
106
|
+
backlog — one bug fix and two small features. Multi-LLM review surfaced
|
|
107
|
+
five additional bug classes during the cycle (path traversal, UUID→dir
|
|
108
|
+
resolution gap, sync usage ctx drop, retry-path flag drop, symlink
|
|
109
|
+
boundary bypass); all are addressed in the two follow-up fix commits.
|
|
110
|
+
|
|
111
|
+
### Fixed — Codex `--output-schema` + `-c/--config` on `exec resume`
|
|
112
|
+
|
|
113
|
+
- `prepareCodexRequest` previously dropped `outputSchema` and
|
|
114
|
+
`configOverrides` on the resume branch because the U26 audit assumed
|
|
115
|
+
`codex exec resume` rejected both flags. Live re-verification against
|
|
116
|
+
`codex exec resume --help` (codex-cli 0.133.0) confirms both ARE
|
|
117
|
+
accepted on resume; only `--search` remains resume-incompatible. The
|
|
118
|
+
resume branch now threads both fields through, reusing the existing
|
|
119
|
+
outputSchema temp-file materialisation + cleanup contract.
|
|
120
|
+
`CODEX_RESUME_FILTERED_FLAGS` no longer strips `--output-schema`.
|
|
121
|
+
|
|
122
|
+
### Added — Mistral Vibe `meta.json` usage / cost telemetry
|
|
123
|
+
|
|
124
|
+
- New `src/mistral-meta-json-parser.ts` reads
|
|
125
|
+
`~/.vibe/logs/session/session_<YYYYMMDD>_<HHMMSS>_<first8hex>/meta.json`
|
|
126
|
+
(the actual filename — an earlier TODO at `src/index.ts:750` said
|
|
127
|
+
`metadata.json`, which was incorrect). Maps `stats.session_prompt_tokens`,
|
|
128
|
+
`stats.session_completion_tokens`, and `stats.session_cost` onto the
|
|
129
|
+
gateway's `inputTokens`/`outputTokens`/`costUsd` flight-recorder
|
|
130
|
+
columns. Cache-token surfaces stay undefined — Vibe doesn't expose
|
|
131
|
+
them today.
|
|
132
|
+
- The gateway's mistral sessionId surface accepts the full UUID (to match
|
|
133
|
+
`vibe --resume <uuid>`), but Vibe persists telemetry under
|
|
134
|
+
`session_<ts>_<first8>` directories. The new resolver globs by the
|
|
135
|
+
leading 8-hex prefix and verifies each candidate's `session_id` field
|
|
136
|
+
before returning — required for every UUID input including
|
|
137
|
+
single-match cases, so two UUIDs sharing the leading 8 hex chars never
|
|
138
|
+
cross-attribute usage.
|
|
139
|
+
- `extractUsageAndCost` and `buildAsyncFlightRecorderHandoff` thread a
|
|
140
|
+
primitives-only `{ sessionId, home }` context so the AsyncJobRecord
|
|
141
|
+
retention stays O(constant). `buildCliResponse` passes the same ctx so
|
|
142
|
+
sync `mistral_request` resume calls populate structured usage in their
|
|
143
|
+
response (not just the flight-recorder row).
|
|
144
|
+
|
|
145
|
+
### Added — Headless trust-prompt bypass for Gemini + Mistral
|
|
146
|
+
|
|
147
|
+
- New optional `skipTrust?: boolean` field on `gemini_request` and
|
|
148
|
+
`gemini_request_async`, defaulting `false`. When set, emits
|
|
149
|
+
`--skip-trust` so fresh workspaces don't block headless invocations on
|
|
150
|
+
Gemini's interactive trust prompt.
|
|
151
|
+
- New optional `trust?: boolean` field on `mistral_request` and
|
|
152
|
+
`mistral_request_async`, defaulting `false`. When set, emits `--trust`
|
|
153
|
+
(per-invocation only, not persisted to `trusted_folders.toml`) so
|
|
154
|
+
fresh workspaces don't block headless Vibe runs. Preserved on the
|
|
155
|
+
stale-model recovery retry path so a fresh untrusted workspace can't
|
|
156
|
+
deadlock on the second attempt.
|
|
157
|
+
- Default `false` preserves existing prompt behaviour for legacy
|
|
158
|
+
callers.
|
|
159
|
+
|
|
160
|
+
### Security
|
|
161
|
+
|
|
162
|
+
- `parseVibeMetaJson` enforces a strict input charset (UUID-shape OR
|
|
163
|
+
`^session_\d{8}_\d{6}_[0-9a-f]{8}$` Vibe dir basename) before any
|
|
164
|
+
filesystem access.
|
|
165
|
+
- New `readInBase(realBase, candidate)` helper realpath-resolves both
|
|
166
|
+
ends and rejects targets whose final inode lives outside the session
|
|
167
|
+
log root. Both the resolver's disambiguation reads and the final
|
|
168
|
+
parser read route through it, so an in-tree symlink to an
|
|
169
|
+
out-of-tree directory (or symlinked meta.json) cannot leak file
|
|
170
|
+
contents outside `~/.vibe/logs/session/`.
|
|
171
|
+
- Test coverage: traversal inputs (`../`, absolute, control-char,
|
|
172
|
+
embedded `../`), single-candidate prefix-collision rejection,
|
|
173
|
+
symlink-to-outside-baseDir rejection.
|
|
174
|
+
|
|
5
175
|
## [1.7.0] - 2026-05-26 — cache-awareness slice 1.5 (async-path flight recorder + codex parser fix)
|
|
6
176
|
|
|
7
177
|
Closes the two telemetry gaps that v1.6.0 explicitly deferred: async-path
|
package/dist/index.d.ts
CHANGED
|
@@ -54,6 +54,19 @@ declare const logger: {
|
|
|
54
54
|
debug: (message: string, ...args: any[]) => void;
|
|
55
55
|
};
|
|
56
56
|
type GatewayLogger = typeof logger;
|
|
57
|
+
/**
|
|
58
|
+
* Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
|
|
59
|
+
*
|
|
60
|
+
* Both flags reach the upstream CLIs as decimal-formatted argv strings via
|
|
61
|
+
* `String(N)`. `z.number().int().positive()` alone lets values past
|
|
62
|
+
* `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
|
|
63
|
+
* scientific notation that Grok and Vibe both reject. The bounds below
|
|
64
|
+
* (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
|
|
65
|
+
* for price) guarantee a lossless decimal stringification AND a sane
|
|
66
|
+
* upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
|
|
67
|
+
*/
|
|
68
|
+
export declare const MAX_TURNS_SCHEMA: z.ZodNumber;
|
|
69
|
+
export declare const MAX_PRICE_SCHEMA: z.ZodNumber;
|
|
57
70
|
export declare const SESSION_PROVIDER_VALUES: readonly ["claude", "codex", "gemini", "grok", "mistral"];
|
|
58
71
|
export declare const SESSION_PROVIDER_ENUM: z.ZodEnum<["claude", "codex", "gemini", "grok", "mistral"]>;
|
|
59
72
|
export type SessionProvider = (typeof SESSION_PROVIDER_VALUES)[number];
|
|
@@ -81,6 +94,23 @@ interface GatewayServerRuntime {
|
|
|
81
94
|
persistence: PersistenceConfig;
|
|
82
95
|
cacheAwareness: CacheAwarenessConfig;
|
|
83
96
|
}
|
|
97
|
+
export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
|
|
98
|
+
/**
|
|
99
|
+
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
100
|
+
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
101
|
+
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
102
|
+
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
103
|
+
*/
|
|
104
|
+
ctx?: {
|
|
105
|
+
sessionId?: string;
|
|
106
|
+
home?: string;
|
|
107
|
+
}): {
|
|
108
|
+
inputTokens?: number;
|
|
109
|
+
outputTokens?: number;
|
|
110
|
+
cacheReadTokens?: number;
|
|
111
|
+
cacheCreationTokens?: number;
|
|
112
|
+
costUsd?: number;
|
|
113
|
+
};
|
|
84
114
|
interface CliRequestPrep {
|
|
85
115
|
corrId: string;
|
|
86
116
|
effectivePrompt: string;
|
|
@@ -191,6 +221,35 @@ export declare function prepareGeminiRequest(params: {
|
|
|
191
221
|
policyFiles?: string[];
|
|
192
222
|
adminPolicyFiles?: string[];
|
|
193
223
|
attachments?: string[];
|
|
224
|
+
/**
|
|
225
|
+
* Phase 4 slice γ: emit `--skip-trust` so first-run workspaces don't
|
|
226
|
+
* block headless invocations on the interactive trust prompt. Default
|
|
227
|
+
* is undefined (preserves current prompt behaviour for legacy callers).
|
|
228
|
+
*/
|
|
229
|
+
skipTrust?: boolean;
|
|
230
|
+
}, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
|
|
231
|
+
export declare function prepareGrokRequest(params: {
|
|
232
|
+
prompt?: string;
|
|
233
|
+
promptParts?: PromptParts;
|
|
234
|
+
model?: string;
|
|
235
|
+
outputFormat?: string;
|
|
236
|
+
alwaysApprove?: boolean;
|
|
237
|
+
permissionMode?: string;
|
|
238
|
+
effort?: string;
|
|
239
|
+
reasoningEffort?: string;
|
|
240
|
+
allowedTools?: string[];
|
|
241
|
+
disallowedTools?: string[];
|
|
242
|
+
approvalStrategy: "legacy" | "mcp_managed";
|
|
243
|
+
approvalPolicy?: string;
|
|
244
|
+
mcpServers?: ClaudeMcpServerName[];
|
|
245
|
+
correlationId?: string;
|
|
246
|
+
optimizePrompt: boolean;
|
|
247
|
+
operation: string;
|
|
248
|
+
/**
|
|
249
|
+
* Phase 4 slice δ: emit `--max-turns N` so callers can cap agent-loop
|
|
250
|
+
* iterations for cost / latency control. Mirrors Claude's wiring.
|
|
251
|
+
*/
|
|
252
|
+
maxTurns?: number;
|
|
194
253
|
}, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
|
|
195
254
|
export declare function prepareMistralRequest(params: {
|
|
196
255
|
prompt?: string;
|
|
@@ -208,9 +267,34 @@ export declare function prepareMistralRequest(params: {
|
|
|
208
267
|
correlationId?: string;
|
|
209
268
|
optimizePrompt: boolean;
|
|
210
269
|
operation: string;
|
|
270
|
+
/**
|
|
271
|
+
* Phase 4 slice γ: emit `--trust` to bypass Vibe's interactive trust
|
|
272
|
+
* prompt for this invocation only (not persisted). Default undefined.
|
|
273
|
+
*/
|
|
274
|
+
trust?: boolean;
|
|
275
|
+
/** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
|
|
276
|
+
maxTurns?: number;
|
|
277
|
+
/** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
|
|
278
|
+
maxPrice?: number;
|
|
211
279
|
}, runtime?: GatewayServerRuntime): (CliRequestPrep & {
|
|
212
280
|
mistralEnv: Record<string, string>;
|
|
213
281
|
}) | ExtendedToolResponse;
|
|
282
|
+
/**
|
|
283
|
+
* Phase 4 slice δ post-review: pure helper extracted from
|
|
284
|
+
* `handleMistralRequest` so the retry-path arg-preservation invariants
|
|
285
|
+
* (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
|
|
286
|
+
* without mocking awaitJobOrDefer. Any param the wrapper threads into
|
|
287
|
+
* the FIRST `buildMistralCliInvocation` call MUST also be threaded
|
|
288
|
+
* through here, or a fresh-workspace / budgeted run can degrade on
|
|
289
|
+
* the second attempt.
|
|
290
|
+
*/
|
|
291
|
+
export declare function buildMistralRetryPrep(params: Pick<MistralRequestParams, "outputFormat" | "permissionMode" | "effort" | "reasoningEffort" | "allowedTools" | "disallowedTools" | "approvalStrategy" | "trust" | "maxTurns" | "maxPrice"> & {
|
|
292
|
+
effectivePrompt: string;
|
|
293
|
+
}, recoveryModel: string): {
|
|
294
|
+
args: string[];
|
|
295
|
+
env: Record<string, string>;
|
|
296
|
+
ignoredDisallowedTools: boolean;
|
|
297
|
+
};
|
|
214
298
|
export interface GeminiRequestParams {
|
|
215
299
|
prompt?: string;
|
|
216
300
|
promptParts?: PromptParts;
|
|
@@ -235,6 +319,8 @@ export interface GeminiRequestParams {
|
|
|
235
319
|
policyFiles?: string[];
|
|
236
320
|
adminPolicyFiles?: string[];
|
|
237
321
|
attachments?: string[];
|
|
322
|
+
/** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
|
|
323
|
+
skipTrust?: boolean;
|
|
238
324
|
}
|
|
239
325
|
export interface HandlerDeps {
|
|
240
326
|
sessionManager: ISessionManager;
|
|
@@ -273,6 +359,8 @@ export interface GrokRequestParams {
|
|
|
273
359
|
optimizeResponse?: boolean;
|
|
274
360
|
idleTimeoutMs?: number;
|
|
275
361
|
forceRefresh?: boolean;
|
|
362
|
+
/** Phase 4 slice δ: cap agent-loop iterations via `--max-turns N`. */
|
|
363
|
+
maxTurns?: number;
|
|
276
364
|
}
|
|
277
365
|
export declare function handleGrokRequest(deps: HandlerDeps, params: GrokRequestParams): Promise<ExtendedToolResponse>;
|
|
278
366
|
export declare function handleGrokRequestAsync(deps: AsyncHandlerDeps, params: Omit<GrokRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
|
@@ -297,6 +385,12 @@ export interface MistralRequestParams {
|
|
|
297
385
|
optimizeResponse?: boolean;
|
|
298
386
|
idleTimeoutMs?: number;
|
|
299
387
|
forceRefresh?: boolean;
|
|
388
|
+
/** Phase 4 slice γ: emit `--trust` for fresh-workspace headless runs. */
|
|
389
|
+
trust?: boolean;
|
|
390
|
+
/** Phase 4 slice δ: Vibe `--max-turns N` cap on agent-loop iterations. */
|
|
391
|
+
maxTurns?: number;
|
|
392
|
+
/** Phase 4 slice δ: Vibe `--max-price DOLLARS` cumulative-cost cap. */
|
|
393
|
+
maxPrice?: number;
|
|
300
394
|
}
|
|
301
395
|
export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
|
|
302
396
|
export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|
package/dist/index.js
CHANGED
|
@@ -10,6 +10,8 @@ import { executeCli, killAllProcessGroups } from "./executor.js";
|
|
|
10
10
|
import { parseStreamJson } from "./stream-json-parser.js";
|
|
11
11
|
import { parseCodexJsonStream } from "./codex-json-parser.js";
|
|
12
12
|
import { parseGeminiJson } from "./gemini-json-parser.js";
|
|
13
|
+
import { parseVibeMetaJson } from "./mistral-meta-json-parser.js";
|
|
14
|
+
import { homedir } from "os";
|
|
13
15
|
import { createSessionManager } from "./session-manager.js";
|
|
14
16
|
import { ResourceProvider } from "./resources.js";
|
|
15
17
|
import { PerformanceMetrics } from "./metrics.js";
|
|
@@ -227,6 +229,23 @@ function getApprovalManager(runtimeLogger = logger) {
|
|
|
227
229
|
return approvalManager;
|
|
228
230
|
}
|
|
229
231
|
const MCP_SERVER_ENUM = z.enum(CLAUDE_MCP_SERVER_NAMES);
|
|
232
|
+
/**
|
|
233
|
+
* Phase 4 slice δ — shared Zod fragments for `maxTurns` / `maxPrice`.
|
|
234
|
+
*
|
|
235
|
+
* Both flags reach the upstream CLIs as decimal-formatted argv strings via
|
|
236
|
+
* `String(N)`. `z.number().int().positive()` alone lets values past
|
|
237
|
+
* `Number.MAX_SAFE_INTEGER` through, after which `String(1e21)` emits
|
|
238
|
+
* scientific notation that Grok and Vibe both reject. The bounds below
|
|
239
|
+
* (safe-integer cap + 10000 ceiling for turns; finite + 10000 USD ceiling
|
|
240
|
+
* for price) guarantee a lossless decimal stringification AND a sane
|
|
241
|
+
* upper bound — no plausible single agent loop exceeds 10k turns or 10k USD.
|
|
242
|
+
*/
|
|
243
|
+
export const MAX_TURNS_SCHEMA = z.number().int().positive().safe().max(10_000);
|
|
244
|
+
// `.min(1e-6)` keeps the value in JS's decimal-stringify range:
|
|
245
|
+
// String(1e-6) === "0.000001" but String(1e-7) === "1e-7", which both
|
|
246
|
+
// upstream CLIs would reject. 1µUSD per request is fine-grained enough
|
|
247
|
+
// for any plausible budget-cap use.
|
|
248
|
+
export const MAX_PRICE_SCHEMA = z.number().positive().finite().min(1e-6).max(10_000);
|
|
230
249
|
// U22: Session-provider enum extended to five providers. The storage layer's
|
|
231
250
|
// CLI_TYPES already includes "mistral"; the MCP-tool layer mirrors that here so
|
|
232
251
|
// session_create / session_list / session_clear_all accept the fifth provider.
|
|
@@ -477,7 +496,14 @@ function createErrorResponse(cli, code, stderr, correlationId, error) {
|
|
|
477
496
|
},
|
|
478
497
|
};
|
|
479
498
|
}
|
|
480
|
-
function extractUsageAndCost(cli, output, outputFormat
|
|
499
|
+
export function extractUsageAndCost(cli, output, outputFormat,
|
|
500
|
+
/**
|
|
501
|
+
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
502
|
+
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
503
|
+
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
504
|
+
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
505
|
+
*/
|
|
506
|
+
ctx) {
|
|
481
507
|
if (cli === "claude" && outputFormat === "stream-json") {
|
|
482
508
|
const parsed = parseStreamJson(output);
|
|
483
509
|
if (!parsed.usage) {
|
|
@@ -515,9 +541,14 @@ function extractUsageAndCost(cli, output, outputFormat) {
|
|
|
515
541
|
cacheReadTokens: parsed.usage.cache_read_tokens,
|
|
516
542
|
};
|
|
517
543
|
}
|
|
518
|
-
// Mistral/Vibe:
|
|
519
|
-
//
|
|
520
|
-
//
|
|
544
|
+
// Mistral/Vibe: usage/cost live on disk in `~/.vibe/logs/session/<id>/meta.json`
|
|
545
|
+
// (Phase 4 slice β). Best-effort: if we don't know the sessionId (fresh
|
|
546
|
+
// session whose Vibe-assigned UUID we never observed) or the file is
|
|
547
|
+
// missing/malformed, the parser returns `{}` and the FR row simply lacks
|
|
548
|
+
// usage data — matching pre-slice behaviour. No stdout fallback exists.
|
|
549
|
+
if (cli === "mistral") {
|
|
550
|
+
return parseVibeMetaJson(ctx?.home ?? homedir(), ctx?.sessionId);
|
|
551
|
+
}
|
|
521
552
|
return {};
|
|
522
553
|
}
|
|
523
554
|
/**
|
|
@@ -530,9 +561,13 @@ function extractUsageAndCost(cli, output, outputFormat) {
|
|
|
530
561
|
function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat) {
|
|
531
562
|
// Extract primitives BEFORE building the closure — capturing `prep` or
|
|
532
563
|
// `params` directly would pin large attachments / promptParts on the
|
|
533
|
-
// AsyncJobRecord for JOB_TTL_MS.
|
|
564
|
+
// AsyncJobRecord for JOB_TTL_MS. Phase 4 slice β: `sid` and `home` are
|
|
565
|
+
// primitives too, threaded through so the Mistral branch of
|
|
566
|
+
// extractUsageAndCost can read `~/.vibe/logs/session/<id>/meta.json`.
|
|
534
567
|
const cli = cliName;
|
|
535
568
|
const fmt = outputFormat;
|
|
569
|
+
const sid = sessionId;
|
|
570
|
+
const home = homedir();
|
|
536
571
|
return {
|
|
537
572
|
flightRecorderEntry: {
|
|
538
573
|
model: prep.resolvedModel || "default",
|
|
@@ -541,7 +576,7 @@ function buildAsyncFlightRecorderHandoff(cliName, prep, sessionId, outputFormat)
|
|
|
541
576
|
stablePrefixHash: prep.stablePrefixHash ?? undefined,
|
|
542
577
|
stablePrefixTokens: prep.stablePrefixTokens ?? undefined,
|
|
543
578
|
},
|
|
544
|
-
extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt),
|
|
579
|
+
extractUsage: (stdout) => extractUsageAndCost(cli, stdout, fmt, { sessionId: sid, home }),
|
|
545
580
|
};
|
|
546
581
|
}
|
|
547
582
|
function safeFlightStart(entry, runtime = resolveGatewayServerRuntime()) {
|
|
@@ -1081,11 +1116,12 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1081
1116
|
args.push("--json");
|
|
1082
1117
|
}
|
|
1083
1118
|
args.push("--skip-git-repo-check");
|
|
1084
|
-
// U26: High-impact feature flags.
|
|
1085
|
-
//
|
|
1086
|
-
// only emit
|
|
1087
|
-
//
|
|
1088
|
-
//
|
|
1119
|
+
// U26: High-impact feature flags. `--search` is rejected by
|
|
1120
|
+
// `codex exec resume` (resume inherits the original session's web-search
|
|
1121
|
+
// state), so we only emit it on a NEW session. `--output-schema`,
|
|
1122
|
+
// `-c key=value`, profile, ephemeral, images, and the ignore-* flags are
|
|
1123
|
+
// all accepted on resume per `codex exec resume --help` (codex-cli 0.133.0)
|
|
1124
|
+
// and are emitted in both branches.
|
|
1089
1125
|
let highImpactCleanup;
|
|
1090
1126
|
if (sessionPlan.mode === "new") {
|
|
1091
1127
|
const high = prepareCodexHighImpactFlags({
|
|
@@ -1105,12 +1141,10 @@ export function prepareCodexRequest(params, runtime = resolveGatewayServerRuntim
|
|
|
1105
1141
|
highImpactCleanup = high.cleanup;
|
|
1106
1142
|
}
|
|
1107
1143
|
else {
|
|
1108
|
-
// On resume, emit only the resume-safe subset (profile, ephemeral,
|
|
1109
|
-
// images, ignoreUserConfig, ignoreRules). outputSchema, search, and
|
|
1110
|
-
// configOverrides are dropped silently to mirror existing behavior for
|
|
1111
|
-
// sandbox/ask-for-approval on resume.
|
|
1112
1144
|
const high = prepareCodexHighImpactFlags({
|
|
1145
|
+
outputSchema: params.outputSchema,
|
|
1113
1146
|
profile: params.profile,
|
|
1147
|
+
configOverrides: params.configOverrides,
|
|
1114
1148
|
ephemeral: params.ephemeral,
|
|
1115
1149
|
images: params.images,
|
|
1116
1150
|
ignoreUserConfig: params.ignoreUserConfig,
|
|
@@ -1240,6 +1274,10 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1240
1274
|
if (params.outputFormat === "json") {
|
|
1241
1275
|
args.push("-o", "json");
|
|
1242
1276
|
}
|
|
1277
|
+
// Phase 4 slice γ: opt-in trust-prompt bypass for fresh workspaces.
|
|
1278
|
+
if (params.skipTrust) {
|
|
1279
|
+
args.push("--skip-trust");
|
|
1280
|
+
}
|
|
1243
1281
|
return {
|
|
1244
1282
|
corrId,
|
|
1245
1283
|
effectivePrompt,
|
|
@@ -1252,7 +1290,7 @@ export function prepareGeminiRequest(params, runtime = resolveGatewayServerRunti
|
|
|
1252
1290
|
stablePrefixTokens,
|
|
1253
1291
|
};
|
|
1254
1292
|
}
|
|
1255
|
-
function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1293
|
+
export function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
1256
1294
|
const corrId = params.correlationId || randomUUID();
|
|
1257
1295
|
const cliInfo = getCliInfo();
|
|
1258
1296
|
const resolvedModel = resolveModelAlias("grok", params.model, cliInfo);
|
|
@@ -1328,6 +1366,9 @@ function prepareGrokRequest(params, runtime = resolveGatewayServerRuntime()) {
|
|
|
1328
1366
|
if (params.disallowedTools && params.disallowedTools.length > 0) {
|
|
1329
1367
|
args.push("--disallowed-tools", params.disallowedTools.join(","));
|
|
1330
1368
|
}
|
|
1369
|
+
if (params.maxTurns !== undefined) {
|
|
1370
|
+
args.push("--max-turns", String(params.maxTurns));
|
|
1371
|
+
}
|
|
1331
1372
|
return {
|
|
1332
1373
|
corrId,
|
|
1333
1374
|
effectivePrompt,
|
|
@@ -1411,6 +1452,9 @@ export function prepareMistralRequest(params, runtime = resolveGatewayServerRunt
|
|
|
1411
1452
|
reasoningEffort: params.reasoningEffort,
|
|
1412
1453
|
allowedTools: params.allowedTools,
|
|
1413
1454
|
disallowedTools: params.disallowedTools,
|
|
1455
|
+
trust: params.trust,
|
|
1456
|
+
maxTurns: params.maxTurns,
|
|
1457
|
+
maxPrice: params.maxPrice,
|
|
1414
1458
|
});
|
|
1415
1459
|
if (prep.ignoredDisallowedTools) {
|
|
1416
1460
|
runtime.logger.info(`[${corrId}] Mistral does not support disallowedTools; ignoring (caller passed ${params.disallowedTools?.length ?? 0} entries)`);
|
|
@@ -1441,6 +1485,32 @@ function selectMistralRecoveryModel(failedModel) {
|
|
|
1441
1485
|
].filter((model) => Boolean(model && model !== failedModel));
|
|
1442
1486
|
return candidates.find(model => model !== "local");
|
|
1443
1487
|
}
|
|
1488
|
+
/**
|
|
1489
|
+
* Phase 4 slice δ post-review: pure helper extracted from
|
|
1490
|
+
* `handleMistralRequest` so the retry-path arg-preservation invariants
|
|
1491
|
+
* (trust + maxTurns + maxPrice from slices γ/δ) are unit-testable
|
|
1492
|
+
* without mocking awaitJobOrDefer. Any param the wrapper threads into
|
|
1493
|
+
* the FIRST `buildMistralCliInvocation` call MUST also be threaded
|
|
1494
|
+
* through here, or a fresh-workspace / budgeted run can degrade on
|
|
1495
|
+
* the second attempt.
|
|
1496
|
+
*/
|
|
1497
|
+
export function buildMistralRetryPrep(params, recoveryModel) {
|
|
1498
|
+
return buildMistralCliInvocation({
|
|
1499
|
+
prompt: params.effectivePrompt,
|
|
1500
|
+
resolvedModel: recoveryModel,
|
|
1501
|
+
outputFormat: params.outputFormat,
|
|
1502
|
+
permissionMode: params.approvalStrategy === "mcp_managed"
|
|
1503
|
+
? "auto-approve"
|
|
1504
|
+
: (params.permissionMode ?? "auto-approve"),
|
|
1505
|
+
effort: params.effort,
|
|
1506
|
+
reasoningEffort: params.reasoningEffort,
|
|
1507
|
+
allowedTools: params.allowedTools,
|
|
1508
|
+
disallowedTools: params.disallowedTools,
|
|
1509
|
+
trust: params.trust,
|
|
1510
|
+
maxTurns: params.maxTurns,
|
|
1511
|
+
maxPrice: params.maxPrice,
|
|
1512
|
+
});
|
|
1513
|
+
}
|
|
1444
1514
|
function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep, durationMs, resumable, outputFormat, warnings) {
|
|
1445
1515
|
let finalStdout = stdout;
|
|
1446
1516
|
// Skip response optimization for JSON output to prevent corrupting structured data
|
|
@@ -1466,7 +1536,10 @@ function buildCliResponse(cli, stdout, optimizeResponse, corrId, sessionId, prep
|
|
|
1466
1536
|
correlationId: corrId,
|
|
1467
1537
|
sessionId: sessionId || null,
|
|
1468
1538
|
durationMs,
|
|
1469
|
-
|
|
1539
|
+
// Phase 4 slice β: thread sessionId + home so the Mistral branch of
|
|
1540
|
+
// extractUsageAndCost can read `~/.vibe/logs/session/<dir>/meta.json`.
|
|
1541
|
+
// Other CLIs ignore the ctx (their usage source is stdout).
|
|
1542
|
+
...extractUsageAndCost(cli, stdout, outputFormat, { sessionId, home: homedir() }),
|
|
1470
1543
|
exitCode: 0,
|
|
1471
1544
|
retryCount: 0,
|
|
1472
1545
|
},
|
|
@@ -1564,6 +1637,7 @@ export async function handleGeminiRequest(deps, params) {
|
|
|
1564
1637
|
policyFiles: params.policyFiles,
|
|
1565
1638
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
1566
1639
|
attachments: params.attachments,
|
|
1640
|
+
skipTrust: params.skipTrust,
|
|
1567
1641
|
}, runtime);
|
|
1568
1642
|
if (!("args" in prep))
|
|
1569
1643
|
return prep;
|
|
@@ -1692,6 +1766,7 @@ export async function handleGeminiRequestAsync(deps, params) {
|
|
|
1692
1766
|
policyFiles: params.policyFiles,
|
|
1693
1767
|
adminPolicyFiles: params.adminPolicyFiles,
|
|
1694
1768
|
attachments: params.attachments,
|
|
1769
|
+
skipTrust: params.skipTrust,
|
|
1695
1770
|
}, runtime);
|
|
1696
1771
|
if (!("args" in prep))
|
|
1697
1772
|
return prep;
|
|
@@ -1774,6 +1849,7 @@ export async function handleGrokRequest(deps, params) {
|
|
|
1774
1849
|
correlationId: params.correlationId,
|
|
1775
1850
|
optimizePrompt: params.optimizePrompt,
|
|
1776
1851
|
operation: "grok_request",
|
|
1852
|
+
maxTurns: params.maxTurns,
|
|
1777
1853
|
}, runtime);
|
|
1778
1854
|
if (!("args" in prep))
|
|
1779
1855
|
return prep;
|
|
@@ -1894,6 +1970,7 @@ export async function handleGrokRequestAsync(deps, params) {
|
|
|
1894
1970
|
correlationId: params.correlationId,
|
|
1895
1971
|
optimizePrompt: params.optimizePrompt,
|
|
1896
1972
|
operation: "grok_request_async",
|
|
1973
|
+
maxTurns: params.maxTurns,
|
|
1897
1974
|
}, runtime);
|
|
1898
1975
|
if (!("args" in prep))
|
|
1899
1976
|
return prep;
|
|
@@ -1975,6 +2052,9 @@ export async function handleMistralRequest(deps, params) {
|
|
|
1975
2052
|
correlationId: params.correlationId,
|
|
1976
2053
|
optimizePrompt: params.optimizePrompt,
|
|
1977
2054
|
operation: "mistral_request",
|
|
2055
|
+
trust: params.trust,
|
|
2056
|
+
maxTurns: params.maxTurns,
|
|
2057
|
+
maxPrice: params.maxPrice,
|
|
1978
2058
|
}, runtime);
|
|
1979
2059
|
if (!("args" in prep))
|
|
1980
2060
|
return prep;
|
|
@@ -2007,18 +2087,7 @@ export async function handleMistralRequest(deps, params) {
|
|
|
2007
2087
|
const recoveryModel = selectMistralRecoveryModel(prep.resolvedModel);
|
|
2008
2088
|
if (recoveryModel) {
|
|
2009
2089
|
deps.logger.info(`[${corrId}] mistral_request detected stale Vibe model selection; retrying once with ${recoveryModel}`);
|
|
2010
|
-
const retryPrep = buildMistralCliInvocation({
|
|
2011
|
-
prompt: prep.effectivePrompt,
|
|
2012
|
-
resolvedModel: recoveryModel,
|
|
2013
|
-
outputFormat: params.outputFormat,
|
|
2014
|
-
permissionMode: params.approvalStrategy === "mcp_managed"
|
|
2015
|
-
? "auto-approve"
|
|
2016
|
-
: (params.permissionMode ?? "auto-approve"),
|
|
2017
|
-
effort: params.effort,
|
|
2018
|
-
reasoningEffort: params.reasoningEffort,
|
|
2019
|
-
allowedTools: params.allowedTools,
|
|
2020
|
-
disallowedTools: params.disallowedTools,
|
|
2021
|
-
});
|
|
2090
|
+
const retryPrep = buildMistralRetryPrep({ ...params, effectivePrompt: prep.effectivePrompt }, recoveryModel);
|
|
2022
2091
|
const retryArgs = [...retryPrep.args, ...sessionResult.resumeArgs];
|
|
2023
2092
|
// Reuse the FR handoff built above — the retry preserves corrId,
|
|
2024
2093
|
// so the manager's logComplete still updates the original row.
|
|
@@ -2118,6 +2187,9 @@ export async function handleMistralRequestAsync(deps, params) {
|
|
|
2118
2187
|
correlationId: params.correlationId,
|
|
2119
2188
|
optimizePrompt: params.optimizePrompt,
|
|
2120
2189
|
operation: "mistral_request_async",
|
|
2190
|
+
trust: params.trust,
|
|
2191
|
+
maxTurns: params.maxTurns,
|
|
2192
|
+
maxPrice: params.maxPrice,
|
|
2121
2193
|
}, runtime);
|
|
2122
2194
|
if (!("args" in prep))
|
|
2123
2195
|
return prep;
|
|
@@ -3006,7 +3078,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3006
3078
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3007
3079
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3008
3080
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3009
|
-
|
|
3081
|
+
skipTrust: z
|
|
3082
|
+
.boolean()
|
|
3083
|
+
.default(false)
|
|
3084
|
+
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3085
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
|
|
3010
3086
|
return handleGeminiRequest({ sessionManager, logger, runtime }, {
|
|
3011
3087
|
prompt,
|
|
3012
3088
|
promptParts,
|
|
@@ -3030,6 +3106,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3030
3106
|
policyFiles,
|
|
3031
3107
|
adminPolicyFiles,
|
|
3032
3108
|
attachments,
|
|
3109
|
+
skipTrust,
|
|
3033
3110
|
});
|
|
3034
3111
|
});
|
|
3035
3112
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3104,7 +3181,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3104
3181
|
.boolean()
|
|
3105
3182
|
.default(false)
|
|
3106
3183
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3107
|
-
|
|
3184
|
+
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3185
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, maxTurns, }) => {
|
|
3108
3186
|
return handleGrokRequest({ sessionManager, logger, runtime }, {
|
|
3109
3187
|
prompt,
|
|
3110
3188
|
promptParts,
|
|
@@ -3127,6 +3205,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3127
3205
|
optimizeResponse,
|
|
3128
3206
|
idleTimeoutMs,
|
|
3129
3207
|
forceRefresh,
|
|
3208
|
+
maxTurns,
|
|
3130
3209
|
});
|
|
3131
3210
|
});
|
|
3132
3211
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3200,7 +3279,13 @@ export function createGatewayServer(deps = {}) {
|
|
|
3200
3279
|
.boolean()
|
|
3201
3280
|
.default(false)
|
|
3202
3281
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3203
|
-
|
|
3282
|
+
trust: z
|
|
3283
|
+
.boolean()
|
|
3284
|
+
.default(false)
|
|
3285
|
+
.describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
|
|
3286
|
+
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3287
|
+
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
3288
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, optimizeResponse, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, }) => {
|
|
3204
3289
|
return handleMistralRequest({ sessionManager, logger, runtime }, {
|
|
3205
3290
|
prompt,
|
|
3206
3291
|
promptParts,
|
|
@@ -3222,6 +3307,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3222
3307
|
optimizeResponse,
|
|
3223
3308
|
idleTimeoutMs,
|
|
3224
3309
|
forceRefresh,
|
|
3310
|
+
trust,
|
|
3311
|
+
maxTurns,
|
|
3312
|
+
maxPrice,
|
|
3225
3313
|
});
|
|
3226
3314
|
});
|
|
3227
3315
|
//──────────────────────────────────────────────────────────────────────────────
|
|
@@ -3612,7 +3700,11 @@ export function createGatewayServer(deps = {}) {
|
|
|
3612
3700
|
policyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.policyFiles.describe("Policy file paths (--policy <path>, one per file). Paths must exist."),
|
|
3613
3701
|
adminPolicyFiles: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.adminPolicyFiles.describe("Admin policy file paths (--admin-policy <path>, one per file). Paths must exist."),
|
|
3614
3702
|
attachments: GEMINI_HIGH_IMPACT_PARAMS_SCHEMA.shape.attachments.describe("Absolute file paths prepended as @<path> tokens to the prompt"),
|
|
3615
|
-
|
|
3703
|
+
skipTrust: z
|
|
3704
|
+
.boolean()
|
|
3705
|
+
.default(false)
|
|
3706
|
+
.describe("Emit `--skip-trust` so Gemini trusts the workspace for this session and skips the interactive trust prompt (Phase 4 slice γ). Required for headless runs in fresh workspaces."),
|
|
3707
|
+
}, async ({ prompt, promptParts, model, sessionId, resumeLatest, createNewSession, approvalMode, approvalStrategy, approvalPolicy, mcpServers, allowedTools, includeDirs, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, outputFormat, sandbox, policyFiles, adminPolicyFiles, attachments, skipTrust, }) => {
|
|
3616
3708
|
return handleGeminiRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3617
3709
|
prompt,
|
|
3618
3710
|
promptParts,
|
|
@@ -3635,6 +3727,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3635
3727
|
policyFiles,
|
|
3636
3728
|
adminPolicyFiles,
|
|
3637
3729
|
attachments,
|
|
3730
|
+
skipTrust,
|
|
3638
3731
|
});
|
|
3639
3732
|
});
|
|
3640
3733
|
server.tool("grok_request_async", {
|
|
@@ -3705,7 +3798,8 @@ export function createGatewayServer(deps = {}) {
|
|
|
3705
3798
|
.boolean()
|
|
3706
3799
|
.default(false)
|
|
3707
3800
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3708
|
-
|
|
3801
|
+
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Grok `--max-turns N`: cap on agent-loop iterations for cost / latency control (Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3802
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, alwaysApprove, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, maxTurns, }) => {
|
|
3709
3803
|
return handleGrokRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3710
3804
|
prompt,
|
|
3711
3805
|
promptParts,
|
|
@@ -3727,6 +3821,7 @@ export function createGatewayServer(deps = {}) {
|
|
|
3727
3821
|
optimizePrompt,
|
|
3728
3822
|
idleTimeoutMs,
|
|
3729
3823
|
forceRefresh,
|
|
3824
|
+
maxTurns,
|
|
3730
3825
|
});
|
|
3731
3826
|
});
|
|
3732
3827
|
server.tool("mistral_request_async", {
|
|
@@ -3796,7 +3891,13 @@ export function createGatewayServer(deps = {}) {
|
|
|
3796
3891
|
.boolean()
|
|
3797
3892
|
.default(false)
|
|
3798
3893
|
.describe("Bypass dedup and force a fresh CLI run even if a recent identical request exists"),
|
|
3799
|
-
|
|
3894
|
+
trust: z
|
|
3895
|
+
.boolean()
|
|
3896
|
+
.default(false)
|
|
3897
|
+
.describe("Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted to trusted_folders.toml) and skips the interactive trust prompt (Phase 4 slice γ)."),
|
|
3898
|
+
maxTurns: MAX_TURNS_SCHEMA.optional().describe("Vibe `--max-turns N`: cap the agent-loop iteration count (programmatic mode only, Phase 4 slice δ). Bounded to safe integers ≤ 10000."),
|
|
3899
|
+
maxPrice: MAX_PRICE_SCHEMA.optional().describe("Vibe `--max-price DOLLARS`: interrupt the session when cumulative cost crosses this cap (programmatic mode only, Phase 4 slice δ). Bounded to finite values ≤ 10000 USD."),
|
|
3900
|
+
}, async ({ prompt, promptParts, model, outputFormat, sessionId, resumeLatest, createNewSession, permissionMode, effort, reasoningEffort, approvalStrategy, approvalPolicy, mcpServers, allowedTools, disallowedTools, correlationId, optimizePrompt, idleTimeoutMs, forceRefresh, trust, maxTurns, maxPrice, }) => {
|
|
3800
3901
|
return handleMistralRequestAsync({ sessionManager, asyncJobManager, logger, runtime }, {
|
|
3801
3902
|
prompt,
|
|
3802
3903
|
promptParts,
|
|
@@ -3817,6 +3918,9 @@ export function createGatewayServer(deps = {}) {
|
|
|
3817
3918
|
optimizePrompt,
|
|
3818
3919
|
idleTimeoutMs,
|
|
3819
3920
|
forceRefresh,
|
|
3921
|
+
trust,
|
|
3922
|
+
maxTurns,
|
|
3923
|
+
maxPrice,
|
|
3820
3924
|
});
|
|
3821
3925
|
});
|
|
3822
3926
|
server.tool("llm_job_status", {
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 4 slice β — Mistral Vibe `meta.json` parser.
|
|
3
|
+
*
|
|
4
|
+
* Vibe writes per-session telemetry to
|
|
5
|
+
*
|
|
6
|
+
* ~/.vibe/logs/session/session_<YYYYMMDD>_<HHMMSS>_<first8hex>/meta.json
|
|
7
|
+
*
|
|
8
|
+
* where `<first8hex>` is the first 8 lowercase hex characters of the full
|
|
9
|
+
* session UUID. Inside the file:
|
|
10
|
+
*
|
|
11
|
+
* {
|
|
12
|
+
* "session_id": "<full-uuid>",
|
|
13
|
+
* "stats": {
|
|
14
|
+
* "session_prompt_tokens": <number> → inputTokens
|
|
15
|
+
* "session_completion_tokens": <number> → outputTokens
|
|
16
|
+
* "session_cost": <number> → costUsd
|
|
17
|
+
* }
|
|
18
|
+
* }
|
|
19
|
+
*
|
|
20
|
+
* The gateway's mistral session-id surface accepts the full UUID (so does
|
|
21
|
+
* `vibe --resume <uuid>`). To find the right directory we glob for
|
|
22
|
+
* `session_*_<first8>` and disambiguate by reading each candidate's
|
|
23
|
+
* `session_id` field. If callers happen to pass the directory basename
|
|
24
|
+
* itself we still honour that — useful for tests and for forward-compat if
|
|
25
|
+
* Vibe ever changes its dir naming scheme.
|
|
26
|
+
*
|
|
27
|
+
* Cache-token surfaces are not exposed by Vibe today, so `cacheReadTokens`
|
|
28
|
+
* and `cacheCreationTokens` are intentionally absent.
|
|
29
|
+
*
|
|
30
|
+
* Best-effort by design: any failure (missing file, bad JSON, missing
|
|
31
|
+
* fields, gateway-generated `gw-*` sessionId, unresolvable UUID, path
|
|
32
|
+
* outside the session log root) returns `{}` so the flight-recorder row
|
|
33
|
+
* simply lacks usage data.
|
|
34
|
+
*/
|
|
35
|
+
import { existsSync, readdirSync, readFileSync, realpathSync, statSync } from "fs";
|
|
36
|
+
import { join, resolve, sep } from "path";
|
|
37
|
+
import { GATEWAY_SESSION_PREFIX } from "./request-helpers.js";
|
|
38
|
+
/**
 * Pass a usable numeric stat through unchanged, or yield `undefined`.
 *
 * Despite the name, zero is accepted: only non-numbers, NaN, ±Infinity and
 * negative values are rejected.
 *
 * @param {unknown} value - Raw field taken from the parsed meta.json.
 * @returns {number | undefined} `value` itself when it is a finite number
 *   that is not below zero; otherwise `undefined`.
 */
function asPositiveNumber(value) {
    const isUsable = typeof value === "number" && Number.isFinite(value) && value >= 0;
    return isUsable ? value : undefined;
}
|
|
44
|
+
/**
 * Read a file as UTF-8 text, but only if its realpath lives under `realBase`.
 *
 * This is the module's single `readFileSync` call site for meta.json content,
 * so the "in-tree only" rule is enforced in one place.
 *
 * @param {string} realBase - Already-realpath-resolved root directory that
 *   the file must live under.
 * @param {string} candidate - Path of the file to read (may be a symlink).
 * @returns {string | undefined} The file's text, or `undefined` when the path
 *   cannot be resolved (missing file, broken link, permission error), when
 *   the resolved target is outside `realBase`, or when the read itself fails.
 */
function readInBase(realBase, candidate) {
    let realCandidate;
    try {
        // realpathSync throws for a missing path, so a separate existsSync
        // pre-check would only add a syscall and a check-then-use window.
        realCandidate = realpathSync(candidate);
    }
    catch {
        return undefined;
    }
    // Compare against the base WITH a trailing separator so that a sibling
    // such as `<base>2/meta.json` is not mistaken for a child of `<base>`.
    const realBaseWithSep = realBase.endsWith(sep) ? realBase : realBase + sep;
    if (!realCandidate.startsWith(realBaseWithSep)) {
        return undefined;
    }
    try {
        return readFileSync(realCandidate, "utf-8");
    }
    catch {
        // Best-effort: unreadable targets (e.g. a directory) yield undefined.
        return undefined;
    }
}
|
|
70
|
+
// Accepted session-id shapes. UUID_RE matches the generic 8-4-4-4-12 hex
// layout in either letter case (Vibe's own session UUIDs are not strictly
// v4). DIRNAME_RE matches Vibe's session_<8 digits>_<6 digits>_<8 lowercase
// hex> directory basename.
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
const DIRNAME_RE = /^session_\d{8}_\d{6}_[0-9a-f]{8}$/;
/**
 * Resolve the session-log directory basename for a given session id.
 * Only reads the filesystem; never writes.
 *
 * Security invariants enforced here:
 *   - The input must match DIRNAME_RE or UUID_RE before any directory listing
 *     or file read takes place.
 *   - For UUID input, a candidate is accepted only when its meta.json
 *     advertises the same `session_id`. A lone candidate is NOT trusted,
 *     because two UUIDs sharing the first 8 hex characters would otherwise
 *     be attributed to each other.
 *   - The `session_id` comparison ignores letter case. UUID_RE accepts upper
 *     and mixed case, so a case-sensitive comparison would make such input
 *     pass validation yet never resolve.
 *
 * @param {string} baseDir - Session-log root to search.
 * @param {string} realBase - Realpath of `baseDir`, handed to `readInBase`.
 * @param {string} sessionId - Full session UUID, or a directory basename.
 * @returns {string | undefined} The matching directory basename, or
 *   `undefined` when the input is unsuitable or nothing matches.
 */
function resolveVibeSessionDirname(baseDir, realBase, sessionId) {
    // 1. Caller already supplied the directory name verbatim.
    if (DIRNAME_RE.test(sessionId) && existsSync(join(baseDir, sessionId, "meta.json"))) {
        return sessionId;
    }
    // 2. Otherwise the input must be a full session UUID.
    if (!UUID_RE.test(sessionId)) {
        return undefined;
    }
    const wantedId = sessionId.toLowerCase();
    const short = wantedId.slice(0, 8);
    let entries;
    try {
        entries = readdirSync(baseDir);
    }
    catch {
        return undefined;
    }
    // Keep only `session_*_<short>` directories, newest first by mtime.
    // Ordering is a preference only; an exact session_id match is still
    // required below.
    const candidates = entries
        .filter(name => DIRNAME_RE.test(name) && name.endsWith(`_${short}`))
        .map(name => {
            let mtimeMs = 0;
            try {
                mtimeMs = statSync(join(baseDir, name)).mtimeMs;
            }
            catch {
                /* unreadable entry: keep it with mtime 0 so it sorts last */
            }
            return { name, mtimeMs };
        })
        .sort((a, b) => b.mtimeMs - a.mtimeMs);
    for (const { name } of candidates) {
        const text = readInBase(realBase, join(baseDir, name, "meta.json"));
        if (text === undefined) {
            continue;
        }
        try {
            const parsed = JSON.parse(text);
            if (typeof parsed?.session_id === "string" && parsed.session_id.toLowerCase() === wantedId) {
                return name;
            }
        }
        catch {
            /* malformed meta.json: try the next candidate */
        }
    }
    return undefined;
}
|
|
135
|
+
/**
 * Best-effort reader for the usage stats of one session, taken from
 * `<home>/.vibe/logs/session/<dir>/meta.json`.
 *
 * Never throws: any unusable input or failure yields `{}`.
 *
 * @param {string} home - Home directory containing the `.vibe` folder.
 * @param {string | null | undefined} sessionId - Full session UUID or
 *   session directory basename. Gateway-generated ids (those starting with
 *   GATEWAY_SESSION_PREFIX) are never looked up.
 * @returns {{ inputTokens?: number, outputTokens?: number, costUsd?: number }}
 *   `{}` when nothing could be read. Otherwise an object carrying all three
 *   keys, each `undefined` when the corresponding stat is missing or invalid.
 */
export function parseVibeMetaJson(home, sessionId) {
    // Non-string inputs would throw in `.startsWith` / `path.join` below,
    // which would break the "returns {} on any failure" contract.
    if (typeof sessionId !== "string" || sessionId === "" || typeof home !== "string") {
        return {};
    }
    if (sessionId.startsWith(GATEWAY_SESSION_PREFIX)) {
        // gw-* IDs are gateway internal — Vibe never wrote a meta.json under that name.
        return {};
    }
    const baseDir = resolve(join(home, ".vibe", "logs", "session"));
    let realBase;
    try {
        realBase = realpathSync(baseDir);
    }
    catch {
        // No session-log directory at all (or it is not resolvable).
        return {};
    }
    const dirname = resolveVibeSessionDirname(baseDir, realBase, sessionId);
    if (!dirname) {
        return {};
    }
    // `readInBase` is the security boundary: it realpath-resolves the file
    // and rejects anything whose target lives outside `realBase`, so the
    // final read goes through it as well.
    const text = readInBase(realBase, join(baseDir, dirname, "meta.json"));
    if (text === undefined) {
        return {};
    }
    let raw;
    try {
        raw = JSON.parse(text);
    }
    catch {
        return {};
    }
    const stats = raw?.stats;
    if (!stats || typeof stats !== "object") {
        return {};
    }
    return {
        inputTokens: asPositiveNumber(stats.session_prompt_tokens),
        outputTokens: asPositiveNumber(stats.session_completion_tokens),
        costUsd: asPositiveNumber(stats.session_cost),
    };
}
|
|
@@ -107,6 +107,24 @@ export interface PrepareMistralRequestInput {
|
|
|
107
107
|
* emit a `logger.warn` when this is non-empty.
|
|
108
108
|
*/
|
|
109
109
|
disallowedTools?: string[];
|
|
110
|
+
/**
|
|
111
|
+
* Phase 4 slice γ: emit `--trust` so non-interactive runs in fresh
|
|
112
|
+
* workspaces skip Vibe's interactive trust prompt for this invocation
|
|
113
|
+
* only (not persisted to `trusted_folders.toml`). Default undefined →
|
|
114
|
+
* Vibe's prompt behaviour is preserved for existing callers.
|
|
115
|
+
*/
|
|
116
|
+
trust?: boolean;
|
|
117
|
+
/**
|
|
118
|
+
* Phase 4 slice δ: emit `--max-turns N` to cap the agent-loop iteration
|
|
119
|
+
* count (only applies in programmatic mode with `-p`).
|
|
120
|
+
*/
|
|
121
|
+
maxTurns?: number;
|
|
122
|
+
/**
|
|
123
|
+
* Phase 4 slice δ: emit `--max-price DOLLARS` so the session is
|
|
124
|
+
* interrupted when cumulative cost crosses the cap (programmatic mode
|
|
125
|
+
* only).
|
|
126
|
+
*/
|
|
127
|
+
maxPrice?: number;
|
|
110
128
|
}
|
|
111
129
|
export interface PrepareMistralRequestResult {
|
|
112
130
|
args: string[];
|
|
@@ -204,9 +222,11 @@ export declare function resolveCodexSandboxFlags(input: CodexSandboxFlagsInput):
|
|
|
204
222
|
* Flags that `codex exec resume` rejects (the original session's policy is
|
|
205
223
|
* inherited). Callers must drop these when building resume argv.
|
|
206
224
|
*
|
|
207
|
-
*
|
|
208
|
-
* `--
|
|
209
|
-
*
|
|
225
|
+
* Verified against `codex exec resume --help` (codex-cli 0.133.0):
|
|
226
|
+
* `--full-auto`, `--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`, and
|
|
227
|
+
* `--search` are rejected. `--output-schema` and `-c key=value` ARE accepted
|
|
228
|
+
* on resume and therefore are NOT in this filter (Phase 4 slice α restored
|
|
229
|
+
* the previously-silent drop of those two).
|
|
210
230
|
*/
|
|
211
231
|
export declare const CODEX_RESUME_FILTERED_FLAGS: ReadonlySet<string>;
|
|
212
232
|
/**
|
|
@@ -398,8 +418,8 @@ export declare const CODEX_HIGH_IMPACT_PARAMS_SCHEMA: z.ZodObject<{
|
|
|
398
418
|
ignoreRules: z.ZodOptional<z.ZodBoolean>;
|
|
399
419
|
}, "strip", z.ZodTypeAny, {
|
|
400
420
|
search?: boolean | undefined;
|
|
401
|
-
profile?: string | undefined;
|
|
402
421
|
outputSchema?: string | Record<string, unknown> | undefined;
|
|
422
|
+
profile?: string | undefined;
|
|
403
423
|
configOverrides?: Record<string, string> | undefined;
|
|
404
424
|
ephemeral?: boolean | undefined;
|
|
405
425
|
images?: string[] | undefined;
|
|
@@ -407,8 +427,8 @@ export declare const CODEX_HIGH_IMPACT_PARAMS_SCHEMA: z.ZodObject<{
|
|
|
407
427
|
ignoreRules?: boolean | undefined;
|
|
408
428
|
}, {
|
|
409
429
|
search?: boolean | undefined;
|
|
410
|
-
profile?: string | undefined;
|
|
411
430
|
outputSchema?: string | Record<string, unknown> | undefined;
|
|
431
|
+
profile?: string | undefined;
|
|
412
432
|
configOverrides?: Record<string, string> | undefined;
|
|
413
433
|
ephemeral?: boolean | undefined;
|
|
414
434
|
images?: string[] | undefined;
|
package/dist/request-helpers.js
CHANGED
|
@@ -176,6 +176,15 @@ export function prepareMistralRequest(input) {
|
|
|
176
176
|
args.push("--enabled-tools", tool);
|
|
177
177
|
}
|
|
178
178
|
}
|
|
179
|
+
if (input.trust) {
|
|
180
|
+
args.push("--trust");
|
|
181
|
+
}
|
|
182
|
+
if (input.maxTurns !== undefined) {
|
|
183
|
+
args.push("--max-turns", String(input.maxTurns));
|
|
184
|
+
}
|
|
185
|
+
if (input.maxPrice !== undefined) {
|
|
186
|
+
args.push("--max-price", String(input.maxPrice));
|
|
187
|
+
}
|
|
179
188
|
const ignoredDisallowedTools = Boolean(input.disallowedTools && input.disallowedTools.length > 0);
|
|
180
189
|
return { args, env, ignoredDisallowedTools };
|
|
181
190
|
}
|
|
@@ -279,9 +288,11 @@ export function resolveCodexSandboxFlags(input) {
|
|
|
279
288
|
* Flags that `codex exec resume` rejects (the original session's policy is
|
|
280
289
|
* inherited). Callers must drop these when building resume argv.
|
|
281
290
|
*
|
|
282
|
-
*
|
|
283
|
-
* `--
|
|
284
|
-
*
|
|
291
|
+
* Verified against `codex exec resume --help` (codex-cli 0.133.0):
|
|
292
|
+
* `--full-auto`, `--sandbox`, `--ask-for-approval`, `--add-dir`, `-C`, and
|
|
293
|
+
* `--search` are rejected. `--output-schema` and `-c key=value` ARE accepted
|
|
294
|
+
* on resume and therefore are NOT in this filter (Phase 4 slice α restored
|
|
295
|
+
* the previously-silent drop of those two).
|
|
285
296
|
*/
|
|
286
297
|
export const CODEX_RESUME_FILTERED_FLAGS = new Set([
|
|
287
298
|
"--full-auto",
|
|
@@ -289,7 +300,6 @@ export const CODEX_RESUME_FILTERED_FLAGS = new Set([
|
|
|
289
300
|
"--ask-for-approval",
|
|
290
301
|
"--add-dir",
|
|
291
302
|
"-C",
|
|
292
|
-
"--output-schema",
|
|
293
303
|
"--search",
|
|
294
304
|
]);
|
|
295
305
|
/**
|
|
@@ -301,7 +311,6 @@ const CODEX_RESUME_FILTERED_FLAGS_WITH_VALUE = new Set([
|
|
|
301
311
|
"--ask-for-approval",
|
|
302
312
|
"--add-dir",
|
|
303
313
|
"-C",
|
|
304
|
-
"--output-schema",
|
|
305
314
|
]);
|
|
306
315
|
/**
|
|
307
316
|
* Strip resume-incompatible flag/value pairs from a Codex argv segment.
|
|
@@ -133,14 +133,11 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
133
133
|
"ignoreRules",
|
|
134
134
|
],
|
|
135
135
|
resumeOnlyFlags: ["--last"],
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
"--search",
|
|
142
|
-
"-c",
|
|
143
|
-
],
|
|
136
|
+
// Phase 4 slice α (v1.8.0) verified that `codex exec resume` accepts
|
|
137
|
+
// `--output-schema` and `-c` (codex-cli 0.133.0 `exec resume --help`),
|
|
138
|
+
// so they're no longer forbidden. `--search` stays forbidden (resume
|
|
139
|
+
// inherits the original session's web-search state).
|
|
140
|
+
resumeForbiddenFlags: ["--sandbox", "--ask-for-approval", "--full-auto", "--search"],
|
|
144
141
|
flags: {
|
|
145
142
|
"--last": { arity: "none", description: "Resume latest session" },
|
|
146
143
|
"--model": { arity: "one", description: "Model selector" },
|
|
@@ -189,9 +186,24 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
189
186
|
expect: "fail",
|
|
190
187
|
},
|
|
191
188
|
{
|
|
189
|
+
// Phase 4 slice α: --output-schema IS accepted on resume per
|
|
190
|
+
// codex-cli 0.133.0; this fixture pins the new behaviour so future
|
|
191
|
+
// contract changes can't silently regress.
|
|
192
192
|
id: "codex-resume-output-schema",
|
|
193
|
-
description: "
|
|
193
|
+
description: "Phase 4 slice α: --output-schema accepted on resume (codex-cli 0.133.0)",
|
|
194
194
|
args: ["exec", "resume", "--output-schema", "/tmp/schema.json", "session-id", "hello"],
|
|
195
|
+
expect: "pass",
|
|
196
|
+
},
|
|
197
|
+
{
|
|
198
|
+
id: "codex-resume-config-override",
|
|
199
|
+
description: "Phase 4 slice α: -c key=value accepted on resume",
|
|
200
|
+
args: ["exec", "resume", "-c", "model.foo=bar", "session-id", "hello"],
|
|
201
|
+
expect: "pass",
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
id: "codex-resume-search-still-forbidden",
|
|
205
|
+
description: "Phase 4 slice α: --search remains forbidden on resume",
|
|
206
|
+
args: ["exec", "resume", "--search", "session-id", "hello"],
|
|
195
207
|
expect: "fail",
|
|
196
208
|
},
|
|
197
209
|
],
|
|
@@ -219,6 +231,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
219
231
|
"policyFiles",
|
|
220
232
|
"adminPolicyFiles",
|
|
221
233
|
"attachments",
|
|
234
|
+
// Phase 4 slice γ
|
|
235
|
+
"skipTrust",
|
|
222
236
|
],
|
|
223
237
|
flags: {
|
|
224
238
|
"-p": { arity: "one", description: "Prompt text" },
|
|
@@ -236,6 +250,10 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
236
250
|
"--admin-policy": { arity: "one", description: "Admin policy file path" },
|
|
237
251
|
"-o": { arity: "one", values: ["json"], description: "Output format" },
|
|
238
252
|
"--resume": { arity: "one", description: "Resume session" },
|
|
253
|
+
"--skip-trust": {
|
|
254
|
+
arity: "none",
|
|
255
|
+
description: "Trust workspace for this session (Phase 4 slice γ)",
|
|
256
|
+
},
|
|
239
257
|
},
|
|
240
258
|
env: {},
|
|
241
259
|
conformanceFixtures: [
|
|
@@ -251,6 +269,12 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
251
269
|
args: ["-p", "hello", "--not-a-gemini-flag"],
|
|
252
270
|
expect: "fail",
|
|
253
271
|
},
|
|
272
|
+
{
|
|
273
|
+
id: "gemini-skip-trust",
|
|
274
|
+
description: "Phase 4 slice γ: --skip-trust is accepted",
|
|
275
|
+
args: ["-p", "hello", "--skip-trust"],
|
|
276
|
+
expect: "pass",
|
|
277
|
+
},
|
|
254
278
|
],
|
|
255
279
|
},
|
|
256
280
|
grok: {
|
|
@@ -275,6 +299,8 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
275
299
|
"mcpServers",
|
|
276
300
|
"allowedTools",
|
|
277
301
|
"disallowedTools",
|
|
302
|
+
// Phase 4 slice δ
|
|
303
|
+
"maxTurns",
|
|
278
304
|
],
|
|
279
305
|
flags: {
|
|
280
306
|
"-p": { arity: "one", description: "Prompt text" },
|
|
@@ -299,6 +325,11 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
299
325
|
},
|
|
300
326
|
"--resume": { arity: "one", description: "Resume session" },
|
|
301
327
|
"--continue": { arity: "none", description: "Continue latest session" },
|
|
328
|
+
"--max-turns": {
|
|
329
|
+
arity: "one",
|
|
330
|
+
pattern: /^[1-9][0-9]*$/,
|
|
331
|
+
description: "Agent-loop iteration cap (Phase 4 slice δ)",
|
|
332
|
+
},
|
|
302
333
|
},
|
|
303
334
|
env: {},
|
|
304
335
|
conformanceFixtures: [
|
|
@@ -314,6 +345,18 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
314
345
|
args: ["-p", "hello", "--not-a-grok-flag"],
|
|
315
346
|
expect: "fail",
|
|
316
347
|
},
|
|
348
|
+
{
|
|
349
|
+
id: "grok-max-turns",
|
|
350
|
+
description: "Phase 4 slice δ: --max-turns N is accepted",
|
|
351
|
+
args: ["-p", "hello", "--max-turns", "5"],
|
|
352
|
+
expect: "pass",
|
|
353
|
+
},
|
|
354
|
+
{
|
|
355
|
+
id: "grok-max-turns-invalid-zero",
|
|
356
|
+
description: "Phase 4 slice δ: --max-turns 0 is rejected by contract pattern",
|
|
357
|
+
args: ["-p", "hello", "--max-turns", "0"],
|
|
358
|
+
expect: "fail",
|
|
359
|
+
},
|
|
317
360
|
],
|
|
318
361
|
},
|
|
319
362
|
mistral: {
|
|
@@ -337,6 +380,11 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
337
380
|
"mcpServers",
|
|
338
381
|
"allowedTools",
|
|
339
382
|
"disallowedTools",
|
|
383
|
+
// Phase 4 slice γ
|
|
384
|
+
"trust",
|
|
385
|
+
// Phase 4 slice δ
|
|
386
|
+
"maxTurns",
|
|
387
|
+
"maxPrice",
|
|
340
388
|
],
|
|
341
389
|
flags: {
|
|
342
390
|
"-p": { arity: "one", description: "Prompt text" },
|
|
@@ -355,6 +403,22 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
355
403
|
"--enabled-tools": { arity: "one", description: "Enabled tool" },
|
|
356
404
|
"--resume": { arity: "one", description: "Resume session" },
|
|
357
405
|
"--continue": { arity: "none", description: "Continue latest session" },
|
|
406
|
+
"--trust": {
|
|
407
|
+
arity: "none",
|
|
408
|
+
description: "Trust cwd for this invocation only (Phase 4 slice γ)",
|
|
409
|
+
},
|
|
410
|
+
"--max-turns": {
|
|
411
|
+
arity: "one",
|
|
412
|
+
pattern: /^[1-9][0-9]*$/,
|
|
413
|
+
description: "Agent-loop iteration cap (Phase 4 slice δ, programmatic mode only)",
|
|
414
|
+
},
|
|
415
|
+
"--max-price": {
|
|
416
|
+
arity: "one",
|
|
417
|
+
// Decimal-only: matches the MAX_PRICE_SCHEMA min(1e-6) lower bound
|
|
418
|
+
// that keeps String(N) in decimal form (no scientific notation).
|
|
419
|
+
pattern: /^(0|[1-9][0-9]*)(\.[0-9]+)?$/,
|
|
420
|
+
description: "Cumulative cost cap in USD (Phase 4 slice δ, programmatic mode only)",
|
|
421
|
+
},
|
|
358
422
|
},
|
|
359
423
|
env: {
|
|
360
424
|
VIBE_ACTIVE_MODEL: {
|
|
@@ -378,6 +442,27 @@ export const UPSTREAM_CLI_CONTRACTS = {
|
|
|
378
442
|
env: { CODEX_MODEL: "gpt-5.5" },
|
|
379
443
|
expect: "fail",
|
|
380
444
|
},
|
|
445
|
+
{
|
|
446
|
+
id: "mistral-trust",
|
|
447
|
+
description: "Phase 4 slice γ: --trust is accepted",
|
|
448
|
+
args: ["-p", "hello", "--agent", "auto-approve", "--trust"],
|
|
449
|
+
env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
|
|
450
|
+
expect: "pass",
|
|
451
|
+
},
|
|
452
|
+
{
|
|
453
|
+
id: "mistral-max-turns-and-price",
|
|
454
|
+
description: "Phase 4 slice δ: --max-turns + --max-price are accepted together",
|
|
455
|
+
args: ["-p", "hello", "--agent", "auto-approve", "--max-turns", "3", "--max-price", "0.01"],
|
|
456
|
+
env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
|
|
457
|
+
expect: "pass",
|
|
458
|
+
},
|
|
459
|
+
{
|
|
460
|
+
id: "mistral-max-price-scientific-notation",
|
|
461
|
+
description: "Phase 4 slice δ: scientific-notation --max-price is rejected by contract pattern (matches MAX_PRICE_SCHEMA bounds)",
|
|
462
|
+
args: ["-p", "hello", "--agent", "auto-approve", "--max-price", "1e-7"],
|
|
463
|
+
env: { VIBE_ACTIVE_MODEL: "mistral-medium-3.5" },
|
|
464
|
+
expect: "fail",
|
|
465
|
+
},
|
|
381
466
|
],
|
|
382
467
|
},
|
|
383
468
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-cli-gateway",
|
|
3
|
-
"version": "1.7.0",
|
|
3
|
+
"version": "1.9.0",
|
|
4
4
|
"mcpName": "io.github.verivus-oss/llm-cli-gateway",
|
|
5
5
|
"description": "MCP server providing unified access to Claude Code, Codex, Gemini, Grok, and Mistral Vibe CLIs with session management, retry logic, async job orchestration, durable job results, and cross-LLM validation.",
|
|
6
6
|
"license": "MIT",
|