llm-cli-gateway 1.17.9 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,146 @@ All notable changes to the llm-cli-gateway project.
4
4
 
5
5
  ## Unreleased
6
6
 
7
+ ## [2.1.0] - 2026-06-07: Grok Build 0.2.32, probe drift acknowledgement, docs currency
8
+
9
+ ### Added
10
+
11
+ - Grok Build 0.2.32 support: new `leaderSocket` parameter on `grok_request` /
12
+ `grok_request_async` maps to the new `--leader-socket <PATH>` flag (isolated
13
+ leader process for local/branch Grok builds; default `~/.grok/leader.sock`).
14
+ Contract declares the flag with arity-one validation plus conformance
15
+ fixtures. The release's other changes (plugin slash commands in all
16
+ conversations, ordered rapid prompt submissions, faster grep on large
17
+ repos) are CLI-internal and inherited automatically. Probe at 0.2.32:
18
+ missingFlags/warnings clean.
19
+
20
+ ### Fixed
21
+
22
+ - Upstream-contract probe drift after the 2026-06 provider CLI upgrades
23
+ (gemini 0.45.2, grok 0.2.22, vibe 2.14.0): `CliFlagContract.hiddenFromHelp`
24
+ marks real flags hidden from a binary's `--help` (Claude `--max-turns`), and
25
+ `CliContract.acknowledgedUpstreamFlags` acknowledges upstream-only flags the
26
+ gateway never emits (29 Claude, 18 Gemini). Both are probe-only — the argv
27
+ allowlist is unchanged — with stale-marker warnings in both directions and a
28
+ new `acknowledgedExtraFlags` probe field. New pure `computeFlagDrift` function plus
29
+ 7 unit tests.
30
+ - MCP server version now reports the real package version (was hardcoded
31
+ `1.0.0`).
32
+
33
+ ### Documentation
34
+
35
+ - Cross-LLM documentation currency review (Codex + Gemini + Grok + Mistral):
36
+ README tool reference gains `codex_fork_session`, `llm_request_result`,
37
+ `llm_process_health`, `upstream_contracts`, and `list_available_models`;
38
+ `claude_request` parameter list completed (`outputFormat` default is
39
+ `stream-json`); Codex `fullAuto` documented as deprecated in favour of
40
+ `sandboxMode`; Gemini approval modes include `plan`; grok/mistral upgrade
41
+ strategies documented; stale test counts, provider lists, and
42
+ `BEST_PRACTICES.md` path pointers corrected across README, AGENTS.md,
43
+ .cursorrules, CLAUDE.md, docs/guides, docs/personal-mcp (Mistral/Vibe row
44
+ added to the provider support matrix), and docs/upstream.
45
+
46
+ ## [2.0.0] - 2026-06-04: node:sqlite migration — native module out of the prod graph
47
+
48
+ Major release. Persistence moves from the native `better-sqlite3` binding to
49
+ Node's built-in `node:sqlite` module behind a thin adapter. The entire
50
+ 1.17.6-1.17.8 supply-chain incident class — every incident of which traced to
51
+ `better-sqlite3`'s install path (`prebuild-install → tar-fs → tar-stream`),
52
+ not its runtime — is now **structurally** gone: the production dependency
53
+ graph contains zero native modules, zero install scripts, and no
54
+ `prebuild-install`/`tar-fs`/`tar-stream` chain. Verified end to end against a
55
+ verdaccio registry reproduction (`scripts/verify-registry-install.sh`):
56
+ consumer tree reified at 94 packages (down from ~124 in 1.17.9), `npm ls`
57
+ exits 0, and no `better-sqlite3`/`tar-stream`/`prebuild-install` appears
58
+ anywhere in the consumer tree.
59
+
60
+ ### BREAKING
61
+
62
+ - **`engines.node` is now `>=24.4.0`** (was `>=20.0.0`). Node 20 is EOL
63
+ (April 2026). The 24.4 floor is required because `node:sqlite`'s
64
+ `allowBareNamedParameters` defaults to `true` only from Node 24.4 — the
65
+ persistence layer binds bare `{ id: ... }` objects to `@id` placeholders
66
+ throughout, and on 24.0-24.3 that would need a per-statement
67
+ `setAllowBareNamedParameters(true)` call. The adapter unit tests assert
68
+ bare-name binding works, so a regression in either direction is caught.
69
+
70
+ ### Added
71
+
72
+ - `src/sqlite-driver.ts`: thin adapter over `node:sqlite`'s `DatabaseSync`.
73
+ Exports `openDatabase`, `openReadOnly`, and a `GatewayDatabase` /
74
+ `GatewayStatement` surface (`exec`/`prepare`/`run`/`get`/`all`/
75
+ `withTransaction`/`close`). It is the ONLY production module that touches
76
+ `node:sqlite`; the release security audit hard-fails if any other
77
+ production module references it. Preserves the flight recorder's
78
+ graceful-degradation path (constructor failure → recorder disabled, gateway
79
+ still runs).
80
+ - Read-only `queryRequests` connection: `openReadOnly` opens the DB with
81
+ `{ readOnly: true }`, so write-disguised-as-read SQL fails at the SQLite
82
+ engine level (`SQLITE_READONLY`). This is **stronger** than the old
83
+ better-sqlite3 `stmt.readonly` JS-property check it replaces — enforcement
84
+ is at the engine, not in JavaScript — with one belt-and-braces guard: the
85
+ read-only connection also rejects `VACUUM`/`VACUUM INTO`, the one statement
86
+ that writes a new file to disk despite `{ readOnly: true }` (and that
87
+ `stmt.readonly` previously blocked). ATTACH-then-write and
88
+ `writable_schema` schema edits are already engine-rejected.
89
+ - Cross-engine WAL crash-recovery fixtures in both directions
90
+ (`src/__tests__/cross-engine-wal.test.ts`): a `better-sqlite3`-written DB
91
+ (SQLite 3.53.1) with live `-wal`/`-shm` from a simulated unclean stop is
92
+ opened and exercised under `node:sqlite` (3.51.3), and the reverse for the
93
+ rollback direction. These gate the "zero data migration" claim across the
94
+ engine-version skew.
95
+
96
+ ### Changed
97
+
98
+ - `better-sqlite3` **moved from `dependencies` to `devDependencies`** (same
99
+ `^12.10.0` range; `@types/better-sqlite3` stays in devDependencies). It is
100
+ retained at dev time deliberately: two suites seed legacy-schema DB files
101
+ with it (`src/__tests__/flight-recorder.test.ts`,
102
+ `src/__tests__/test-veracity-regressions-slice-kappa.test.ts`) to simulate
103
+ databases written by pre-2.0.0 gateways — that realism is the point, and it
104
+ makes them standing old-engine-writer → node:sqlite-reader coverage on every
105
+ CI run — and the cross-engine WAL fixtures need a better-sqlite3 writer.
106
+ Consumers never see it: devDependencies do not install transitively, and the
107
+ prod-only shrinkwrap excludes the whole subtree.
108
+ - `flight-recorder.ts` / `job-store.ts` now open SQLite through the adapter
109
+ (`openDatabase`/`openReadOnly`/`withTransaction`) instead of
110
+ `require("better-sqlite3")`. SQL, schema, migrations, and pragmas are
111
+ unchanged.
112
+ - `package.json#overrides`: the `tar-stream` pin is **removed** (the chain
113
+ that needed it is gone from the prod graph). The `type-is` and `content-type`
114
+ pins stay — unrelated to this chain.
115
+ - `scripts/release-security-audit.sh`: the `consumerAdvisory` carve-out is
116
+ **deleted** — blocked `tar-stream` versions are now hard-fail tripwires
117
+ everywhere (the chain no longer exists in any prod tree). The packed-consumer
118
+ policy now hard-fails on ANY `tar-stream` in the consumer tree (was an
119
+ advisory warning). The repo-lockfile tripwire skips dev-only entries so the
120
+ deliberate devDependency `tar-stream@2.2.0` does not false-fail, while still
121
+ hard-failing any blocked version that re-enters the prod graph. The
122
+ better-sqlite3 PRAGMA scan is repointed at the adapter: it now also asserts
123
+ `node:sqlite` is referenced only by `src/sqlite-driver.ts`.
124
+ - `scripts/pre-release.sh`: the better-sqlite3 native-binding sanity guard is
125
+ removed (the test suite exercises the binding as a devDep and fails loudly if
126
+ broken); the `npm ls tar-stream` step is replaced by an absence assertion
127
+ against the generated prod-only shrinkwrap
128
+ (`better-sqlite3`/`prebuild-install`/`tar-fs`/`tar-stream` must be absent).
129
+ - `scripts/verify-registry-install.sh`: assertions updated for 2.0.0 —
130
+ `tar-stream`/`better-sqlite3`/`prebuild-install` must be ABSENT from the
131
+ consumer tree; consumer `npm ls` must exit 0 (the out-of-range pin that
132
+ caused ELSPROBLEMS is gone); a `node:sqlite` runtime smoke
133
+ (`new DatabaseSync(':memory:')`) confirms the engine; and the reified package
134
+ count is asserted at 94 ±2.
135
+ - README, `socket.yml`, and `docs/personal-mcp/RELEASE_READINESS.md` updated to
136
+ reflect the node:sqlite reality (no native binding, no install scripts,
137
+ Node >=24.4.0, adapter-isolation audit replacing the PRAGMA-helper note).
138
+
139
+ ### Rollback
140
+
141
+ Reverting the 2.0.0 commit re-adds `better-sqlite3` to `dependencies`, the
142
+ `tar-stream` override, and the audit advisory carve-out. DB files are
143
+ compatible in both directions — exactly what the cross-engine WAL fixtures
144
+ prove (the rollback claim inherits that gate; it is not asserted
145
+ independently).
146
+
7
147
  ## [1.17.9] - 2026-06-04: prod-only shrinkwrap + registry-fidelity verification
8
148
 
9
149
  Patch release shipping a prod-only `npm-shrinkwrap.json` and correcting the
package/README.md CHANGED
@@ -205,7 +205,7 @@ Opt-in flags (all default off) live under `[cache_awareness]` in `~/.llm-cli-gat
205
205
 
206
206
  ### Security & Quality
207
207
 
208
- - **Comprehensive Testing**: 900+ tests covering unit, integration, and regression scenarios with real CLI execution
208
+ - **Comprehensive Testing**: 1,000+ tests covering unit, integration, and regression scenarios with real CLI execution
209
209
  - **Input Validation**: Zod schemas prevent injection attacks
210
210
  - **No Secret Leakage**: Generic session descriptions only (file permissions 0o600)
211
211
  - **No ReDoS**: Bounded regex patterns prevent catastrophic backtracking
@@ -214,6 +214,8 @@ Opt-in flags (all default off) live under `[cache_awareness]` in `~/.llm-cli-gat
214
214
 
215
215
  ## Prerequisites
216
216
 
217
+ **Node.js >= 24.4.0** is required (`engines.node` in `package.json`). The gateway uses Node's built-in `node:sqlite` module for persistence — there is no native binding to compile and no install scripts run. The 24.4 floor is where `allowBareNamedParameters` defaults to `true`, which the persistence layer relies on.
218
+
217
219
  Before using this gateway, you need to install the CLI tools you want to use:
218
220
 
219
221
  ### Claude Code CLI
@@ -342,6 +344,7 @@ The personal-appliance surface exposes simplified validation tools for non-devel
342
344
  - `consensus_check`: check whether providers agree with a claim.
343
345
  - `ask_model`: ask one provider through the simplified surface.
344
346
  - `synthesize_validation`: run an explicit judge model after provider results have been collected.
347
+ - `list_available_models`: list the models each provider CLI exposes through the simplified surface.
345
348
  - `job_status` and `job_result`: poll and collect validation job outputs.
346
349
 
347
350
  The validation report preserves per-provider disagreement. Optional judge synthesis is explicit about which provider produced the judge job.
@@ -354,15 +357,29 @@ Execute a Claude Code request with optional session management.
354
357
 
355
358
  **Parameters:**
356
359
 
357
- - `prompt` (string, required): The prompt to send (1-100,000 chars)
360
+ - `prompt` (string, optional*): The prompt to send (1-100,000 chars). *Exactly one of `prompt` or `promptParts` is required (mutually exclusive)
358
361
  - `model` (string, optional): Model name or alias (use `list_models` for available values; supports `latest`)
359
- - `outputFormat` (string, optional): Output format ("text" or "json"), default: "text"
362
+ - `outputFormat` (string, optional): Output format (`text|json|stream-json`), default: `stream-json` — the gateway parses NDJSON usage events for token/cost observability; override to `text` only when you want unparsed stdout
360
363
  - `sessionId` (string, optional): Specific session ID to use
361
364
  - `continueSession` (boolean, optional): Continue the active session
362
365
  - `createNewSession` (boolean, optional): Always create a new session
366
+ - `forkSession` (boolean, optional): Fork the resumed session instead of appending to it
363
367
  - `allowedTools` (string[], optional): Restrict Claude tools to this allow-list
364
368
  - `disallowedTools` (string[], optional): Explicitly deny listed Claude tools
365
- - `dangerouslySkipPermissions` (boolean, optional): Request CLI-side permission bypass (legacy mode only)
369
+ - `permissionMode` (string, optional): Claude permission mode (`default|acceptEdits|plan|auto|dontAsk|bypassPermissions`); preferred over `dangerouslySkipPermissions`
370
+ - `dangerouslySkipPermissions` (boolean, optional): Deprecated — maps to `permissionMode: "bypassPermissions"`; `permissionMode` wins when both are set
371
+ - `agent` (string, optional): Named sub-agent to run as
372
+ - `agents` (string, optional): Inline agent definitions JSON
373
+ - `systemPrompt` / `appendSystemPrompt` (string, optional): Replace or extend the system prompt
374
+ - `maxBudgetUsd` (number, optional): Budget cap in USD for the request
375
+ - `maxTurns` (integer, optional): Agent-loop turn cap
376
+ - `effort` (string, optional): Reasoning effort (`low|medium|high|xhigh|max`)
377
+ - `fallbackModel` (string, optional): Auto-fallback model when the default is overloaded
378
+ - `jsonSchema` (string, optional): JSON Schema literal constraining structured output
379
+ - `addDir` (string[], optional): Additional workspace directories
380
+ - `noSessionPersistence` (boolean, optional): Ephemeral session (not persisted to disk)
381
+ - `settingSources` / `settings` / `tools` (optional): Setting sources to load, settings JSON path/literal, built-in tool restriction
382
+ - `excludeDynamicSystemPromptSections` (boolean, optional): Trim dynamic system prompt sections
366
383
  - `approvalStrategy` (string, optional): `"legacy"` (default) or `"mcp_managed"`
367
384
  - `approvalPolicy` (string, optional): `"strict"`, `"balanced"`, or `"permissive"`
368
385
  - `mcpServers` (string[], optional): Claude MCP servers to expose (default: `["sqry","exa","ref_tools"]`; `"trstr"` available as opt-in)
@@ -370,6 +387,10 @@ Execute a Claude Code request with optional session management.
370
387
  - `optimizePrompt` (boolean, optional): Optimize prompt for token efficiency (44% reduction), default: false
371
388
  - `optimizeResponse` (boolean, optional): Optimize response for token efficiency (37% reduction), default: false
372
389
  - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
390
+ - `idleTimeoutMs` (integer, optional): Kill a stuck process after output inactivity; 30,000 to 3,600,000 ms
391
+ - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
392
+ - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
393
+ - `forceRefresh` (boolean, optional): Bypass dedup and force a fresh CLI run, default: false
373
394
 
374
395
  **Response extras:**
375
396
 
@@ -394,19 +415,33 @@ Execute a Codex request with optional session tracking.
394
415
 
395
416
  **Parameters:**
396
417
 
397
- - `prompt` (string, required): The prompt to send (1-100,000 chars)
398
- - `model` (string, optional): Model name or alias (use `list_models` for available values; supports `latest`, recommended: `gpt-5.4`)
399
- - `fullAuto` (boolean, optional): Enable full-auto mode, default: false
418
+ - `prompt` (string, optional*): The prompt to send (1-100,000 chars). *Exactly one of `prompt` or `promptParts` is required (mutually exclusive)
419
+ - `model` (string, optional): Model name or alias (use `list_models` for available values; supports `latest`, recommended: `gpt-5.5`)
420
+ - `fullAuto` (boolean, optional): Deprecated — expands to `--sandbox workspace-write` only (current Codex no longer accepts approval-policy flags); prefer `sandboxMode`
421
+ - `sandboxMode` (string, optional): Codex sandbox (`read-only|workspace-write|danger-full-access`)
400
422
  - `dangerouslyBypassApprovalsAndSandbox` (boolean, optional): Request Codex bypass flags
401
423
  - `approvalStrategy` (string, optional): `"legacy"` (default) or `"mcp_managed"`
402
424
  - `approvalPolicy` (string, optional): `"strict"`, `"balanced"`, or `"permissive"`
403
425
  - `mcpServers` (string[], optional): MCP servers expected for Codex execution context
404
426
  - `sessionId` (string, optional): Session identifier for tracking
427
+ - `resumeLatest` (boolean, optional): Resume the most recent Codex session in the current cwd (`codex exec resume --last`); ignored if `sessionId` is set
405
428
  - `createNewSession` (boolean, optional): Always create a new session
429
+ - `forceRefresh` (boolean, optional): Bypass dedup and force a fresh CLI run, default: false
430
+ - `outputFormat` (string, optional): `text` (default) or `json` (`--json` JSONL events for token usage extraction)
431
+ - `outputSchema` (string|object, optional): Codex `--output-schema` — path or inline JSON Schema
432
+ - `workingDir` (string, optional): Working root for this session (`-C`/`--cd`; new sessions only)
433
+ - `addDir` (string[], optional): Additional writable workspace directories (one `--add-dir` per entry; new sessions only)
434
+ - `ephemeral` (boolean, optional): Codex `--ephemeral` (no session persistence)
435
+ - `images` (string[], optional): Image attachments (one `-i <path>` per entry)
436
+ - `profile` (string, optional): Codex `--profile <name>` (new sessions only; ignored with a logged warning on resume)
437
+ - `configOverrides` (object, optional): Codex `-c key=value` overrides
438
+ - `ignoreRules` / `ignoreUserConfig` (boolean, optional): Codex `--ignore-rules` / `--ignore-user-config`
439
+ - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
440
+ - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
406
441
  - `optimizePrompt` (boolean, optional): Optimize prompt for token efficiency, default: false
407
442
  - `optimizeResponse` (boolean, optional): Optimize response for token efficiency, default: false
408
443
  - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
409
- - `idleTimeoutMs` (number, optional): Kill a stuck Codex process after output inactivity; 30,000 to 3,600,000 ms
444
+ - `idleTimeoutMs` (integer, optional): Kill a stuck Codex process after output inactivity; 30,000 to 3,600,000 ms
410
445
 
411
446
  **Response extras:**
412
447
 
@@ -418,32 +453,56 @@ Execute a Codex request with optional session tracking.
418
453
  ```json
419
454
  {
420
455
  "prompt": "Create a REST API endpoint",
421
- "model": "gpt-5.4",
422
- "fullAuto": true,
456
+ "model": "gpt-5.5",
457
+ "sandboxMode": "workspace-write",
423
458
  "optimizePrompt": true
424
459
  }
425
460
  ```
426
461
 
462
+ ##### `codex_fork_session`
463
+
464
+ Fork an existing Codex session into a new branch (`codex fork <SESSION_ID|--last> <prompt>`), preserving the original session's history while the fork diverges.
465
+
466
+ **Parameters:**
467
+
468
+ - `prompt` (string, required): Prompt text for the forked session (1-100,000 chars)
469
+ - `sessionId` (string, optional): Codex session UUID to fork from (mutually exclusive with `forkLast`)
470
+ - `forkLast` (boolean, optional): Fork the most recent Codex session instead of naming one
471
+ - `model` (string, optional): Model name or alias (e.g. `gpt-5.5`, `latest`)
472
+ - `sandboxMode` (string, optional): Codex sandbox (`read-only|workspace-write|danger-full-access`)
473
+ - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
474
+ - `idleTimeoutMs` (integer, optional): Idle timeout in ms (30,000 to 3,600,000; omit for CLI default)
475
+
427
476
  ##### `gemini_request`
428
477
 
429
478
  Execute a Gemini CLI request with session support.
430
479
 
431
480
  **Parameters:**
432
481
 
433
- - `prompt` (string, required): The prompt to send (1-100,000 chars)
482
+ - `prompt` (string, optional*): The prompt to send (1-100,000 chars). *Exactly one of `prompt` or `promptParts` is required (mutually exclusive)
434
483
  - `model` (string, optional): Model name or alias (use `list_models` for available values; supports `latest`, `pro`, `flash`)
435
484
  - `sessionId` (string, optional): Session ID to resume
436
485
  - `resumeLatest` (boolean, optional): Resume the latest session automatically
437
486
  - `createNewSession` (boolean, optional): Always create a new session
438
- - `approvalMode` (string, optional): Gemini approval mode (`default|auto_edit|yolo`) in legacy mode
487
+ - `approvalMode` (string, optional): Gemini approval mode (`default|auto_edit|yolo|plan`) in legacy mode
439
488
  - `approvalStrategy` (string, optional): `"legacy"` (default) or `"mcp_managed"`
440
489
  - `approvalPolicy` (string, optional): `"strict"`, `"balanced"`, or `"permissive"`
441
490
  - `mcpServers` (string[], optional): Allowed Gemini MCP server names
442
491
  - `allowedTools` (string[], optional): Restrict Gemini tools to this allow-list
443
492
  - `includeDirs` (string[], optional): Additional workspace directories for Gemini
493
+ - `outputFormat` (string, optional): `text` (default), `json` (`-o json`), or `stream-json` (`-o stream-json`, NDJSON with usage extraction)
494
+ - `sandbox` (boolean, optional): Run Gemini in sandbox mode (`-s`)
495
+ - `policyFiles` / `adminPolicyFiles` (string[], optional): Policy / admin-policy file paths (one `--policy`/`--admin-policy` per file; paths must exist)
496
+ - `attachments` (string[], optional): Absolute file paths prepended as `@<path>` tokens to the prompt
497
+ - `skipTrust` (boolean, optional): Emit `--skip-trust` to trust the workspace for this session (required for headless runs in fresh workspaces)
498
+ - `yolo` (boolean, optional): Auto-approve all; equivalent to `approvalMode: "yolo"`. Emits `--yolo` only when `--approval-mode yolo` is not already being emitted (never both)
499
+ - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
500
+ - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
444
501
  - `optimizePrompt` (boolean, optional): Optimize prompt for token efficiency, default: false
445
502
  - `optimizeResponse` (boolean, optional): Optimize response for token efficiency, default: false
446
503
  - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
504
+ - `idleTimeoutMs` (integer, optional): Kill a stuck process after output inactivity; 30,000 to 3,600,000 ms
505
+ - `forceRefresh` (boolean, optional): Bypass dedup and force a fresh CLI run, default: false
447
506
 
448
507
  **Response extras:**
449
508
 
@@ -467,7 +526,7 @@ Execute a Grok CLI (xAI) request with session support.
467
526
 
468
527
  **Parameters:**
469
528
 
470
- - `prompt` (string, required): The prompt to send (1-100,000 chars)
529
+ - `prompt` (string, optional*): The prompt to send (1-100,000 chars). *Exactly one of `prompt` or `promptParts` is required (mutually exclusive)
471
530
  - `model` (string, optional): Model name or alias (e.g. `grok-build`, `latest`)
472
531
  - `outputFormat` (string, optional): `"plain"` (default), `"json"`, or `"streaming-json"`
473
532
  - `sessionId` (string, optional): Session ID to resume (`--resume <id>`)
@@ -482,9 +541,35 @@ Execute a Grok CLI (xAI) request with session support.
482
541
  - `mcpServers` (string[], optional): MCP server names tracked for approvals (Grok manages its own MCP config via `grok mcp`)
483
542
  - `allowedTools` (string[], optional): Allowed built-in tools (passed as `--tools` comma list)
484
543
  - `disallowedTools` (string[], optional): Disallowed built-in tools (passed as `--disallowed-tools` comma list)
544
+ - `maxTurns` (integer, optional): Agent-loop iteration cap (`--max-turns`)
545
+ - `workingDir` (string, optional): Working directory for this invocation (`--cwd`)
546
+ - `sandbox` (string, optional): Sandbox profile for filesystem/network access (`--sandbox`, freeform; also via `GROK_SANDBOX`)
547
+ - `rules` (string, optional): Extra rules appended to the system prompt (`--rules`; supports `@file` prefix)
548
+ - `systemPromptOverride` (string, optional): Replace the agent's system prompt entirely
549
+ - `allow` / `deny` (string[], optional): Permission allow/deny rules (one `--allow`/`--deny` per entry)
550
+ - `compactionMode` (string, optional): `summary|transcript|segments`, default: `summary`
551
+ - `compactionDetail` (string, optional): `none|minimal|balanced|verbose` (segments mode only)
552
+ - `agent` (string, optional): Agent name or definition file path
553
+ - `agents` (string|object, optional): Inline subagent definitions JSON
554
+ - `bestOfN` (integer, optional): Run the task N ways in parallel and pick the best (headless only)
555
+ - `check` (boolean, optional): Append a self-verification loop (headless only)
556
+ - `disableWebSearch` (boolean, optional): Disable web search and remote retrieval tools
557
+ - `todoGate` (boolean, optional): Enable runtime turn-end TodoGate (session-scoped)
558
+ - `verbatim` (boolean, optional): Send the prompt exactly as given (also skips gateway prompt optimisation)
559
+ - `promptFile` / `promptJson` / `single` (optional): Single-turn prompt from a file / JSON blocks / literal
560
+ - `experimentalMemory` / `noMemory` (boolean, optional): Enable/disable cross-session memory
561
+ - `noAltScreen` / `noPlan` / `noSubagents` (boolean, optional): Disable alt screen / plan mode / subagent spawning
562
+ - `oauth` (boolean, optional): Use OAuth during authentication
563
+ - `restoreCode` (boolean, optional): Check out the original session commit when resuming
564
+ - `leaderSocket` (string, optional): Custom leader socket path (`--leader-socket`, Grok 0.2.32+; default `~/.grok/leader.sock`) — targets an isolated leader process, e.g. a local/branch Grok build
565
+ - `nativeWorktree` (boolean|string, optional): Grok's own `--worktree` flag (`true` → bare, string → named); distinct from the gateway `worktree` option
566
+ - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
567
+ - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
485
568
  - `optimizePrompt` (boolean, optional): Optimize prompt for token efficiency, default: false
486
569
  - `optimizeResponse` (boolean, optional): Optimize response for token efficiency, default: false
487
570
  - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
571
+ - `idleTimeoutMs` (integer, optional): Kill a stuck process after output inactivity; 30,000 to 3,600,000 ms
572
+ - `forceRefresh` (boolean, optional): Bypass dedup and force a fresh CLI run, default: false
488
573
 
489
574
  **Example:**
490
575
 
@@ -738,6 +823,21 @@ Run a Mistral Vibe agentic coding request. Like `grok_request` in shape, but wit
738
823
  - `disallowedTools` (string[], optional): Accepted for parity with the other providers; ignored at the CLI boundary with a logged warning.
739
824
  - `outputFormat` (string, optional): Vibe 2.x values are `"text"`, `"json"`, or `"streaming"`; legacy aliases `"plain"` and `"stream-json"` are accepted and normalized before spawn.
740
825
  - `sessionId` / `resumeLatest` / `createNewSession`: standard session controls. Current Vibe defaults session logging to enabled; if an older config has `[session_logging] enabled = false`, `doctor --json` surfaces an actionable next-action.
826
+ - `trust` (boolean, optional): Emit `--trust` so Vibe trusts the cwd for this invocation only (not persisted; skips the interactive trust prompt)
827
+ - `maxTurns` (integer, optional): Agent-loop iteration cap (`--max-turns`, programmatic mode only)
828
+ - `maxPrice` (number, optional): Interrupt when cumulative cost crosses this USD cap (`--max-price`, programmatic mode only)
829
+ - `maxTokens` (integer, optional): Cap cumulative prompt + completion tokens (`--max-tokens`, programmatic mode only)
830
+ - `workingDir` (string, optional): Change to this directory before running (`--workdir`)
831
+ - `addDir` (string[], optional): Additional writable workspace directories (one `--add-dir` per entry)
832
+ - `approvalStrategy` (string, optional): `"legacy"` (default) or `"mcp_managed"`
833
+ - `approvalPolicy` (string, optional): `"strict"`, `"balanced"`, or `"permissive"`
834
+ - `mcpServers` (string[], optional): MCP server names tracked for approvals (Vibe manages its own MCP config via `vibe mcp`)
835
+ - `worktree` (boolean|object, optional): Run inside a gateway-owned git worktree (slice λ)
836
+ - `promptParts` (object, optional): Cache-aware structured prompt `{ system?, tools?, context?, task }`; mutually exclusive with `prompt`
837
+ - `optimizePrompt` / `optimizeResponse` (boolean, optional): Token-efficiency optimisation, default: false
838
+ - `correlationId` (string, optional): Request trace ID (auto-generated if omitted)
839
+ - `idleTimeoutMs` (integer, optional): Kill a stuck process after output inactivity; 30,000 to 3,600,000 ms
840
+ - `forceRefresh` (boolean, optional): Bypass dedup and force a fresh CLI run, default: false
741
841
 
742
842
  ##### `claude_request_async` / `codex_request_async` / `gemini_request_async` / `grok_request_async` / `mistral_request_async`
743
843
 
@@ -776,10 +876,33 @@ List recent MCP-managed approval decisions recorded by the gateway.
776
876
  **Parameters:**
777
877
 
778
878
  - `limit` (number, optional): Max records (1-500), default: 50
779
- - `cli` (string, optional): Filter by `"claude"`, `"codex"`, or `"gemini"`
879
+ - `cli` (string, optional): Filter by `"claude"`, `"codex"`, `"gemini"`, `"grok"`, or `"mistral"`
780
880
 
781
881
  Approval records are persisted to `~/.llm-cli-gateway/approvals.jsonl`.
782
882
 
883
+ ##### `llm_request_result`
884
+
885
+ Read back any persisted request — sync or async — by its correlation ID. Every response echoes its ID in `structuredContent.correlationId`; pass it here to recover the persisted prompt/response after the inline result is gone. Reads the flight recorder, so it works independently of async-job persistence (returns "not found" when flight recording is disabled).
886
+
887
+ **Parameters:**
888
+
889
+ - `correlationId` (string, required): Correlation ID from a prior request
890
+ - `maxChars` (number, optional): Max chars of the persisted response to return (1,000-2,000,000)
891
+ - `includePrompt` (boolean, optional): Include the full persisted prompt text, default: false
892
+
893
+ ##### `llm_process_health`
894
+
895
+ Report gateway process health: async-job manager state plus the resolved persistence block (`backend`, `dbPath`, config sources). Use it to confirm which config file and SQLite paths the gateway is actually running under.
896
+
897
+ ##### `upstream_contracts`
898
+
899
+ Return the gateway's declared provider CLI contracts, optionally probing the installed binaries for drift.
900
+
901
+ **Parameters:**
902
+
903
+ - `cli` (string, optional): Filter (`claude|codex|gemini|grok|mistral`)
904
+ - `probeInstalled` (boolean, optional, default `false`): Run local `--help` probes and compare advertised flags against the declared contract — strongly recommended after any provider CLI upgrade. The probe reports `missingFlags`, `extraFlags`, `acknowledgedExtraFlags` (known upstream-only flags filtered from `extraFlags`), `discoveredFlags`, and stale-marker `warnings`.
905
+
783
906
  #### Session Management Tools
784
907
 
785
908
  ##### `session_create`
@@ -922,6 +1045,9 @@ Plan or run an upgrade for one CLI.
922
1045
  - Codex latest: `codex update`
923
1046
  - Codex explicit target: `npm install -g @openai/codex@<target>`
924
1047
  - Gemini: `npm install -g @google/gemini-cli@<target>`
1048
+ - Grok latest: `grok update`
1049
+ - Grok explicit target: `grok update --version <target>`
1050
+ - Mistral (Vibe): dispatches to the detected installer (`pip`/`uv`/`brew`); errors with guidance when none is detected (Vibe ships no self-update command)
925
1051
 
926
1052
  **Example dry run:**
927
1053
 
@@ -1180,8 +1306,8 @@ If you're vetting `llm-cli-gateway` through [Socket](https://socket.dev/npm/pack
1180
1306
  | **Network access** | `src/http-transport.ts` opens an HTTP MCP transport when started via `npm run start:http`. `src/endpoint-exposure.ts` issues a HEAD probe to verify configured public/tunnel URLs. Socket also flagged `dist/upstream-contracts.js` in v1.17.2 from descriptive text, not a network call. | The transport binds to `127.0.0.1` by default and requires `LLM_GATEWAY_AUTH_TOKEN` to be set. The default stdio MCP entry point (`npm start`) opens no sockets. `src/upstream-contracts.ts` stores provider CLI metadata and imports no HTTP client APIs. |
1181
1307
  | **Shell access** | `src/executor.ts` uses `child_process.spawn(cmd, args, …)` to invoke the underlying LLM CLIs. | `spawn` is called with an argument array and **never** `shell: true`, so there is no shell interpolation path for caller input. The command name is restricted to an allow-list of known CLI binaries (`claude`, `codex`, `gemini`, `grok`, `vibe`). |
1182
1308
  | **Uses eval** | None in our source. Transitive: `@modelcontextprotocol/sdk` → `ajv@8` uses `new Function(...)` in `ajv/dist/compile/index.js` to compile JSON Schema validators. | This is ajv's standard codegen path. Only known schemas (defined in our source and the MCP SDK) flow into it; no caller-supplied data ever reaches the compiled function body. |
1183
- | **better-sqlite3 PRAGMA helper** | Transitive: `better-sqlite3/lib/methods/pragma.js` interpolates its caller-provided `source` into a `PRAGMA ${source}` statement. | We do not call `db.pragma()` from production source. Internal SQLite setup uses fixed literal `db.exec("PRAGMA ...")` statements, and `npm run security:audit` fails the release if production code reintroduces `.pragma()` calls. |
1184
- | **Dependency ownership** | A handful of small transitive packages (e.g. `bindings` via `better-sqlite3`, `media-typer` via `@modelcontextprotocol/sdk`) trip Socket's "unstable ownership" or "obfuscated code" heuristics. | These are pinned, well-known micro-deps in the Node ecosystem with no known issues. We pin direct override versions of `content-type` and `type-is` in `package.json#overrides`. Our previous direct dependency on `toml@3.0.0` (also single-maintainer, last released 2020) was replaced with the actively-maintained `smol-toml` to reduce inherited risk. |
1309
+ | **SQLite adapter isolation** | Persistence uses Node's built-in `node:sqlite` module (no native binding, no install scripts) through a single adapter, `src/sqlite-driver.ts`. | `node:sqlite` is touched by exactly one production module (the adapter); every other module talks to SQLite through its typed surface. We never call any `db.pragma()` helper (it does not exist on `node:sqlite`); SQLite setup uses fixed literal `db.exec("PRAGMA ...")` statements. `npm run security:audit` fails the release if production code references `node:sqlite` outside the adapter or reintroduces a `.pragma()` call. |
1310
+ | **Dependency ownership** | A handful of small transitive packages (e.g. `media-typer` via `@modelcontextprotocol/sdk`) trip Socket's "unstable ownership" or "obfuscated code" heuristics. | These are pinned, well-known micro-deps in the Node ecosystem with no known issues. We pin direct override versions of `content-type` and `type-is` in `package.json#overrides`. As of 2.0.0, the production dependency graph carries no native module (`better-sqlite3` moved to devDependencies; `node:sqlite` is built into Node), eliminating the entire `prebuild-install`/`tar-fs`/`tar-stream` install-time chain. Our earlier direct dependency on `toml@3.0.0` was replaced with `smol-toml`. |
1185
1311
 
1186
1312
  See [`socket.yml`](./socket.yml) for the same context in machine-readable form.
1187
1313
 
@@ -34,6 +34,9 @@ interface LoggerLike {
34
34
  export declare function resolveFlightRecorderDbPath(): string | null;
35
35
  export declare class FlightRecorder {
36
36
  private db;
37
+ private readOnlyDb;
38
+ private closed;
39
+ private readonly dbPath;
37
40
  private insertStartTxn;
38
41
  private updateCompleteTxn;
39
42
  constructor(dbPath: string);
@@ -1,10 +1,10 @@
1
- import { chmodSync, existsSync, mkdirSync } from "fs";
1
+ import { chmodSync } from "fs";
2
2
  import os from "os";
3
3
  import path from "path";
4
- import { createRequire } from "module";
4
+ import { openDatabase, openReadOnly } from "./sqlite-driver.js";
5
5
  const MAX_THINKING_BYTES = 1_000_000;
6
6
  function ensureRequestsCacheColumns(db) {
7
- const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
7
+ const rows = db.prepare("PRAGMA table_info(requests)").all();
8
8
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
9
9
  if (!names.has("cache_read_tokens")) {
10
10
  db.exec("ALTER TABLE requests ADD COLUMN cache_read_tokens INTEGER");
@@ -14,7 +14,7 @@ function ensureRequestsCacheColumns(db) {
14
14
  }
15
15
  }
16
16
  function ensureStablePrefixColumns(db) {
17
- const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
17
+ const rows = db.prepare("PRAGMA table_info(requests)").all();
18
18
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
19
19
  if (!names.has("stable_prefix_hash")) {
20
20
  db.exec("ALTER TABLE requests ADD COLUMN stable_prefix_hash TEXT");
@@ -25,7 +25,7 @@ function ensureStablePrefixColumns(db) {
25
25
  db.exec("CREATE INDEX IF NOT EXISTS idx_requests_stable_hash ON requests(stable_prefix_hash)");
26
26
  }
27
27
  function ensureCacheControlBlocksColumn(db) {
28
- const rows = db.prepare("PRAGMA table_info(requests)").all?.() ?? [];
28
+ const rows = db.prepare("PRAGMA table_info(requests)").all();
29
29
  const names = new Set(rows.map((row) => (row && typeof row.name === "string" ? row.name : "")));
30
30
  if (!names.has("cache_control_blocks")) {
31
31
  db.exec("ALTER TABLE requests ADD COLUMN cache_control_blocks INTEGER");
@@ -77,16 +77,14 @@ function truncateThinkingBlocks(blocks) {
77
77
  }
78
78
  export class FlightRecorder {
79
79
  db;
80
+ readOnlyDb = null;
81
+ closed = false;
82
+ dbPath;
80
83
  insertStartTxn;
81
84
  updateCompleteTxn;
82
85
  constructor(dbPath) {
83
- const require = createRequire(import.meta.url);
84
- const BetterSqlite3 = require("better-sqlite3");
85
- const directory = path.dirname(dbPath);
86
- if (!existsSync(directory)) {
87
- mkdirSync(directory, { recursive: true });
88
- }
89
- this.db = new BetterSqlite3(dbPath);
86
+ this.dbPath = dbPath;
87
+ this.db = openDatabase(dbPath);
90
88
  this.db.exec("PRAGMA journal_mode = WAL");
91
89
  this.db.exec("PRAGMA foreign_keys = ON");
92
90
  this.db.exec(`
@@ -165,7 +163,7 @@ export class FlightRecorder {
165
163
  INSERT INTO gateway_metadata (request_id, async_job_id, status)
166
164
  VALUES (@request_id, @async_job_id, 'started')
167
165
  `);
168
- this.insertStartTxn = this.db.transaction((entry) => {
166
+ this.insertStartTxn = this.db.withTransaction((entry) => {
169
167
  insertRequest.run({
170
168
  id: entry.correlationId,
171
169
  cli: entry.cli,
@@ -206,7 +204,7 @@ export class FlightRecorder {
206
204
  status = @status
207
205
  WHERE request_id = @id AND status = 'started'
208
206
  `);
209
- this.updateCompleteTxn = this.db.transaction((correlationId, result) => {
207
+ this.updateCompleteTxn = this.db.withTransaction((correlationId, result) => {
210
208
  const thinkingBlocks = result.thinkingBlocks && result.thinkingBlocks.length > 0
211
209
  ? JSON.stringify(truncateThinkingBlocks(result.thinkingBlocks))
212
210
  : null;
@@ -240,18 +238,22 @@ export class FlightRecorder {
240
238
  this.updateCompleteTxn(correlationId, result);
241
239
  }
242
240
  queryRequests(sql, ...params) {
243
- const stmt = this.db.prepare(sql);
244
- if (stmt.readonly === false) {
245
- throw new Error("FlightRecorder.queryRequests refuses non-readonly SQL — use a transaction or a separate write surface for INSERT/UPDATE/DELETE.");
241
+ if (this.closed) {
242
+ throw new Error("flight recorder is closed");
246
243
  }
247
- if (!stmt.all) {
248
- return [];
244
+ if (!this.readOnlyDb) {
245
+ this.readOnlyDb = openReadOnly(this.dbPath);
249
246
  }
250
- return stmt.all(...params);
247
+ return this.readOnlyDb.prepare(sql).all(...params);
251
248
  }
252
249
  flush() {
253
250
  }
254
251
  close() {
252
+ this.closed = true;
253
+ if (this.readOnlyDb) {
254
+ this.readOnlyDb.close();
255
+ this.readOnlyDb = null;
256
+ }
255
257
  this.db.close();
256
258
  }
257
259
  }
package/dist/index.d.ts CHANGED
@@ -251,6 +251,7 @@ export declare function prepareGrokRequest(params: {
251
251
  noSubagents?: boolean;
252
252
  oauth?: boolean;
253
253
  restoreCode?: boolean;
254
+ leaderSocket?: string;
254
255
  nativeWorktree?: boolean | string;
255
256
  }, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
256
257
  export declare function prepareMistralRequest(params: {
@@ -376,6 +377,7 @@ export interface GrokRequestParams {
376
377
  noSubagents?: boolean;
377
378
  oauth?: boolean;
378
379
  restoreCode?: boolean;
380
+ leaderSocket?: string;
379
381
  nativeWorktree?: boolean | string;
380
382
  worktree?: boolean | {
381
383
  name?: string;