llm-cli-gateway 1.4.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +67 -1
  2. package/README.md +111 -8
  3. package/dist/approval-manager.d.ts +1 -1
  4. package/dist/async-job-manager.d.ts +24 -2
  5. package/dist/async-job-manager.js +71 -7
  6. package/dist/auth.d.ts +15 -0
  7. package/dist/auth.js +46 -0
  8. package/dist/cli-updater.d.ts +19 -2
  9. package/dist/cli-updater.js +110 -7
  10. package/dist/codex-json-parser.d.ts +34 -0
  11. package/dist/codex-json-parser.js +105 -0
  12. package/dist/doctor.d.ts +110 -0
  13. package/dist/doctor.js +280 -0
  14. package/dist/endpoint-exposure.d.ts +22 -0
  15. package/dist/endpoint-exposure.js +231 -0
  16. package/dist/executor.d.ts +2 -0
  17. package/dist/executor.js +2 -2
  18. package/dist/flight-recorder.d.ts +3 -1
  19. package/dist/flight-recorder.js +31 -2
  20. package/dist/gateway-server.d.ts +2 -0
  21. package/dist/gateway-server.js +1 -0
  22. package/dist/gemini-json-parser.d.ts +21 -0
  23. package/dist/gemini-json-parser.js +47 -0
  24. package/dist/health.d.ts +7 -0
  25. package/dist/health.js +22 -0
  26. package/dist/http-transport.d.ts +22 -0
  27. package/dist/http-transport.js +164 -0
  28. package/dist/index.d.ts +183 -2
  29. package/dist/index.js +2629 -1411
  30. package/dist/logger.d.ts +9 -0
  31. package/dist/logger.js +14 -0
  32. package/dist/model-registry.js +40 -6
  33. package/dist/provider-login-guidance.d.ts +21 -0
  34. package/dist/provider-login-guidance.js +98 -0
  35. package/dist/provider-status.d.ts +41 -0
  36. package/dist/provider-status.js +203 -0
  37. package/dist/request-helpers.d.ts +484 -4
  38. package/dist/request-helpers.js +613 -0
  39. package/dist/resources.js +44 -0
  40. package/dist/session-manager-pg.js +1 -0
  41. package/dist/session-manager.d.ts +1 -1
  42. package/dist/session-manager.js +2 -1
  43. package/dist/validation-normalizer.d.ts +23 -0
  44. package/dist/validation-normalizer.js +79 -0
  45. package/dist/validation-orchestrator.d.ts +47 -0
  46. package/dist/validation-orchestrator.js +145 -0
  47. package/dist/validation-prompts.d.ts +15 -0
  48. package/dist/validation-prompts.js +52 -0
  49. package/dist/validation-report.d.ts +57 -0
  50. package/dist/validation-report.js +129 -0
  51. package/dist/validation-tools.d.ts +7 -0
  52. package/dist/validation-tools.js +198 -0
  53. package/package.json +15 -5
  54. package/setup/status.schema.json +271 -0
package/CHANGELOG.md CHANGED
@@ -2,7 +2,73 @@
2
2
 
3
3
  All notable changes to the llm-cli-gateway project.
4
4
 
5
- ## Unreleased
5
+ ## [1.5.4] - 2026-05-19
6
+
7
+ ### Fixed
8
+
9
+ - Disable the default shared SQLite flight recorder during Vitest runs so parallel test workers do not race on `~/.llm-cli-gateway/logs.db` in GitHub Actions.
10
+ - Keep the npm publish job under the public mirror's hosted-runner limit by installing without lifecycle scripts/audit, building once, verifying package contents, and leaving the full suite to CI.
11
+
12
+ ## [1.5.3] - 2026-05-19
13
+
14
+ ### Fixed
15
+
16
+ - Align npm and PyPI release versions at 1.5.3.
17
+ - Publish npm from the build already verified by CI instead of re-running `prepublishOnly` inside `npm publish`, which was causing the release publish step to be cancelled.
18
+ - Add a PyPI tag/version guard so future release jobs fail before upload when `integrations/llm-plugin/pyproject.toml` does not match the release tag.
19
+
20
+ ## [1.5.2] - 2026-05-19
21
+
22
+ ### Fixed
23
+
24
+ - **CI publish workflows fixed.** Both v1.5.0 and v1.5.1 npm + PyPI publish workflows failed; this release unblocks them:
25
+ - **`src/__tests__/session-manager.test.ts:437` — "should update lastUsedAt but not createdAt" was a broken test.** It used `setTimeout(...)` without awaiting it: the inner assertions never ran, AND the timer fired after `afterEach` removed the tmpdir, causing `FileSessionManager.updateSessionUsage` → `saveStorage` → `writeFileSync` to throw an unhandled `ENOENT`. Local vitest happened to exit 0 anyway; CI vitest correctly exits 1 on unhandled errors, so `npm test` failed every publish job. The test now `await`s the timer and snapshots `originalLastUsed` as a string (the original code compared against `session.lastUsedAt`, which is a live reference into the storage map and mutates when `updateSessionUsage` runs).
26
+ - **`.github/workflows/publish.yml` (PyPI) missing `contents: read`.** Declaring `permissions: { id-token: write }` shrinks `GITHUB_TOKEN` to only that scope, so `actions/checkout@v4` couldn't authenticate to fetch the release tag and failed with `fatal: could not read Username for 'https://github.com': terminal prompts disabled`. Permission now explicitly includes `contents: read`.
27
+
28
+ No package-code changes vs 1.5.0 (functional surface) or 1.5.1 (installer workflow). This patch is the test + workflow correctness fix that lets the npm + PyPI artifacts actually publish.
29
+
30
+ ## [1.5.1] - 2026-05-19
31
+
32
+ ### Changed
33
+
34
+ - **Desktop installer artifacts now built and uploaded automatically on release.** New `.github/workflows/release-installer.yml` triggers on `release: published`, cross-compiles the Go bootstrapper for 5 OS/arch targets (`darwin/{arm64,amd64}`, `linux/{amd64,arm64}`, `windows/amd64`), packages the Node gateway bundle (`llm-cli-gateway-bundle-<ver>.tar.gz`), generates `SHA256SUMS` + `release-manifest.json` with the repo-relative `RVWR_RELEASE_PUBLIC_BASE`, verifies checksums, and uploads everything as release assets via `gh release upload --clobber`. `workflow_dispatch` is supported so a missed run can be rebuilt for an existing tag. No package-code changes vs 1.5.0; this is purely the build/distribution pipeline that lets users install the desktop integration without git/npm/docker.
35
+
36
+ ## [1.5.0] - 2026-05-19
37
+
38
+ Lands DAG layers 6-12 — the personal-MCP MVP terminal plus all of Phase 0-3 provider modernisation. Codex round-2 unconditional SHIP across U22-U27 (correlation `517700e1`). 523 tests passing (+184 from 1.4.0).
39
+
40
+ ### Added
41
+
42
+ - **U19 / U20 — Early LLM-assisted setup validation + automated MVP test harness.** New `doctor.ts`, `http-transport.ts`, `validation-orchestrator.ts`, `validation-report.ts`, `validation-normalizer.ts`, `validation-prompts.ts`, `validation-tools.ts`, `endpoint-exposure.ts`, `auth.ts`, `provider-status.ts`, `provider-login-guidance.ts`, and `gateway-server.ts`. Prompt-pack tightenings driven by real LLM dogfooding (Gemini chat-only + Codex command-capable). 35 new tests across the four matching `__tests__/` files.
43
+ - **U13 / U16 — Release packaging + dogfood readiness.** `installer/build-release.sh` cross-compiles 5 OS/arch targets (linux/{amd64,arm64}, darwin/{amd64,arm64}, windows/amd64) + Node bundle + `SHA256SUMS` + `release-manifest.json`. New `cli_upgrade --uninstall` (idempotent, dry-run by default) and `cli_upgrade --check`. New `Dockerfile.personal` + `docker-compose.personal.yml` for the personal-MCP container path. New `installer/packaging/README.md`. New `package.json` scripts `release:build`, `release:checksums`, `release:docker`. Comprehensive `docs/personal-mcp/{DOGFOODING_RESULTS,RELEASE_READINESS,SINGLE_BINARY_INSTALLER,ENDPOINT_EXPOSURE,PRODUCT_CONTRACT,PROVIDER_SUPPORT_MATRIX,VALIDATION_REPORT_FORMAT}.md` + per-provider `connect-*.md` guides + `setup/assistants/*-install-prompt.md` install-prompt corpus.
44
+ - **U21 — Phase-0 parity fixes.** `SESSION_PROVIDER_VALUES` / `SESSION_PROVIDER_ENUM` now expose the full provider set (grok was previously absent from `session_create`/`session_list`/`session_clear_all` Zod enums despite the storage layer supporting it). `prepareGeminiRequest` emits `["-p", prompt, ...]` instead of a positional prompt, eliminating the dependency on Gemini's TTY/mode-detection heuristics. 6 new tests pin both fixes.
45
+ - **U22 — Mistral Vibe is the fifth supported provider.** New `mistral_request` and `mistral_request_async` MCP tools register alongside the four incumbents and route through the same async job manager, dedup store, flight recorder, approval manager, and validation orchestrator. Five Vibe-specific divergences are documented in `docs/personal-mcp/PROVIDER_MODERNISATION_AUDIT.md`:
46
+ - **No `--model` flag** — model selection is via the `VIBE_ACTIVE_MODEL` environment variable (default alias: `devstral-medium`); the executor and async job manager forward an `env` override.
47
+ - **Session-logging is opt-in** in `~/.vibe/config.toml` — `doctor --json` probes `[session_logging] enabled = true` (read-only) and surfaces an actionable `next_actions` entry when the toggle is missing.
48
+ - **`--agent` enum** replaces Grok's `--always-approve` (`default | plan | accept-edits | auto-approve | chat | explore | lean`); the gateway always emits `--agent` explicitly and defaults to `auto-approve` for programmatic callers.
49
+ - **`--enabled-tools` allow-list only** — `allowedTools` emits one `--enabled-tools <tool>` per entry; `disallowedTools` is accepted in the schema for caller parity but silently ignored at the CLI boundary (a logged warning records the no-op).
50
+ - **No self-update** — `cli_upgrade --cli mistral` detects pip / uv / brew via probes and dispatches to `pip install -U vibe-cli`, `uv tool upgrade vibe-cli`, or `brew upgrade mistral-vibe`. Unknown installations return an actionable error rather than running a non-existent `vibe update`.
51
+
52
+ Other surfaces extended: `SESSION_PROVIDER_VALUES` now includes `"mistral"`; `list_models`, `cli_versions`, `cli_upgrade`, `approval_list`, `session_create`, `session_list`, and `session_clear_all` accept the fifth provider; new MCP resources `sessions://mistral` and `models://mistral` are registered; `validate_with_models` / `consensus_check` / `red_team_review` can route to Mistral.
53
+ - **U23 — JSON output + token/cost parity across providers.** New `src/codex-json-parser.ts` parses the Codex `--json` JSONL event stream (`thread.started`, `turn.started`/`completed`/`failed`, `item.*`, `error`); lenient against partial streams and garbage preamble. New `src/gemini-json-parser.ts` parses `gemini -o json` output and maps `usageMetadata.{promptTokenCount, candidatesTokenCount, cachedContentTokenCount}`. `extractUsageAndCost` is now a thin per-provider dispatcher returning `{inputTokens, outputTokens, cacheReadTokens?, cacheCreationTokens?, costUsd?}` for every provider that supports JSON; Claude `cache_read_input_tokens` / `cache_creation_input_tokens` are now plumbed through instead of being discarded. `codex_request`, `codex_request_async`, `gemini_request`, and `gemini_request_async` now expose `outputFormat: enum("text","json")` — set to `"json"` and the gateway emits `--json` (Codex) or `-o json` (Gemini) and forwards parsed usage/cost into the flight recorder. Flight-recorder schema gains `cache_read_tokens` and `cache_creation_tokens` columns via idempotent migration (`PRAGMA table_info` → `ALTER TABLE ADD COLUMN`); existing `logs.db` files are upgraded in place. 15 new tests.
54
+ - **U24 — Permission/approval-mode parity across providers.** Claude `permissionMode` enum (`default | acceptEdits | plan | auto | dontAsk | bypassPermissions`) replaces the boolean `dangerouslySkipPermissions` (the boolean still works and now maps to `permissionMode: "bypassPermissions"`; setting both logs a warning, `permissionMode` wins). Gemini `approvalMode` gains `plan`. Codex splits `--full-auto` into `sandboxMode: enum("read-only","workspace-write","danger-full-access")` and `askForApproval: enum("untrusted","on-request","never")`, emitting `--sandbox <mode>` and `--ask-for-approval <mode>` independently; legacy `fullAuto: true` still works and expands to `--sandbox workspace-write --ask-for-approval never` by default, with `useLegacyFullAutoFlag: true` as an explicit escape hatch to emit `--full-auto` directly. Codex resume mode filters all three flags (`--full-auto`, `--sandbox`, `--ask-for-approval`) since `codex exec resume` inherits the session's policy. 26 new tests.
55
+ - **U25 — Claude high-impact features.** `claude_request` / `claude_request_async` schemas gain `agent?: string` (single sub-agent dispatch), `agents?: Record<string, object>` (multi-agent JSON, validated against `CLAUDE_AGENT_DEFINITION_SCHEMA` before emit), `forkSession?: boolean`, `systemPrompt?: string`, `appendSystemPrompt?: string` (mutually exclusive at the schema + tool-callback boundary), `maxBudgetUsd?: number`, `maxTurns?: number`, `effort?: enum("low","medium","high","xhigh","max")`, and `excludeDynamicSystemPromptSections?: boolean`. Each emits the documented `--<flag>` form. 25 new tests in `src/__tests__/claude-handler.test.ts`.
56
+ - **U26 — Codex high-impact features.** `codex_request` / `codex_request_async` gain `outputSchema?: string | object` (object form is materialised to an `0o600` temp file under `os.tmpdir()` and cleaned via the AsyncJobManager `onComplete` contract — see post-review fixes below), `search?: boolean`, `profile?: string`, `configOverrides?: Record<string,string>` (keys validated against `/^[a-zA-Z0-9._]+$/`, values reject `\r`/`\n` via Zod refinement; emitted as repeated `-c key=value`), `ephemeral?: boolean`, `images?: string[]` (each path existence-validated; missing paths fail fast), `ignoreUserConfig?: boolean`, `ignoreRules?: boolean`. New top-level tool `codex_fork_session` wraps `codex fork <UUID> <prompt>` and `codex fork --last <prompt>` (sessionId XOR forkLast via Zod refinement). Codex default model alias is now `gpt-5.5` (the prior `gpt-5.3-codex` alias still resolves). Codex resume filter list extended with `--add-dir`, `-C`, `--output-schema`, and `--search`. 28 new tests across `codex-handler.test.ts` and `codex-fork.test.ts`.
57
+ - **U27 — Gemini high-impact features.** `gemini_request` / `gemini_request_async` gain `sandbox?: boolean` (emits `-s`), `policyFiles?: string[]` and `adminPolicyFiles?: string[]` (each path existence-validated; missing paths fail fast), and `attachments?: string[]` (absolute paths only, validated and prepended to the prompt as `@<abs-path>` tokens before the `-p` pair — U21 ordering invariant preserved). For fresh sessions (`createNewSession: true` or no sessionId), the gateway now emits `--session-id <uuid-v4>` instead of `--resume`, mapping the gateway session 1:1 to Gemini's authoritative store; `gw-*` prefixed IDs are rejected via strict UUID-v4 regex. `doctor --json` probes `./GEMINI.md`, `~/.gemini/GEMINI.md`, and `~/.gemini/settings.json` (parses `mcpServers` and reconciles against the gateway's `--allowed-mcp-server-names` whitelist; surfaces `next_actions` for missing registrations). `provider-status.ts` `geminiAuthStatus()` recognises four auth methods: OAuth file, `GEMINI_API_KEY`, `GOOGLE_API_KEY`, and `GOOGLE_CLOUD_PROJECT` + `GOOGLE_GENAI_USE_VERTEXAI=true`. 41 new tests across `gemini-handler.test.ts`, `provider-status.test.ts`, and the extended `doctor.test.ts`.
58
+
59
+ ### Fixed
60
+
61
+ Round-1 Codex review found 5 blockers across U22, U23, and U26; round-2 unconditional SHIP. Locked in by `src/__tests__/post-review-fixes.test.ts` (14 tests, no mocks).
62
+
63
+ - **U22 dedup key now reflects env vars.** `AsyncJobManager.buildRequestKey(cli, args, env)` hashes a `canonicaliseEnvForKey(env)` payload (sorted-keys JSON) via the existing `computeRequestKey(cli, args, extra)` API. Two Mistral requests with the same argv but different `VIBE_ACTIVE_MODEL` no longer collide on dedup. Empty/undefined env collapses to `""` so pre-U22 callers retain the same key shape and previously-stored entries remain hit-able.
64
+ - **U23 JSON parsers are now reachable.** The newly-added Codex JSONL parser and Gemini JSON parser were dead code because `codex_request` / `gemini_request` exposed no `outputFormat` parameter and the gateway never emitted `--json` / `-o json`. Both tool schemas (sync + async) now expose `outputFormat: enum("text","json")`. `prepareCodexRequest` emits `--json`; `prepareGeminiRequest` emits a contiguous `-o json` pair after the U21 `-p` prompt pair. The success paths for `codex_request` and `gemini_request` now run `extractUsageAndCost(cli, stdout, outputFormat)` and forward `inputTokens`, `outputTokens`, `cacheReadTokens`, `cacheCreationTokens`, and `costUsd` into the flight recorder.
65
+ - **U26 `outputSchema` temp-file lifecycle now correct on every exit path.** `AsyncJobRecord` gains `onComplete?: () => void` + `onCompleteFired?: boolean` guard. `fireOnComplete(job)` is wired into every site that calls `persistComplete(job)` (8 total: close handler, cancel, idle-timeout, output overflow, dead-process recovery, exited-flag mismatch, process-monitor expiry, persistence-recovery). The dedup path also fires the new request's `onComplete` immediately so a deduped request never leaves its own materialised temp file orphaned. `awaitJobOrDefer` now takes `onComplete` as a trailing arg and guarantees exactly-once consumption across direct-execution, deferred, and `startJobWithDedup`-throws branches. The sync `codex_request` `finally` block no longer runs cleanup (it would have deleted the temp file while the deferred CLI process was still reading it); the async `codex_request_async` no longer leaks the temp file on successful start.
66
+
67
+ ### Changed
68
+
69
+ - Codex default model alias is now `gpt-5.5` (legacy `gpt-5.3-codex` alias preserved).
70
+ - Default `model-registry` fallback chain order updated for new aliases.
71
+ - Skills (`.agents/skills/*` and `skills/*`) extended from four-provider to five-provider lists, with Mistral notes on auto-approve default and session-logging requirement.
6
72
 
7
73
  ## [1.4.0] - 2026-05-16
8
74
 
package/README.md CHANGED
@@ -5,6 +5,54 @@
5
5
 
6
6
  A Model Context Protocol (MCP) server providing unified access to Claude Code, Codex, Gemini, and Grok CLIs with session management, retry logic, and async job orchestration.
7
7
 
8
+ ## Personal MCP Appliance MVP
9
+
10
+ `llm-cli-gateway` is being packaged as a single-user personal MCP appliance for cross-LLM validation. The intended workflow is: connect one MCP endpoint, then ask any client for cross-LLM validation.
11
+
12
+ The product contract is documented in [docs/personal-mcp/PRODUCT_CONTRACT.md](docs/personal-mcp/PRODUCT_CONTRACT.md). It defines the single-user scope, security posture, target support matrix, and provider-support verification gates. Public setup guides must not claim ChatGPT, Claude web, Claude Desktop, Codex, Gemini CLI, Gemini web, or Grok inbound support until the corresponding provider/client path has been verified.
13
+
14
+ This project does not provide hosted multi-tenant credential custody. Provider credentials stay on the user's machine or user-owned deployment volume.
15
+
16
+ MVP release readiness is tracked in [docs/personal-mcp/RELEASE_READINESS.md](docs/personal-mcp/RELEASE_READINESS.md). Dogfooding evidence (which target LLMs guided setup, what unsafe suggestions were captured, which findings are deferred to post-MVP work) is in [docs/personal-mcp/DOGFOODING_RESULTS.md](docs/personal-mcp/DOGFOODING_RESULTS.md).
17
+
18
+ Current personal-appliance artifacts include:
19
+
20
+ - Streamable HTTP startup: `LLM_GATEWAY_AUTH_TOKEN=<token> npm run start:http`
21
+ - Machine-readable diagnostics: `npm run doctor`
22
+ - Go bootstrapper scaffold: `installer/` with `setup`, `doctor --json`, `start`, `stop`, `status`, `repair`, `upgrade`, `uninstall`, `print-client-config`, and verified bundle download commands.
23
+ - Release packaging: `npm run release:build` produces cross-platform binaries plus a checksummed Node bundle under `installer/dist/`; see [installer/packaging/README.md](installer/packaging/README.md).
24
+ - Docker Compose fallback: [docker-compose.personal.yml](docker-compose.personal.yml) + [Dockerfile.personal](Dockerfile.personal) for users who already manage containers.
25
+ - Local setup UI artifact: [setup/ui/index.html](setup/ui/index.html)
26
+ - Provider setup snippets: [setup/providers/](setup/providers/)
27
+ - Cross-validation tools: `validate_with_models`, `second_opinion`, `compare_answers`, `red_team_review`, `consensus_check`, `ask_model`, `synthesize_validation`, `job_status`, and `job_result`.
28
+
29
+ ### Install / Upgrade / Uninstall (single binary)
30
+
31
+ ```bash
32
+ # After downloading the binary that matches your OS/arch from a release:
33
+ sha256sum --check SHA256SUMS # verify before run (or `shasum -a 256 --check` on macOS)
34
+ chmod +x llm-cli-gateway-<ver>-<os>-<arch>
35
+ ./llm-cli-gateway-<ver>-<os>-<arch> setup
36
+ ./llm-cli-gateway-<ver>-<os>-<arch> install-bundle # uses RVWR_GATEWAY_BUNDLE_URL/_SHA256
37
+ ./llm-cli-gateway-<ver>-<os>-<arch> start
38
+ ./llm-cli-gateway-<ver>-<os>-<arch> doctor
39
+
40
+ # Upgrade: replace the binary, set the new bundle env vars, run upgrade.
41
+ ./llm-cli-gateway-<new>-<os>-<arch> upgrade
42
+
43
+ # Uninstall: dry-run first, then run with --yes.
44
+ ./llm-cli-gateway-<ver>-<os>-<arch> uninstall
45
+ ./llm-cli-gateway-<ver>-<os>-<arch> uninstall --yes
46
+ ```
47
+
48
+ Docker fallback:
49
+
50
+ ```bash
51
+ LLM_GATEWAY_AUTH_TOKEN=$(openssl rand -hex 32) \
52
+ docker compose -f docker-compose.personal.yml up -d
53
+ docker compose -f docker-compose.personal.yml run --rm doctor
54
+ ```
55
+
8
56
  ## Features
9
57
 
10
58
  ### Core Capabilities
@@ -63,6 +111,36 @@ grok login # OAuth flow, or set GROK_CODE_XAI_API_KEY
63
111
  # Docs: https://docs.x.ai/build/cli
64
112
  ```
65
113
 
114
+ ### Mistral Vibe CLI
115
+ ```bash
116
+ # Pick one — the gateway's cli_upgrade auto-detects which one you used.
117
+ pip install vibe-cli
118
+ uv tool install vibe-cli
119
+ brew install mistral-vibe
120
+
121
+ vibe auth login
122
+ # Required for `mistral_request --resume` / `--continue` to persist sessions:
123
+ vibe config set session_logging.enabled true # or edit ~/.vibe/config.toml
124
+ ```
125
+
126
+ Vibe-specific notes:
127
+
128
+ - **Model selection is via the `VIBE_ACTIVE_MODEL` environment variable** —
129
+ Vibe has no `--model` flag. The gateway resolves the requested model alias
130
+ (default: `devstral-medium`) and injects it as `VIBE_ACTIVE_MODEL` when
131
+ spawning `vibe`.
132
+ - **`permissionMode` accepts** `default | plan | accept-edits | auto-approve |
133
+ chat | explore | lean` and emits `--agent <mode>`. The gateway's
134
+ programmatic-mode default is `auto-approve`; pick a stricter mode
135
+ explicitly if you need approval gates.
136
+ - **`allowedTools` is allow-list only** — the gateway emits one
137
+ `--enabled-tools <tool>` flag per entry. `disallowedTools` is accepted in
138
+ the schema for caller-side parity but is silently ignored at the CLI
139
+ boundary (a `logger.info` warning records the no-op).
140
+ - **No self-update**: `cli_upgrade --cli mistral` detects whether you used
141
+ pip / uv / brew and dispatches the matching upgrade command. Vibe has no
142
+ `vibe update` command.
143
+
66
144
  ## Installation
67
145
 
68
146
  ### As an MCP server (npm)
@@ -94,7 +172,7 @@ npm run build
94
172
 
95
173
  ### As an MCP Server
96
174
 
97
- Add to your MCP client configuration (e.g., Claude Desktop):
175
+ For clients that already support local stdio MCP servers, add a configuration like:
98
176
 
99
177
  ```json
100
178
  {
@@ -107,8 +185,24 @@ Add to your MCP client configuration (e.g., Claude Desktop):
107
185
  }
108
186
  ```
109
187
 
188
+ This generic stdio example is not provider-support verification for the Personal MCP Appliance MVP. Client-specific setup guides for ChatGPT, Claude web, Claude Desktop, Codex, Gemini CLI, Gemini web, and Grok remain gated by the provider-support matrix in [docs/personal-mcp/PRODUCT_CONTRACT.md](docs/personal-mcp/PRODUCT_CONTRACT.md).
189
+
110
190
  ### Available Tools
111
191
 
192
+ #### Cross-LLM Validation Tools
193
+
194
+ The personal-appliance surface exposes simplified validation tools for non-developer clients. These tools start provider CLI jobs through the durable async job manager and return normalized provider status plus raw job references.
195
+
196
+ - `validate_with_models`: ask two or more providers to independently validate a question.
197
+ - `second_opinion`: ask one provider to review an answer.
198
+ - `red_team_review`: challenge a plan, answer, or document for risks and failure modes.
199
+ - `consensus_check`: check whether providers agree with a claim.
200
+ - `ask_model`: ask one provider through the simplified surface.
201
+ - `synthesize_validation`: run an explicit judge model after provider results have been collected.
202
+ - `job_status` and `job_result`: poll and collect validation job outputs.
203
+
204
+ The validation report preserves per-provider disagreement. Optional judge synthesis is explicit about which provider produced the judge job.
205
+
112
206
  #### LLM Request Tools
113
207
 
114
208
  ##### `claude_request`
@@ -258,8 +352,17 @@ Environment variables:
258
352
  - `LLM_GATEWAY_DEDUP_WINDOW_MS` — how recent an existing job must be to dedup against. Default `3600000` (1 hour). Set `0` to disable dedup.
259
353
  - `LLM_GATEWAY_JOBS_DB` — override the sqlite path. Defaults to the value of `LLM_GATEWAY_LOGS_DB`, then `~/.llm-cli-gateway/logs.db`. Set to `none` to disable durability entirely (in-memory only).
260
354
 
261
- ##### `claude_request_async` / `codex_request_async` / `gemini_request_async` / `grok_request_async`
262
- Start a long-running Claude, Codex, Gemini, or Grok request without waiting for completion in the same MCP call.
355
+ ##### `mistral_request`
356
+ Run a Mistral Vibe agentic coding request. The parameters mirror `grok_request`, with these Vibe-specific differences:
357
+
358
+ - `model` (string, optional): Resolved alias (e.g. `devstral-medium`, `devstral-large`, `latest`). The resolved value is injected via the `VIBE_ACTIVE_MODEL` environment variable — Vibe has no `--model` flag.
359
+ - `permissionMode`: `default | plan | accept-edits | auto-approve | chat | explore | lean` — emitted as `--agent <mode>`. Defaults to `auto-approve` in programmatic mode.
360
+ - `allowedTools` (string[], optional): One `--enabled-tools <tool>` flag per entry (allow-list only).
361
+ - `disallowedTools` (string[], optional): Accepted for parity with the other providers; ignored at the CLI boundary with a logged warning.
362
+ - `sessionId` / `resumeLatest` / `createNewSession`: standard session controls. Continuity requires `[session_logging] enabled = true` in `~/.vibe/config.toml` — `doctor --json` surfaces an actionable next-action when the toggle is missing.
363
+
364
+ ##### `claude_request_async` / `codex_request_async` / `gemini_request_async` / `grok_request_async` / `mistral_request_async`
365
+ Start a long-running Claude, Codex, Gemini, Grok, or Mistral request without waiting for completion in the same MCP call.
263
366
 
264
367
  Use this flow when analysis/runtime can exceed client tool-call limits:
265
368
  1. Start job with `*_request_async`
@@ -297,7 +400,7 @@ Approval records are persisted to `~/.llm-cli-gateway/approvals.jsonl`.
297
400
  Create a new session for a specific CLI.
298
401
 
299
402
  **Parameters:**
300
- - `cli` (string, required): CLI to create session for ("claude", "codex", "gemini", "grok")
403
+ - `cli` (string, required): CLI to create session for ("claude", "codex", "gemini", "grok", "mistral")
301
404
  - `description` (string, optional): Description for the session
302
405
  - `setAsActive` (boolean, optional): Set as active session, default: true
303
406
 
@@ -314,7 +417,7 @@ Create a new session for a specific CLI.
314
417
  List all sessions, optionally filtered by CLI.
315
418
 
316
419
  **Parameters:**
317
- - `cli` (string, optional): Filter by CLI ("claude", "codex", "gemini", "grok")
420
+ - `cli` (string, optional): Filter by CLI ("claude", "codex", "gemini", "grok", "mistral")
318
421
 
319
422
  **Response includes:**
320
423
  - Total session count
@@ -352,7 +455,7 @@ Clear all sessions, optionally for a specific CLI.
352
455
  List available models for each CLI.
353
456
 
354
457
  **Parameters:**
355
- - `cli` (string, optional): Specific CLI to list models for ("claude", "codex", "gemini", "grok")
458
+ - `cli` (string, optional): Specific CLI to list models for ("claude", "codex", "gemini", "grok", "mistral")
356
459
 
357
460
  **Response includes:**
358
461
  - Model names and descriptions
@@ -394,13 +497,13 @@ LLM_GATEWAY_DISABLE_MODEL_DISCOVERY=1
394
497
  Report installed CLI versions.
395
498
 
396
499
  **Parameters:**
397
- - `cli` (string, optional): Specific CLI to inspect ("claude", "codex", "gemini", "grok")
500
+ - `cli` (string, optional): Specific CLI to inspect ("claude", "codex", "gemini", "grok", "mistral")
398
501
 
399
502
  ##### `cli_upgrade`
400
503
  Plan or run an upgrade for one CLI.
401
504
 
402
505
  **Parameters:**
403
- - `cli` (string, required): CLI to upgrade ("claude", "codex", "gemini", "grok")
506
+ - `cli` (string, required): CLI to upgrade ("claude", "codex", "gemini", "grok", "mistral")
404
507
  - `target` (string, optional): Package tag/version/target, default: `latest`
405
508
  - `dryRun` (boolean, optional): Return the upgrade plan without running it, default: `true`
406
509
  - `timeoutMs` (number, optional): Upgrade timeout when `dryRun=false`
@@ -2,7 +2,7 @@ import type { Logger } from "./logger.js";
2
2
  import type { ReviewIntegrityResult } from "./review-integrity.js";
3
3
  export type ApprovalPolicy = "strict" | "balanced" | "permissive";
4
4
  export type ApprovalStrategy = "legacy" | "mcp_managed";
5
- export type ApprovalCli = "claude" | "codex" | "gemini" | "grok";
5
+ export type ApprovalCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
6
6
  export type ApprovalStatus = "approved" | "denied";
7
7
  export interface ApprovalRequest {
8
8
  cli: ApprovalCli;
@@ -1,7 +1,7 @@
1
1
  import type { Logger } from "./logger.js";
2
2
  import { type JobHealth } from "./process-monitor.js";
3
3
  import { JobStore } from "./job-store.js";
4
- export type LlmCli = "claude" | "codex" | "gemini" | "grok";
4
+ export type LlmCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
5
5
  export type AsyncJobStatus = "running" | "completed" | "failed" | "canceled" | "orphaned";
6
6
  export interface AsyncJobSnapshot {
7
7
  id: string;
@@ -29,6 +29,22 @@ export interface StartJobOptions {
29
29
  outputFormat?: string;
30
30
  /** Bypass dedup and force a fresh CLI run even if a recent matching job exists. */
31
31
  forceRefresh?: boolean;
32
+ /**
33
+ * Extra environment variables to inject when spawning the child CLI.
34
+ * Used by Mistral Vibe to pass `VIBE_ACTIVE_MODEL` (Vibe has no `--model` flag).
35
+ *
36
+ * IMPORTANT: env vars participate in the dedup key (canonicalised by sorted
37
+ * keys + JSON-stringified). Two requests that differ only in env (e.g. two
38
+ * Mistral requests with the same prompt but different VIBE_ACTIVE_MODEL)
39
+ * therefore do NOT collide on dedup.
40
+ */
41
+ env?: Record<string, string>;
42
+ /**
43
+ * Optional hook fired exactly once when the job reaches a terminal state.
44
+ * Used by callers that own per-request resources (outputSchema temp files,
45
+ * etc.) that must persist for the lifetime of the spawned CLI process.
46
+ */
47
+ onComplete?: () => void;
32
48
  }
33
49
  export interface StartJobOutcome {
34
50
  snapshot: AsyncJobSnapshot;
@@ -50,8 +66,13 @@ export declare class AsyncJobManager {
50
66
  /**
51
67
  * Compute the dedup key for a job. Stable across re-issues of the same request,
52
68
  * which is exactly what allows agents to safely retry without restarting the run.
69
+ *
70
+ * U22 fix: env vars participate in the key via a deterministic canonicalisation
71
+ * (sorted keys → JSON-stringified). This prevents two Mistral requests with the
72
+ * same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
53
73
  */
54
74
  private buildRequestKey;
75
+ private fireOnComplete;
55
76
  private safeStoreCall;
56
77
  /**
57
78
  * Flush in-memory stdout/stderr to the durable store if anything changed
@@ -71,7 +92,7 @@ export declare class AsyncJobManager {
71
92
  * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
72
93
  * optional param for the dedup-aware path.
73
94
  */
74
- startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean): AsyncJobSnapshot;
95
+ startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void): AsyncJobSnapshot;
75
96
  /**
76
97
  * Start a job, with optional dedup against recent identical requests.
77
98
  * Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
@@ -82,6 +103,7 @@ export declare class AsyncJobManager {
82
103
  */
83
104
  startJobWithDedup(cli: LlmCli, args: string[], correlationId: string, opts?: StartJobOptions): StartJobOutcome;
84
105
  getJobSnapshot(jobId: string): AsyncJobSnapshot | null;
106
+ getJobSnapshots(jobIds: string[]): Record<string, AsyncJobSnapshot | null>;
85
107
  getJobResult(jobId: string, maxChars?: number): AsyncJobResult | null;
86
108
  cancelJob(jobId: string): {
87
109
  canceled: boolean;
@@ -8,6 +8,22 @@ const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
8
8
  const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
9
9
  const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
10
10
  const OUTPUT_FLUSH_INTERVAL_MS = 1000; // Throttle DB writes for streaming stdout/stderr
11
/**
 * Build a deterministic string form of an env-var map for use in the dedup key.
 *
 * Entries whose value is `undefined` or `null` are dropped, remaining values are
 * coerced to strings, and the pairs are ordered by key before being serialised
 * as a JSON array of `[key, value]` tuples.
 *
 * @param env Optional map of environment variables.
 * @returns The serialised pairs, or "" when `env` is missing or has no usable
 *          entries (so callers that pass no env keep the same key as before).
 */
function canonicaliseEnvForKey(env) {
    if (!env) {
        return "";
    }
    const pairs = [];
    for (const [name, value] of Object.entries(env)) {
        if (value === undefined || value === null) {
            continue;
        }
        pairs.push([name, String(value)]);
    }
    if (pairs.length === 0) {
        return "";
    }
    // Order by key only, so insertion order of the map never affects the key.
    pairs.sort(([left], [right]) => {
        if (left < right) {
            return -1;
        }
        if (left > right) {
            return 1;
        }
        return 0;
    });
    return JSON.stringify(pairs);
}
11
27
  function truncateText(value, maxChars) {
12
28
  if (value.length <= maxChars) {
13
29
  return { text: value, truncated: false };
@@ -82,6 +98,7 @@ export class AsyncJobManager {
82
98
  this.logger.error(`Job ${id} process ${job.process.pid} no longer exists, marking as failed`);
83
99
  this.emitMetrics(job);
84
100
  this.persistComplete(job);
101
+ this.fireOnComplete(job);
85
102
  }
86
103
  // EPERM: process exists but we can't signal it — ignore
87
104
  }
@@ -96,6 +113,7 @@ export class AsyncJobManager {
96
113
  this.logger.error(`Job ${id} has exited flag but was still in running state, marking as failed`);
97
114
  this.emitMetrics(job);
98
115
  this.persistComplete(job);
116
+ this.fireOnComplete(job);
99
117
  }
100
118
  }
101
119
  for (const [id, job] of this.jobs) {
@@ -126,9 +144,26 @@ export class AsyncJobManager {
126
144
  /**
127
145
  * Compute the dedup key for a job. Stable across re-issues of the same request,
128
146
  * which is exactly what allows agents to safely retry without restarting the run.
147
+ *
148
+ * U22 fix: env vars participate in the key via a deterministic canonicalisation
149
+ * (sorted keys → JSON-stringified). This prevents two Mistral requests with the
150
+ * same argv but different `VIBE_ACTIVE_MODEL` from deduping onto each other.
129
151
  */
130
- buildRequestKey(cli, args) {
131
- return computeRequestKey(cli, args);
152
+ buildRequestKey(cli, args, env) {
153
+ return computeRequestKey(cli, args, canonicaliseEnvForKey(env));
154
+ }
155
+ fireOnComplete(job) {
156
+ if (job.onCompleteFired)
157
+ return;
158
+ if (!job.onComplete)
159
+ return;
160
+ job.onCompleteFired = true;
161
+ try {
162
+ job.onComplete();
163
+ }
164
+ catch (err) {
165
+ this.logger.error(`Job ${job.id} onComplete hook threw`, err);
166
+ }
132
167
  }
133
168
  safeStoreCall(label, fn) {
134
169
  if (!this.store)
@@ -234,12 +269,14 @@ export class AsyncJobManager {
234
269
  * Existing callers keep working unchanged; forceRefresh is exposed as a trailing
235
270
  * optional param for the dedup-aware path.
236
271
  */
237
- startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh) {
272
+ startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete) {
238
273
  return this.startJobWithDedup(cli, args, correlationId, {
239
274
  cwd,
240
275
  idleTimeoutMs,
241
276
  outputFormat,
242
277
  forceRefresh,
278
+ env,
279
+ onComplete,
243
280
  }).snapshot;
244
281
  }
245
282
  /**
@@ -251,8 +288,8 @@ export class AsyncJobManager {
251
288
  * is returned without spawning a new process. forceRefresh skips dedup entirely.
252
289
  */
253
290
  startJobWithDedup(cli, args, correlationId, opts = {}) {
254
- const { cwd, idleTimeoutMs, outputFormat, forceRefresh } = opts;
255
- const requestKey = this.buildRequestKey(cli, args);
291
+ const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, onComplete } = opts;
292
+ const requestKey = this.buildRequestKey(cli, args, extraEnv);
256
293
  if (!forceRefresh && this.store) {
257
294
  try {
258
295
  const existing = this.store.findByRequestKey(requestKey);
@@ -268,6 +305,19 @@ export class AsyncJobManager {
268
305
  originalCorrelationId: record.correlationId,
269
306
  status: record.status,
270
307
  });
308
+ // U26 fix: the caller's per-request resources (e.g. outputSchema temp
309
+ // file) are NOT consumed by the deduped job, which reuses its own
310
+ // original resources. Release the new request's cleanup immediately
311
+ // to avoid an orphaned temp file. The original job's onComplete (if
312
+ // any) remains attached to that original job record.
313
+ if (onComplete) {
314
+ try {
315
+ onComplete();
316
+ }
317
+ catch (err) {
318
+ this.logger.error("dedup onComplete cleanup threw", err);
319
+ }
320
+ }
271
321
  return {
272
322
  snapshot: this.snapshot(record),
273
323
  deduped: true,
@@ -282,11 +332,14 @@ export class AsyncJobManager {
282
332
  }
283
333
  const id = randomUUID();
284
334
  const startedAt = new Date().toISOString();
285
- const child = spawn(cli, args, {
335
+ // Mistral Vibe ships as the `vibe` binary; the gateway uses `mistral` as the
336
+ // provider key but spawns `vibe` on the shell.
337
+ const command = cli === "mistral" ? "vibe" : cli;
338
+ const child = spawn(command, args, {
286
339
  cwd,
287
340
  detached: true,
288
341
  stdio: ["ignore", "pipe", "pipe"],
289
- env: { ...process.env, PATH: getExtendedPath() },
342
+ env: { ...process.env, PATH: getExtendedPath(), ...(extraEnv ?? {}) },
290
343
  });
291
344
  if (child.pid)
292
345
  registerProcessGroup(child.pid);
@@ -320,6 +373,8 @@ export class AsyncJobManager {
320
373
  metricsRecorded: false,
321
374
  outputFormat,
322
375
  cleanupGroup,
376
+ onComplete,
377
+ onCompleteFired: false,
323
378
  outputDirty: false,
324
379
  lastOutputFlushAt: Date.now(),
325
380
  };
@@ -356,6 +411,7 @@ export class AsyncJobManager {
356
411
  });
357
412
  this.emitMetrics(job);
358
413
  this.persistComplete(job);
414
+ this.fireOnComplete(job);
359
415
  setTimeout(() => {
360
416
  if (!job.exited && job.process)
361
417
  killProcessGroup(job.process, "SIGKILL");
@@ -386,6 +442,7 @@ export class AsyncJobManager {
386
442
  this.logger.error(`Job ${id} error: ${error.message}`, { correlationId });
387
443
  this.emitMetrics(job);
388
444
  this.persistComplete(job);
445
+ this.fireOnComplete(job);
389
446
  }
390
447
  });
391
448
  child.on("close", (code) => {
@@ -402,6 +459,7 @@ export class AsyncJobManager {
402
459
  }
403
460
  // Ensure terminal state reaches the durable store (idle-timeout/output-overflow already persisted).
404
461
  this.persistComplete(job);
462
+ this.fireOnComplete(job);
405
463
  return;
406
464
  }
407
465
  job.exitCode = code ?? 0;
@@ -417,6 +475,7 @@ export class AsyncJobManager {
417
475
  }
418
476
  this.emitMetrics(job);
419
477
  this.persistComplete(job);
478
+ this.fireOnComplete(job);
420
479
  });
421
480
  return { snapshot: this.snapshot(job), deduped: false };
422
481
  }
@@ -429,6 +488,9 @@ export class AsyncJobManager {
429
488
  }
430
489
  return this.snapshot(job);
431
490
  }
491
+ getJobSnapshots(jobIds) {
492
+ return Object.fromEntries(jobIds.map(jobId => [jobId, this.getJobSnapshot(jobId)]));
493
+ }
432
494
  getJobResult(jobId, maxChars = 200000) {
433
495
  let job = this.jobs.get(jobId);
434
496
  if (!job) {
@@ -468,6 +530,7 @@ export class AsyncJobManager {
468
530
  killProcessGroup(job.process, "SIGTERM");
469
531
  this.logger.info(`Job ${jobId} canceled`, { correlationId: job.correlationId });
470
532
  this.persistComplete(job);
533
+ this.fireOnComplete(job);
471
534
  setTimeout(() => {
472
535
  if (!job.exited && job.process)
473
536
  killProcessGroup(job.process, "SIGKILL");
@@ -539,6 +602,7 @@ export class AsyncJobManager {
539
602
  });
540
603
  this.emitMetrics(job);
541
604
  this.persistComplete(job);
605
+ this.fireOnComplete(job);
542
606
  setTimeout(() => {
543
607
  if (!job.exited && job.process)
544
608
  killProcessGroup(job.process, "SIGKILL");
package/dist/auth.d.ts ADDED
@@ -0,0 +1,15 @@
1
+ import type { IncomingMessage, ServerResponse } from "node:http";
2
/** Summary of how bearer-token authentication is configured. */
export interface AuthConfig {
    /** False only when `LLM_GATEWAY_AUTH_DISABLED` is set to "1". */
    required: boolean;
    /** True when `LLM_GATEWAY_AUTH_TOKEN` holds a non-empty value. */
    tokenConfigured: boolean;
    /** "disabled" when auth has been switched off, otherwise "env". */
    source: "env" | "disabled";
}
7
/** Outcome of checking a request's bearer token. */
export interface AuthResult {
    /** True when the request may proceed. */
    ok: boolean;
    /** HTTP status to send on failure; omitted on success. */
    status?: number;
    /** Human-readable failure reason; omitted on success. */
    message?: string;
}
12
/**
 * Read the auth settings from an environment map.
 * @param env Environment to inspect; defaults to `process.env`.
 */
export declare function loadAuthConfig(env?: NodeJS.ProcessEnv): AuthConfig;

/**
 * Return the token requests must present.
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns The configured token, or `null` when auth is disabled or no token is set.
 */
export declare function getRequiredBearerToken(env?: NodeJS.ProcessEnv): string | null;

/**
 * Check a request's `Authorization` header against the expected bearer token.
 * @param req Incoming HTTP request.
 * @param token Expected token; defaults to the one from `getRequiredBearerToken()`.
 * @returns `{ ok: true }` on success, otherwise a result with `status` and `message`.
 */
export declare function authorizeBearerRequest(req: IncomingMessage, token?: string | null): AuthResult;

/**
 * Write a JSON error response for a failed auth check and end the response.
 * @param res Response to write to.
 * @param result Failed result; `status` falls back to 401 and `message` to "Unauthorized".
 */
export declare function writeAuthFailure(res: ServerResponse, result: AuthResult): void;
package/dist/auth.js ADDED
@@ -0,0 +1,46 @@
1
+ const AUTH_SCHEME = "Bearer ";
2
+ export function loadAuthConfig(env = process.env) {
3
+ const token = env.LLM_GATEWAY_AUTH_TOKEN;
4
+ const disabled = env.LLM_GATEWAY_AUTH_DISABLED === "1";
5
+ return {
6
+ required: !disabled,
7
+ tokenConfigured: Boolean(token),
8
+ source: disabled ? "disabled" : "env",
9
+ };
10
+ }
11
+ export function getRequiredBearerToken(env = process.env) {
12
+ const config = loadAuthConfig(env);
13
+ if (!config.required)
14
+ return null;
15
+ return env.LLM_GATEWAY_AUTH_TOKEN || null;
16
+ }
17
+ export function authorizeBearerRequest(req, token = getRequiredBearerToken()) {
18
+ if (!loadAuthConfig().required) {
19
+ return { ok: true };
20
+ }
21
+ if (!token) {
22
+ return {
23
+ ok: false,
24
+ status: 503,
25
+ message: "HTTP transport requires LLM_GATEWAY_AUTH_TOKEN",
26
+ };
27
+ }
28
+ const header = req.headers.authorization;
29
+ const value = Array.isArray(header) ? header[0] : header;
30
+ if (!value || !value.startsWith(AUTH_SCHEME)) {
31
+ return { ok: false, status: 401, message: "Unauthorized" };
32
+ }
33
+ const supplied = value.slice(AUTH_SCHEME.length);
34
+ if (supplied !== token) {
35
+ return { ok: false, status: 401, message: "Unauthorized" };
36
+ }
37
+ return { ok: true };
38
+ }
39
+ export function writeAuthFailure(res, result) {
40
+ const status = result.status ?? 401;
41
+ res.writeHead(status, {
42
+ "content-type": "application/json",
43
+ "www-authenticate": 'Bearer realm="llm-cli-gateway"',
44
+ });
45
+ res.end(JSON.stringify({ error: result.message || "Unauthorized" }));
46
+ }