llm-cli-gateway 1.5.4 → 1.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,85 @@
2
2
 
3
3
  All notable changes to the llm-cli-gateway project.
4
4
 
5
+ ## [1.5.14] - 2026-05-24
6
+
7
+ ### Fixed
8
+
9
+ - Remove the Redis Lua `eval` lock-release path from production source and replace it with Redis `WATCH`/`MULTI` compare-and-delete semantics.
10
+ - Add exact direct production dependencies for `content-type@1.0.5` and `type-is@2.0.1` so packed consumer installs do not resolve the Socket-flagged `content-type@2.0.0` / `type-is@2.1.0` versions.
11
+
12
+ ### Added
13
+
14
+ - Add `npm run security:audit` as a CI/release gate covering `npm audit --omit=dev`, production source dynamic-execution scanning, blocked dependency-version checks, and a packed consumer install policy check.
15
+
16
+ ## [1.5.13] - 2026-05-24
17
+
18
+ ### Fixed
19
+
20
+ - Report missing provider CLI launches as a clear command-not-found error instead of leaking Windows/libuv codes such as `-4058`.
21
+ - Preserve async provider launch errors in job stderr/result output so sync MCP tools can return actionable setup guidance.
22
+ - Replace `irm | iex` Windows install guidance and generated release manifest commands with direct binary download plus SHA256 verification.
23
+
24
+ ## [1.5.12] - 2026-05-24
25
+
26
+ ### Fixed
27
+
28
+ - Stop detaching provider CLI processes on Windows so `ask_model` and async requests do not flash visible cmd/conhost windows.
29
+ - Use hidden Windows process creation for the bootstrapper's managed Node gateway process and status checks.
30
+ - Keep Windows process cleanup by killing provider process trees with hidden `taskkill.exe` instead of Unix process-group signals.
31
+
32
+ ## [1.5.11] - 2026-05-24
33
+
34
+ ### Fixed
35
+
36
+ - Install a stable Windows `llm-cli-gateway.exe` command alongside the versioned bootstrapper and add the install directory to the user PATH.
37
+ - Make the Windows one-command installer stop any running gateway before replacing the managed bundle, then start and doctor through the stable command.
38
+ - Fix bootstrapper `status` and `stop` behavior on Windows so they do not depend on Unix-style PID probing.
39
+
40
+ ## [1.5.10] - 2026-05-24
41
+
42
+ ### Fixed
43
+
44
+ - Hide Windows console windows when the gateway spawns provider CLIs for synchronous and asynchronous requests.
45
+
46
+ ## [1.5.9] - 2026-05-24
47
+
48
+ ### Fixed
49
+
50
+ - Fix the Node entrypoint direct-run guard on Windows by using `pathToFileURL(realpathSync(...))` instead of constructing a POSIX-style file URL manually.
51
+ - Make the Windows one-command installer stop when bootstrapper commands fail by checking native process exit codes.
52
+
53
+ ## [1.5.8] - 2026-05-24
54
+
55
+ ### Fixed
56
+
57
+ - Make `start` wait for the local HTTP health endpoint before reporting success.
58
+ - Write gateway stdout/stderr to local log files so startup failures are diagnosable instead of returning a misleading PID.
59
+
60
+ ## [1.5.7] - 2026-05-24
61
+
62
+ ### Fixed
63
+
64
+ - Add a release-pinned `install-windows.ps1` asset so Windows users can install with one PowerShell command while still verifying the downloaded bootstrapper and platform bundle against `SHA256SUMS`.
65
+ - Add the Windows one-liner to `release-manifest.json` and upload the installer script as part of the desktop release workflow.
66
+
67
+ ## [1.5.6] - 2026-05-24
68
+
69
+ ### Fixed
70
+
71
+ - Replace the host-Node installer path with platform-specific verified bundles that include the compiled gateway, production dependencies, setup assets, and a managed Node runtime.
72
+ - Make the bootstrapper start the managed runtime from the installed bundle and require `RVWR_ALLOW_HOST_NODE=1` for the developer host-Node fallback.
73
+ - Update release packaging metadata and docs so Windows/macOS/Linux install instructions use `llm-cli-gateway-bundle-<version>-<os>-<arch>.tar.gz`.
74
+ - Update production dependencies (`@modelcontextprotocol/sdk`, `better-sqlite3`, and transitive Hono/AJV packages) so `npm audit --omit=dev` reports zero vulnerabilities while pinning `type-is` and `content-type` away from Socket-flagged latest releases.
75
+
76
+ ## [1.5.5] - 2026-05-24
77
+
78
+ ### Fixed
79
+
80
+ - Build desktop installer binaries on local self-hosted Linux, Windows, and macOS runners, then publish combined release metadata from the Linux packaging job.
81
+ - Make `installer/build-release.sh` default to the host target for local runs, with `--all-targets` / `RVWR_RELEASE_ALL_TARGETS=1` reserved for local full-matrix testing.
82
+ - Package setup UI/provider assets into the verified gateway bundle and let the setup UI resolve installed bundle assets from the managed gateway directory.
83
+
5
84
  ## [1.5.4] - 2026-05-19
6
85
 
7
86
  ### Fixed
package/README.md CHANGED
@@ -20,7 +20,7 @@ Current personal-appliance artifacts include:
20
20
  - Streamable HTTP startup: `LLM_GATEWAY_AUTH_TOKEN=<token> npm run start:http`
21
21
  - Machine-readable diagnostics: `npm run doctor`
22
22
  - Go bootstrapper scaffold: `installer/` with `setup`, `doctor --json`, `start`, `stop`, `status`, `repair`, `upgrade`, `uninstall`, `print-client-config`, and verified bundle download commands.
23
- - Release packaging: `npm run release:build` produces cross-platform binaries plus a checksummed Node bundle under `installer/dist/`; see [installer/packaging/README.md](installer/packaging/README.md).
23
+ - Release packaging: the release workflow builds Linux binaries on the local self-hosted runner, builds Windows/macOS binaries on GitHub-hosted runners, then publishes checksummed platform bundles with the gateway, production dependencies, and a managed Node runtime; see [installer/packaging/README.md](installer/packaging/README.md).
24
24
  - Docker Compose fallback: [docker-compose.personal.yml](docker-compose.personal.yml) + [Dockerfile.personal](Dockerfile.personal) for users who already manage containers.
25
25
  - Local setup UI artifact: [setup/ui/index.html](setup/ui/index.html)
26
26
  - Provider setup snippets: [setup/providers/](setup/providers/)
@@ -28,12 +28,35 @@ Current personal-appliance artifacts include:
28
28
 
29
29
  ### Install / Upgrade / Uninstall (single binary)
30
30
 
31
+ Windows PowerShell:
32
+
33
+ ```powershell
34
+ $Version = '<version>'
35
+ $Base = "https://github.com/verivus-oss/llm-cli-gateway/releases/download/v$Version"
36
+ $InstallDir = Join-Path (Join-Path $env:LOCALAPPDATA 'Programs') 'llm-cli-gateway'
37
+ $Exe = Join-Path $InstallDir 'llm-cli-gateway.exe'
38
+ New-Item -ItemType Directory -Force $InstallDir | Out-Null
39
+ Invoke-WebRequest -UseBasicParsing "$Base/llm-cli-gateway-$Version-windows-amd64.exe" -OutFile $Exe
40
+ $env:RVWR_GATEWAY_BUNDLE_URL = "$Base/llm-cli-gateway-bundle-$Version-windows-amd64.tar.gz"
41
+ $env:RVWR_GATEWAY_BUNDLE_SHA256 = '<bundle-sha256-from-SHA256SUMS>'
42
+ & $Exe setup
43
+ & $Exe stop
44
+ & $Exe install-bundle
45
+ & $Exe start
46
+ & $Exe status
47
+ & $Exe doctor
48
+ ```
49
+
50
+ The Windows installer keeps a stable `llm-cli-gateway.exe` command in
51
+ `%LOCALAPPDATA%\Programs\llm-cli-gateway` and adds that directory to the user
52
+ PATH. Do not script against release-versioned exe names after install.
53
+
31
54
  ```bash
32
55
  # After downloading the binary that matches your OS/arch from a release:
33
56
  sha256sum --check SHA256SUMS # verify before run (or `shasum -a 256 --check` on macOS)
34
57
  chmod +x llm-cli-gateway-<ver>-<os>-<arch>
35
58
  ./llm-cli-gateway-<ver>-<os>-<arch> setup
36
- ./llm-cli-gateway-<ver>-<os>-<arch> install-bundle # uses RVWR_GATEWAY_BUNDLE_URL/_SHA256
59
+ ./llm-cli-gateway-<ver>-<os>-<arch> install-bundle # uses the platform bundle URL/SHA256
37
60
  ./llm-cli-gateway-<ver>-<os>-<arch> start
38
61
  ./llm-cli-gateway-<ver>-<os>-<arch> doctor
39
62
 
@@ -340,17 +363,233 @@ Execute a Grok CLI (xAI) request with session support.
340
363
 
341
364
  #### Durable job results & automatic dedup
342
365
 
343
- Every async job is persisted to a `jobs` table in `~/.llm-cli-gateway/logs.db` as it transitions through running → completed/failed/canceled. This makes the gateway a durable collection layer:
366
+ Every async job is persisted to a job store as it transitions through running → completed/failed/canceled. This makes the gateway a durable collection layer:
344
367
 
345
368
  - **Re-issuing a request is safe.** Identical `*_request` / `*_request_async` calls within the dedup window (default 1 hour) short-circuit onto the existing running or completed job — the caller gets back the same job ID instead of starting a duplicate run. This directly fixes the "agent times out polling, re-issues, and the whole job starts over" failure mode.
346
369
  - **`llm_job_status` and `llm_job_result` work across gateway restarts.** Job rows live for 30 days by default; callers can fetch results long after the in-memory cache has evicted them.
347
370
  - **Jobs running at shutdown are marked `orphaned`** on the next gateway boot (the detached child can't be reattached to). Their captured partial output remains readable.
348
371
  - **Pass `forceRefresh: true`** on any request tool to bypass dedup and force a fresh CLI run.
349
372
 
350
- Environment variables:
351
- - `LLM_GATEWAY_JOB_RETENTION_DAYS` — how long completed jobs stay queryable. Default `30`.
352
- - `LLM_GATEWAY_DEDUP_WINDOW_MS` — how recent an existing job must be to dedup against. Default `3600000` (1 hour). Set `0` to disable dedup.
353
- - `LLM_GATEWAY_JOBS_DB` — override the sqlite path. Defaults to the value of `LLM_GATEWAY_LOGS_DB`, then `~/.llm-cli-gateway/logs.db`. Set to `none` to disable durability entirely (in-memory only).
373
+ ##### Persistence configuration
374
+
375
+ The job-store backend is configured by `~/.llm-cli-gateway/config.toml` (override with `LLM_GATEWAY_CONFIG=/path/to/config.toml`). Example:
376
+
377
+ ```toml
378
+ [persistence]
379
+ backend = "sqlite" # "sqlite" | "memory" | "postgres" | "none"
380
+ path = "~/.llm-cli-gateway/logs.db" # for sqlite
381
+ # dsn = "postgresql://user:pw@host/db" # for postgres (interface only — impl not yet shipped)
382
+ retentionDays = 30
383
+ dedupWindowMs = 3600000
384
+ acknowledgeEphemeral = false # required to enable async tools with memory backend
385
+ ```
386
+
387
+ Backends:
388
+ - **`sqlite`** (default) — durable, file-backed. Safe for single-instance deployments.
389
+ - **`memory`** — in-process Map. Lost on gateway exit. Requires `acknowledgeEphemeral = true` to be loaded. Suitable for tests and ephemeral CI gateways.
390
+ - **`postgres`** — interface only, implementation not yet shipped. Selecting this backend throws at startup.
391
+ - **`none`** — no store. **`*_request_async`, `llm_job_status`, `llm_job_result`, and `llm_job_cancel` are NOT registered on the gateway.** This is a structural invariant: agents that try to call async tools against a gateway with `backend = "none"` get a clean "tool not found" at connect time instead of silent in-memory loss after the 1-hour TTL. Use `llm_process_health` to inspect the resolved persistence state programmatically.
392
+
393
+ Legacy environment variables (deprecated; emit a warning at startup):
394
+ - `LLM_GATEWAY_LOGS_DB` / `LLM_GATEWAY_JOBS_DB` — `none` selects `backend = "none"`; any other value selects `backend = "sqlite"` with that path.
395
+ - `LLM_GATEWAY_JOB_RETENTION_DAYS` — overrides `retentionDays`.
396
+ - `LLM_GATEWAY_DEDUP_WINDOW_MS` — overrides `dedupWindowMs`.
397
+ - `LLM_GATEWAY_ACKNOWLEDGE_EPHEMERAL` — `1`/`true`/`yes` sets `acknowledgeEphemeral = true`.
398
+
399
+ ##### Per-project isolation
400
+
401
+ By default, **all gateway data is global per user**, not per project. With no overrides, every Claude Code window — across every repo — spawns its own gateway subprocess, but all of those subprocesses read and write the same files:
402
+
403
+ - `~/.llm-cli-gateway/logs.db` (async jobs + flight recorder)
404
+ - `~/.llm-cli-gateway/sessions.json` (CLI sessions)
405
+ - `~/.llm-cli-gateway/config.toml` (resolved config)
406
+
407
+ This is usually what you want — `session_list` from repo A shows sessions from repo B, an async job started in window A can be polled from window B, and the 1-hour dedup window catches re-issues across windows. SQLite WAL mode makes concurrent access from multiple gateway subprocesses safe.
408
+
409
+ If you instead want **per-project isolation** (e.g. unrelated repos shouldn't share session lists or risk false dedup hits), point each project at its own config file. In `.claude/settings.local.json` for the project:
410
+
411
+ ```json
412
+ {
413
+ "mcpServers": {
414
+ "llm-gateway": {
415
+ "env": {
416
+ "LLM_GATEWAY_CONFIG": "${workspaceFolder}/.gateway/config.toml"
417
+ }
418
+ }
419
+ }
420
+ }
421
+ ```
422
+
423
+ …and put a per-project `config.toml` in the repo:
424
+
425
+ ```toml
426
+ [persistence]
427
+ backend = "sqlite"
428
+ path = "/srv/repos/.../my-repo/.gateway/logs.db"
429
+ ```
430
+
431
+ Now every gateway subprocess spawned for *this* repo's Claude Code window reads its own config and writes to its own SQLite file; sessions, jobs, and dedup state are scoped to the repo. Other repos keep using the global default. `llm_process_health.persistence.sources.configFile` lets an agent confirm which config it's actually running under.
432
+
433
+ ###### Agent-executable spec (DAG-TOML)
434
+
435
+ If you want an LLM agent to perform this setup deterministically — rather than reading the prose above and guessing — copy the following DAG-TOML into the repo (e.g. `docs/planning/per-project-gateway-isolation.toml`) and point your agent at it. The schema is [`agent-assurance`](https://github.com/verivus-oss/agent-assurance/blob/main/SPEC.md) with `template_kind = "implementation-dag"`. The agent MUST execute units in layer order, MUST NOT skip the verification unit, and MUST treat any failed gate as blocking.
436
+
437
+ ```toml
438
+ [meta]
439
+ schema_version = "1.0.0"
440
+ template_kind = "implementation-dag"
441
+ docs = "https://github.com/verivus-oss/agent-assurance/blob/main/SPEC.md"
442
+ confidentiality = "public"
443
+ title = "Per-project llm-cli-gateway persistence isolation"
444
+ spec = "https://github.com/verivus-oss/llm-cli-gateway#per-project-isolation"
445
+ created = "YYYY-MM-DD"
446
+ total_units = 5
447
+ tier1_units = ["U01","U02","U03","U04","U05"]
448
+ tier2_units = []
449
+ tier3_units = []
450
+
451
+ # ============================================================================
452
+ # [policy.agent] — persona for the agent performing the configuration.
453
+ # ============================================================================
454
+
455
+ [policy.agent]
456
+ name = "Gateway Persistence Isolator"
457
+ role = "Configuration Engineer"
458
+ purpose = "Configure the llm-cli-gateway MCP server so its async job store, sessions, dedup state, and flight recorder are scoped to THIS repository instead of the per-user default at ~/.llm-cli-gateway/."
459
+ validation_type = "Structural + Runtime Verification"
460
+ workflow_initiator = false
461
+ description = "Writes a repo-local config.toml, registers an LLM_GATEWAY_CONFIG override in .claude/settings.local.json, restarts the MCP server, and confirms via llm_process_health that the gateway is now reading the repo-local config and writing to the repo-local SQLite path."
462
+
463
+ [policy.agent.orchestration]
464
+ consumes_events = ["PerProjectIsolationRequested"]
465
+ produces_events = ["PerProjectIsolationComplete"]
466
+
467
+ [policy.agent.responsibilities]
468
+ items = [
469
+ "Create the repo-local gateway data directory and add it to .gitignore.",
470
+ "Write a config.toml that pins backend=sqlite to a repo-local path.",
471
+ "Register the LLM_GATEWAY_CONFIG env override in .claude/settings.local.json (NOT .mcp.json — that file is committed and shared).",
472
+ "Trigger an MCP server reconnect.",
473
+ "Verify via llm_process_health that the resolved persistence.sources.configFile and persistence.path are the repo-local values.",
474
+ ]
475
+
476
+ # ============================================================================
477
+ # [policy.instance] — concrete paths the agent fills in for THIS repo.
478
+ # Agent MUST replace <REPO_ABS_PATH> with the absolute path to the repo
479
+ # before emitting any artefact. Relative paths in config.toml MUST be
480
+ # expanded to absolute — the gateway does not re-resolve them per cwd.
481
+ # ============================================================================
482
+
483
+ [policy.instance]
484
+ repo_abs_path = "<REPO_ABS_PATH>" # e.g. /srv/repos/me/my-project
485
+ gateway_data_dir_relative = ".gateway" # repo-relative directory
486
+ config_toml_relative = ".gateway/config.toml"
487
+ sqlite_db_relative = ".gateway/logs.db"
488
+ claude_local_settings_relative = ".claude/settings.local.json"
489
+ gitignore_relative = ".gitignore"
490
+ mcp_server_name = "llm-gateway" # must match the entry in .mcp.json
491
+
492
+ # ============================================================================
493
+ # [policy.gates] — blocking checks. Any failure stops the workflow.
494
+ # ============================================================================
495
+
496
+ [policy.gates]
497
+ gate_repo_abs_path_resolved = "policy.instance.repo_abs_path must NOT be the literal string '<REPO_ABS_PATH>' when U01 starts."
498
+ gate_config_is_committed = "policy.instance.config_toml_relative MAY be committed. policy.instance.claude_local_settings_relative MUST NOT be committed (it is per-developer). Agent MUST verify that .gitignore covers .claude/settings.local.json and add the entry if it is absent."
499
+ gate_no_legacy_env_leak = "Agent MUST grep the shell init files for LLM_GATEWAY_LOGS_DB / LLM_GATEWAY_JOBS_DB. If set, the legacy env var will override the new config and the deprecation warning will fire at every gateway boot. The agent reports this as a finding and asks the operator to unset before proceeding."
500
+ gate_health_confirms_isolation = "U05 MUST observe llm_process_health.persistence.sources.configFile == policy.instance.repo_abs_path + '/' + policy.instance.config_toml_relative AND llm_process_health.persistence.path == policy.instance.repo_abs_path + '/' + policy.instance.sqlite_db_relative. Anything else means the override did not take effect."
501
+
502
+ # ============================================================================
503
+ # [policy.evidence] — what each unit must emit so the work is auditable.
504
+ # ============================================================================
505
+
506
+ [policy.evidence]
507
+ per_unit_required_fields = [
508
+ "unit_id", # U01..U05
509
+ "status", # "completed" | "failed"
510
+ "artefact_paths", # files written / modified
511
+ "stdout_tail", # last 20 lines of any command output
512
+ "verification_quote", # for U05, the verbatim llm_process_health.persistence block
513
+ ]
514
+ findings_required_fields = [
515
+ "gate_id", # which gate failed
516
+ "observed",
517
+ "expected",
518
+ "remediation",
519
+ ]
520
+
521
+ # ============================================================================
522
+ # Units. Execute in layer order. U01..U03 modify the working tree; U04
523
+ # triggers a reconnect; U05 is the verification gate that decides success.
524
+ # ============================================================================
525
+
526
+ [units.U01]
527
+ name = "create-repo-local-data-dir"
528
+ summary = "mkdir -p <repo>/.gateway and append /.gateway/ to .gitignore (creating .gitignore if missing). The gateway will write logs.db, logs.db-wal, logs.db-shm here — none should be committed."
529
+ layer = 0
530
+ tier = 1
531
+ status = "pending"
532
+ depends_on = []
533
+ blocks = ["U02"]
534
+ estimated_loc = 5
535
+ files_modify = [".gitignore"]
536
+ produces = ["ART:gateway-data-dir"]
537
+ consumes = []
538
+
539
+ [units.U02]
540
+ name = "write-config-toml"
541
+ summary = "Write <repo>/.gateway/config.toml with [persistence] backend='sqlite' and path=<absolute-path-to-repo>/.gateway/logs.db. Path MUST be absolute. Do NOT use ~ — the gateway expands only a leading ~/ (any other [persistence].path value is read literally), and Claude Code may launch the gateway with a HOME that surprises you."
542
+ layer = 1
543
+ tier = 1
544
+ status = "pending"
545
+ depends_on = ["U01"]
546
+ blocks = ["U03"]
547
+ estimated_loc = 10
548
+ files_modify = [".gateway/config.toml"]
549
+ produces = ["ART:gateway-config"]
550
+ consumes = ["ART:gateway-data-dir"]
551
+
552
+ [units.U03]
553
+ name = "register-llm-gateway-config-env-in-claude-local-settings"
554
+ summary = "Add (or merge) an mcpServers.<mcp_server_name>.env entry in .claude/settings.local.json that sets LLM_GATEWAY_CONFIG to the absolute path of .gateway/config.toml. Do NOT modify .mcp.json — that file is committed and the path would be wrong for every other developer. If .claude/settings.local.json already has an mcpServers.<mcp_server_name> entry, the agent MUST merge into the existing env map (preserving other keys), not overwrite the whole entry."
555
+ layer = 2
556
+ tier = 1
557
+ status = "pending"
558
+ depends_on = ["U02"]
559
+ blocks = ["U04"]
560
+ estimated_loc = 20
561
+ files_modify = [".claude/settings.local.json"]
562
+ produces = ["ART:claude-local-settings"]
563
+ consumes = ["ART:gateway-config"]
564
+
565
+ [units.U04]
566
+ name = "trigger-mcp-reconnect"
567
+ summary = "Ask the operator to run /mcp in Claude Code (or restart Claude Code) so the gateway subprocess is re-spawned under the new env. The agent cannot do this itself — MCP server lifecycle is owned by the host."
568
+ layer = 3
569
+ tier = 1
570
+ status = "pending"
571
+ depends_on = ["U03"]
572
+ blocks = ["U05"]
573
+ estimated_loc = 0
574
+ files_modify = []
575
+ produces = ["OUT:mcp-reconnected"]
576
+ consumes = ["ART:claude-local-settings"]
577
+
578
+ [units.U05]
579
+ name = "verify-via-llm-process-health"
580
+ summary = "Call llm_process_health and assert the returned persistence block satisfies policy.gates.gate_health_confirms_isolation. Quote the verbatim persistence block in evidence. If the assertion fails, the agent MUST NOT mark the workflow complete — it must emit a finding under policy.evidence.findings_required_fields, naming the observed vs. expected configFile/path, and stop."
581
+ layer = 4
582
+ tier = 1
583
+ status = "pending"
584
+ depends_on = ["U04"]
585
+ blocks = []
586
+ estimated_loc = 5
587
+ files_modify = []
588
+ produces = ["ART:isolation-verification","OUT:per-project-isolation-complete"]
589
+ consumes = ["OUT:mcp-reconnected"]
590
+ ```
591
+
592
+ **Why this matters for agents:** the gateway has multiple configuration surfaces (TOML file, env-var overrides, two different MCP settings files) and one easy mistake — editing the committed `.mcp.json` instead of the local-only `.claude/settings.local.json` — will silently break the per-project scope for every other developer on the repo. The DAG above encodes the correct sequence, the verification gate, and the failure modes explicitly so an agent can execute it without inference.
354
593
 
355
594
  ##### `mistral_request`
356
595
  Run a Mistral Vibe agentic coding request. Like `grok_request` in shape, but with Vibe's specific surface:
@@ -582,11 +821,12 @@ await callTool("session_delete", {
582
821
  ```bash
583
822
  LLM_GATEWAY_APPROVAL_POLICY=strict node dist/index.js
584
823
  ```
585
- - `LLM_GATEWAY_LOGS_DB`: Path to SQLite flight recorder database. Default: `~/.llm-cli-gateway/logs.db`. Set to empty string or `none` to disable logging.
824
+ - `LLM_GATEWAY_CONFIG`: Path to the gateway TOML config (default: `~/.llm-cli-gateway/config.toml`). See **Persistence configuration** above for the `[persistence]` schema.
825
+ - `LLM_GATEWAY_LOGS_DB`: **Deprecated** — overrides `[persistence].path` and selects `backend = "sqlite"` (or `backend = "none"` when set to `none`). Emits a deprecation warning at startup; migrate to `config.toml`.
586
826
  ```bash
587
827
  # Custom path
588
828
  LLM_GATEWAY_LOGS_DB=/var/log/gateway/logs.db node dist/index.js
589
- # Disable flight recorder
829
+ # Disable durable persistence (also disables *_request_async tools)
590
830
  LLM_GATEWAY_LOGS_DB=none node dist/index.js
591
831
  ```
592
832
 
@@ -61,6 +61,14 @@ export declare class AsyncJobManager {
61
61
  private processMonitor;
62
62
  private store;
63
63
  constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null);
64
+ /**
65
+ * True iff a durable (or memory) job store is attached. The MCP-tool
66
+ * registration layer ANDs this with persistence.asyncJobsEnabled when
67
+ * deciding whether to register the *_request_async / llm_job_* tools.
68
+ * Without a store, async tools must not be registered, otherwise we
69
+ * re-open the silent in-memory loss path the structural invariant closes.
70
+ */
71
+ hasStore(): boolean;
64
72
  private emitMetrics;
65
73
  private evictCompletedJobs;
66
74
  /**
@@ -1,6 +1,5 @@
1
- import { spawn } from "child_process";
2
1
  import { randomUUID } from "crypto";
3
- import { getExtendedPath, killProcessGroup, registerProcessGroup, unregisterProcessGroup, } from "./executor.js";
2
+ import { getExtendedPath, killProcessGroup, spawnCliProcess, unregisterProcessGroup, } from "./executor.js";
4
3
  import { noopLogger } from "./logger.js";
5
4
  import { ProcessMonitor } from "./process-monitor.js";
6
5
  import { computeRequestKey } from "./job-store.js";
@@ -8,6 +7,19 @@ const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
8
7
  const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
9
8
  const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
10
9
  const OUTPUT_FLUSH_INTERVAL_MS = 1000; // Throttle DB writes for streaming stdout/stderr
10
+ function describeProcessLaunchError(cli, error) {
11
+ const code = error.code;
12
+ if (code === "ENOENT") {
13
+ return {
14
+ exitCode: 127,
15
+ message: `The '${cli}' command was not found. Install the ${cli} CLI and make sure it is on PATH. (${error.message})`,
16
+ };
17
+ }
18
+ return {
19
+ exitCode: 126,
20
+ message: `Failed to launch ${cli} CLI: ${error.message}`,
21
+ };
22
+ }
11
23
  /**
12
24
  * U22 fix: deterministic canonicalisation of an env-var map for the dedup key.
13
25
  * Returns "" when env is undefined or empty (preserves dedup key continuity for
@@ -62,6 +74,16 @@ export class AsyncJobManager {
62
74
  this.evictionTimer.unref();
63
75
  }
64
76
  }
77
+ /**
78
+ * True iff a durable (or memory) job store is attached. The MCP-tool
79
+ * registration layer ANDs this with persistence.asyncJobsEnabled when
80
+ * deciding whether to register the *_request_async / llm_job_* tools.
81
+ * Without a store, async tools must not be registered, otherwise we
82
+ * re-open the silent in-memory loss path the structural invariant closes.
83
+ */
84
+ hasStore() {
85
+ return this.store !== null;
86
+ }
65
87
  emitMetrics(job) {
66
88
  if (job.metricsRecorded)
67
89
  return;
@@ -335,15 +357,11 @@ export class AsyncJobManager {
335
357
  // Mistral Vibe ships as the `vibe` binary; the gateway uses `mistral` as the
336
358
  // provider key but spawns `vibe` on the shell.
337
359
  const command = cli === "mistral" ? "vibe" : cli;
338
- const child = spawn(command, args, {
360
+ const child = spawnCliProcess(command, args, {
339
361
  cwd,
340
- detached: true,
341
362
  stdio: ["ignore", "pipe", "pipe"],
342
363
  env: { ...process.env, PATH: getExtendedPath(), ...(extraEnv ?? {}) },
343
364
  });
344
- if (child.pid)
345
- registerProcessGroup(child.pid);
346
- child.unref();
347
365
  // Single cleanup flag to prevent double-unregister
348
366
  let groupCleaned = false;
349
367
  const cleanupGroup = () => {
@@ -436,10 +454,13 @@ export class AsyncJobManager {
436
454
  job.clearIdleTimer?.();
437
455
  job.cleanupGroup?.();
438
456
  if (job.status === "running") {
457
+ const launchError = describeProcessLaunchError(cli, error);
439
458
  job.status = job.canceled ? "canceled" : "failed";
440
- job.error = error.message;
459
+ job.exitCode = launchError.exitCode;
460
+ job.error = launchError.message;
461
+ job.stderr = job.stderr ? `${job.stderr}\n${launchError.message}` : launchError.message;
441
462
  job.finishedAt = new Date().toISOString();
442
- this.logger.error(`Job ${id} error: ${error.message}`, { correlationId });
463
+ this.logger.error(`Job ${id} error: ${launchError.message}`, { correlationId });
443
464
  this.emitMetrics(job);
444
465
  this.persistComplete(job);
445
466
  this.fireOnComplete(job);
@@ -453,7 +474,7 @@ export class AsyncJobManager {
453
474
  job.cleanupGroup?.();
454
475
  }
455
476
  if (job.status !== "running") {
456
- job.exitCode = code ?? job.exitCode;
477
+ job.exitCode = job.exitCode ?? code ?? null;
457
478
  if (!job.finishedAt) {
458
479
  job.finishedAt = new Date().toISOString();
459
480
  }
package/dist/config.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import type { Logger } from "./logger.js";
1
2
  export interface CacheTtl {
2
3
  session: number;
3
4
  activeSession: number;
@@ -33,3 +34,32 @@ export interface Config {
33
34
  * Database and Redis fields are populated only when both env vars are set.
34
35
  */
35
36
  export declare function loadConfig(): Config;
37
+ export declare const PERSISTENCE_BACKENDS: readonly ["sqlite", "postgres", "memory", "none"];
38
+ export type PersistenceBackend = (typeof PERSISTENCE_BACKENDS)[number];
39
+ export declare const DEFAULT_JOB_RETENTION_DAYS = 30;
40
+ export declare const DEFAULT_DEDUP_WINDOW_MS: number;
41
+ export interface PersistenceConfig {
42
+ backend: PersistenceBackend;
43
+ path: string | null;
44
+ dsn: string | null;
45
+ retentionDays: number;
46
+ dedupWindowMs: number;
47
+ acknowledgeEphemeral: boolean;
48
+ /** True iff async-job tools should be registered on the MCP server. */
49
+ asyncJobsEnabled: boolean;
50
+ /** Audit trail: which inputs (file, env vars) contributed to the resolved config. */
51
+ sources: PersistenceConfigSources;
52
+ }
53
+ export interface PersistenceConfigSources {
54
+ configFile: string | null;
55
+ envOverrides: string[];
56
+ }
57
+ /**
58
+ * Load and validate the persistence config from (in order, last-write-wins):
59
+ * 1. Built-in defaults (backend=sqlite, default retention/dedup).
60
+ * 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
61
+ * 3. Legacy env vars (with deprecation warning).
62
+ *
63
+ * Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
64
+ */
65
+ export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;