llm-cli-gateway 1.5.4 → 1.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +79 -0
- package/README.md +249 -9
- package/dist/async-job-manager.d.ts +8 -0
- package/dist/async-job-manager.js +31 -10
- package/dist/config.d.ts +30 -0
- package/dist/config.js +167 -0
- package/dist/entrypoint-url.d.ts +1 -0
- package/dist/entrypoint-url.js +5 -0
- package/dist/executor.d.ts +7 -1
- package/dist/executor.js +50 -15
- package/dist/index.d.ts +3 -0
- package/dist/index.js +764 -675
- package/dist/job-store.d.ts +118 -2
- package/dist/job-store.js +176 -5
- package/dist/session-manager-pg.d.ts +3 -2
- package/dist/session-manager-pg.js +16 -11
- package/dist/upstream-contracts.d.ts +62 -0
- package/dist/upstream-contracts.js +620 -0
- package/package.json +15 -7
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,85 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the llm-cli-gateway project.
|
|
4
4
|
|
|
5
|
+
## [1.5.14] - 2026-05-24
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Remove the Redis Lua `eval` lock-release path from production source and replace it with Redis `WATCH`/`MULTI` compare-and-delete semantics.
|
|
10
|
+
- Add exact direct production dependencies for `content-type@1.0.5` and `type-is@2.0.1` so packed consumer installs do not resolve the Socket-flagged `content-type@2.0.0` / `type-is@2.1.0` versions.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Add `npm run security:audit` as a CI/release gate covering `npm audit --omit=dev`, production source dynamic-execution scanning, blocked dependency-version checks, and a packed consumer install policy check.
|
|
15
|
+
|
|
16
|
+
## [1.5.13] - 2026-05-24
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Report missing provider CLI launches as a clear command-not-found error instead of leaking Windows/libuv codes such as `-4058`.
|
|
21
|
+
- Preserve async provider launch errors in job stderr/result output so sync MCP tools can return actionable setup guidance.
|
|
22
|
+
- Replace `irm | iex` Windows install guidance and generated release manifest commands with direct binary download plus SHA256 verification.
|
|
23
|
+
|
|
24
|
+
## [1.5.12] - 2026-05-24
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Stop detaching provider CLI processes on Windows so `ask_model` and async requests do not flash visible cmd/conhost windows.
|
|
29
|
+
- Use hidden Windows process creation for the bootstrapper's managed Node gateway process and status checks.
|
|
30
|
+
- Keep Windows process cleanup by killing provider process trees with hidden `taskkill.exe` instead of Unix process-group signals.
|
|
31
|
+
|
|
32
|
+
## [1.5.11] - 2026-05-24
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Install a stable Windows `llm-cli-gateway.exe` command alongside the versioned bootstrapper and add the install directory to the user PATH.
|
|
37
|
+
- Make the Windows one-command installer stop any running gateway before replacing the managed bundle, then run `start` and `doctor` through the stable command.
|
|
38
|
+
- Fix bootstrapper `status` and `stop` behavior on Windows so they do not depend on Unix-style PID probing.
|
|
39
|
+
|
|
40
|
+
## [1.5.10] - 2026-05-24
|
|
41
|
+
|
|
42
|
+
### Fixed
|
|
43
|
+
|
|
44
|
+
- Hide Windows console windows when the gateway spawns provider CLIs for synchronous and asynchronous requests.
|
|
45
|
+
|
|
46
|
+
## [1.5.9] - 2026-05-24
|
|
47
|
+
|
|
48
|
+
### Fixed
|
|
49
|
+
|
|
50
|
+
- Fix the Node entrypoint direct-run guard on Windows by using `pathToFileURL(realpathSync(...))` instead of constructing a POSIX-style file URL manually.
|
|
51
|
+
- Make the Windows one-command installer stop when bootstrapper commands fail by checking native process exit codes.
|
|
52
|
+
|
|
53
|
+
## [1.5.8] - 2026-05-24
|
|
54
|
+
|
|
55
|
+
### Fixed
|
|
56
|
+
|
|
57
|
+
- Make `start` wait for the local HTTP health endpoint before reporting success.
|
|
58
|
+
- Write gateway stdout/stderr to local log files so startup failures are diagnosable instead of returning a misleading PID.
|
|
59
|
+
|
|
60
|
+
## [1.5.7] - 2026-05-24
|
|
61
|
+
|
|
62
|
+
### Fixed
|
|
63
|
+
|
|
64
|
+
- Add a release-pinned `install-windows.ps1` asset so Windows users can install with one PowerShell command while still verifying the downloaded bootstrapper and platform bundle against `SHA256SUMS`.
|
|
65
|
+
- Add the Windows one-liner to `release-manifest.json` and upload the installer script as part of the desktop release workflow.
|
|
66
|
+
|
|
67
|
+
## [1.5.6] - 2026-05-24
|
|
68
|
+
|
|
69
|
+
### Fixed
|
|
70
|
+
|
|
71
|
+
- Replace the host-Node installer path with platform-specific verified bundles that include the compiled gateway, production dependencies, setup assets, and a managed Node runtime.
|
|
72
|
+
- Make the bootstrapper start the managed runtime from the installed bundle and require `RVWR_ALLOW_HOST_NODE=1` for the developer host-Node fallback.
|
|
73
|
+
- Update release packaging metadata and docs so Windows/macOS/Linux install instructions use `llm-cli-gateway-bundle-<version>-<os>-<arch>.tar.gz`.
|
|
74
|
+
- Update production dependencies (`@modelcontextprotocol/sdk`, `better-sqlite3`, and transitive Hono/AJV packages) so `npm audit --omit=dev` reports zero vulnerabilities while pinning `type-is` and `content-type` away from Socket-flagged latest releases.
|
|
75
|
+
|
|
76
|
+
## [1.5.5] - 2026-05-24
|
|
77
|
+
|
|
78
|
+
### Fixed
|
|
79
|
+
|
|
80
|
+
- Build desktop installer binaries on local self-hosted Linux, Windows, and macOS runners, then publish combined release metadata from the Linux packaging job.
|
|
81
|
+
- Make `installer/build-release.sh` default to the host target for local runs, with `--all-targets` / `RVWR_RELEASE_ALL_TARGETS=1` reserved for local full-matrix testing.
|
|
82
|
+
- Package setup UI/provider assets into the verified gateway bundle and let the setup UI resolve installed bundle assets from the managed gateway directory.
|
|
83
|
+
|
|
5
84
|
## [1.5.4] - 2026-05-19
|
|
6
85
|
|
|
7
86
|
### Fixed
|
package/README.md
CHANGED
|
@@ -20,7 +20,7 @@ Current personal-appliance artifacts include:
|
|
|
20
20
|
- Streamable HTTP startup: `LLM_GATEWAY_AUTH_TOKEN=<token> npm run start:http`
|
|
21
21
|
- Machine-readable diagnostics: `npm run doctor`
|
|
22
22
|
- Go bootstrapper scaffold: `installer/` with `setup`, `doctor --json`, `start`, `stop`, `status`, `repair`, `upgrade`, `uninstall`, `print-client-config`, and verified bundle download commands.
|
|
23
|
-
- Release packaging:
|
|
23
|
+
- Release packaging: the release workflow builds Linux binaries on the local self-hosted runner, builds Windows/macOS binaries on GitHub-hosted runners, then publishes checksummed platform bundles with the gateway, production dependencies, and a managed Node runtime; see [installer/packaging/README.md](installer/packaging/README.md).
|
|
24
24
|
- Docker Compose fallback: [docker-compose.personal.yml](docker-compose.personal.yml) + [Dockerfile.personal](Dockerfile.personal) for users who already manage containers.
|
|
25
25
|
- Local setup UI artifact: [setup/ui/index.html](setup/ui/index.html)
|
|
26
26
|
- Provider setup snippets: [setup/providers/](setup/providers/)
|
|
@@ -28,12 +28,35 @@ Current personal-appliance artifacts include:
|
|
|
28
28
|
|
|
29
29
|
### Install / Upgrade / Uninstall (single binary)
|
|
30
30
|
|
|
31
|
+
Windows PowerShell:
|
|
32
|
+
|
|
33
|
+
```powershell
|
|
34
|
+
$Version = '<version>'
|
|
35
|
+
$Base = "https://github.com/verivus-oss/llm-cli-gateway/releases/download/v$Version"
|
|
36
|
+
$InstallDir = Join-Path (Join-Path $env:LOCALAPPDATA 'Programs') 'llm-cli-gateway'
|
|
37
|
+
$Exe = Join-Path $InstallDir 'llm-cli-gateway.exe'
|
|
38
|
+
New-Item -ItemType Directory -Force $InstallDir | Out-Null
|
|
39
|
+
Invoke-WebRequest -UseBasicParsing "$Base/llm-cli-gateway-$Version-windows-amd64.exe" -OutFile $Exe
|
|
40
|
+
$env:RVWR_GATEWAY_BUNDLE_URL = "$Base/llm-cli-gateway-bundle-$Version-windows-amd64.tar.gz"
|
|
41
|
+
$env:RVWR_GATEWAY_BUNDLE_SHA256 = '<bundle-sha256-from-SHA256SUMS>'
|
|
42
|
+
& $Exe setup
|
|
43
|
+
& $Exe stop
|
|
44
|
+
& $Exe install-bundle
|
|
45
|
+
& $Exe start
|
|
46
|
+
& $Exe status
|
|
47
|
+
& $Exe doctor
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The Windows installer keeps a stable `llm-cli-gateway.exe` command in
|
|
51
|
+
`%LOCALAPPDATA%\Programs\llm-cli-gateway` and adds that directory to the user
|
|
52
|
+
PATH. Do not script against release-versioned exe names after install.
|
|
53
|
+
|
|
31
54
|
```bash
|
|
32
55
|
# After downloading the binary that matches your OS/arch from a release:
|
|
33
56
|
sha256sum --check SHA256SUMS # verify before run (or `shasum -a 256 --check` on macOS)
|
|
34
57
|
chmod +x llm-cli-gateway-<ver>-<os>-<arch>
|
|
35
58
|
./llm-cli-gateway-<ver>-<os>-<arch> setup
|
|
36
|
-
./llm-cli-gateway-<ver>-<os>-<arch> install-bundle # uses
|
|
59
|
+
./llm-cli-gateway-<ver>-<os>-<arch> install-bundle # uses the platform bundle URL/SHA256
|
|
37
60
|
./llm-cli-gateway-<ver>-<os>-<arch> start
|
|
38
61
|
./llm-cli-gateway-<ver>-<os>-<arch> doctor
|
|
39
62
|
|
|
@@ -340,17 +363,233 @@ Execute a Grok CLI (xAI) request with session support.
|
|
|
340
363
|
|
|
341
364
|
#### Durable job results & automatic dedup
|
|
342
365
|
|
|
343
|
-
Every async job is persisted to a
|
|
366
|
+
Every async job is persisted to a job store as it transitions through running → completed/failed/canceled. This makes the gateway a durable collection layer:
|
|
344
367
|
|
|
345
368
|
- **Re-issuing a request is safe.** Identical `*_request` / `*_request_async` calls within the dedup window (default 1 hour) short-circuit onto the existing running or completed job — the caller gets back the same job ID instead of starting a duplicate run. This directly fixes the "agent times out polling, re-issues, and the whole job starts over" failure mode.
|
|
346
369
|
- **`llm_job_status` and `llm_job_result` work across gateway restarts.** Job rows live for 30 days by default; callers can fetch results long after the in-memory cache has evicted them.
|
|
347
370
|
- **Jobs running at shutdown are marked `orphaned`** on the next gateway boot (the gateway cannot reattach to the detached child process). Their captured partial output remains readable.
|
|
348
371
|
- **Pass `forceRefresh: true`** on any request tool to bypass dedup and force a fresh CLI run.
|
|
349
372
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
373
|
+
##### Persistence configuration
|
|
374
|
+
|
|
375
|
+
The job-store backend is configured by `~/.llm-cli-gateway/config.toml` (override with `LLM_GATEWAY_CONFIG=/path/to/config.toml`). Example:
|
|
376
|
+
|
|
377
|
+
```toml
|
|
378
|
+
[persistence]
|
|
379
|
+
backend = "sqlite" # "sqlite" | "memory" | "postgres" | "none"
|
|
380
|
+
path = "~/.llm-cli-gateway/logs.db" # for sqlite
|
|
381
|
+
# dsn = "postgresql://user:pw@host/db" # for postgres (interface only — impl not yet shipped)
|
|
382
|
+
retentionDays = 30
|
|
383
|
+
dedupWindowMs = 3600000
|
|
384
|
+
acknowledgeEphemeral = false # required to enable async tools with memory backend
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
Backends:
|
|
388
|
+
- **`sqlite`** (default) — durable, file-backed. Safe for single-instance deployments.
|
|
389
|
+
- **`memory`** — in-process Map. Lost on gateway exit. Requires `acknowledgeEphemeral = true` to be loaded. Suitable for tests and ephemeral CI gateways.
|
|
390
|
+
- **`postgres`** — interface only, implementation not yet shipped. Selecting this backend throws at startup.
|
|
391
|
+
- **`none`** — no store. **`*_request_async`, `llm_job_status`, `llm_job_result`, and `llm_job_cancel` are NOT registered on the gateway.** This is a structural invariant: agents that try to call async tools against a gateway with `backend = "none"` get a clean "tool not found" at connect time instead of silent in-memory loss after the 1-hour TTL. Use `llm_process_health` to inspect the resolved persistence state programmatically.
|
|
392
|
+
|
|
393
|
+
Legacy environment variables (deprecated; emit a warning at startup):
|
|
394
|
+
- `LLM_GATEWAY_LOGS_DB` / `LLM_GATEWAY_JOBS_DB` — `none` selects `backend = "none"`; any other value selects `backend = "sqlite"` with that path.
|
|
395
|
+
- `LLM_GATEWAY_JOB_RETENTION_DAYS` — overrides `retentionDays`.
|
|
396
|
+
- `LLM_GATEWAY_DEDUP_WINDOW_MS` — overrides `dedupWindowMs`.
|
|
397
|
+
- `LLM_GATEWAY_ACKNOWLEDGE_EPHEMERAL` — `1`/`true`/`yes` sets `acknowledgeEphemeral = true`.
|
|
398
|
+
|
|
399
|
+
##### Per-project isolation
|
|
400
|
+
|
|
401
|
+
By default, **all gateway data is global per user**, not per project. With no overrides, every Claude Code window — across every repo — spawns its own gateway subprocess, but they all read and write the same files:
|
|
402
|
+
|
|
403
|
+
- `~/.llm-cli-gateway/logs.db` (async jobs + flight recorder)
|
|
404
|
+
- `~/.llm-cli-gateway/sessions.json` (CLI sessions)
|
|
405
|
+
- `~/.llm-cli-gateway/config.toml` (resolved config)
|
|
406
|
+
|
|
407
|
+
This is usually what you want — `session_list` from repo A shows sessions from repo B, an async job started in window A can be polled from window B, and the 1-hour dedup window catches re-issues across windows. SQLite WAL mode makes concurrent access from multiple gateway subprocesses safe.
|
|
408
|
+
|
|
409
|
+
If you instead want **per-project isolation** (e.g. unrelated repos shouldn't share session lists or risk false dedup hits), point each project at its own config file. In `.claude/settings.local.json` for the project:
|
|
410
|
+
|
|
411
|
+
```json
|
|
412
|
+
{
|
|
413
|
+
"mcpServers": {
|
|
414
|
+
"llm-gateway": {
|
|
415
|
+
"env": {
|
|
416
|
+
"LLM_GATEWAY_CONFIG": "${workspaceFolder}/.gateway/config.toml"
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
…and put a per-project `config.toml` in the repo:
|
|
424
|
+
|
|
425
|
+
```toml
|
|
426
|
+
[persistence]
|
|
427
|
+
backend = "sqlite"
|
|
428
|
+
path = "/srv/repos/.../my-repo/.gateway/logs.db"
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
Now every gateway subprocess spawned for *this* repo's Claude Code window reads its own config and writes to its own SQLite file; sessions, jobs, and dedup state are scoped to the repo. Other repos keep using the global default. `llm_process_health.persistence.sources.configFile` lets an agent confirm which config it's actually running under.
|
|
432
|
+
|
|
433
|
+
###### Agent-executable spec (DAG-TOML)
|
|
434
|
+
|
|
435
|
+
If you want an LLM agent to perform this setup deterministically — rather than reading the prose above and guessing — copy the following DAG-TOML into the repo (e.g. `docs/planning/per-project-gateway-isolation.toml`) and point your agent at it. The schema is [`agent-assurance`](https://github.com/verivus-oss/agent-assurance/blob/main/SPEC.md) `template_kind = "implementation-dag"`. The agent MUST execute units in layer order, must not skip the verification unit, and must treat any failed gate as blocking.
|
|
436
|
+
|
|
437
|
+
```toml
|
|
438
|
+
[meta]
|
|
439
|
+
schema_version = "1.0.0"
|
|
440
|
+
template_kind = "implementation-dag"
|
|
441
|
+
docs = "https://github.com/verivus-oss/agent-assurance/blob/main/SPEC.md"
|
|
442
|
+
confidentiality = "public"
|
|
443
|
+
title = "Per-project llm-cli-gateway persistence isolation"
|
|
444
|
+
spec = "https://github.com/verivusai-labs/llm-cli-gateway#per-project-isolation"
|
|
445
|
+
created = "YYYY-MM-DD"
|
|
446
|
+
total_units = 5
|
|
447
|
+
tier1_units = ["U01","U02","U03","U04","U05"]
|
|
448
|
+
tier2_units = []
|
|
449
|
+
tier3_units = []
|
|
450
|
+
|
|
451
|
+
# ============================================================================
|
|
452
|
+
# [policy.agent] — persona for the agent performing the configuration.
|
|
453
|
+
# ============================================================================
|
|
454
|
+
|
|
455
|
+
[policy.agent]
|
|
456
|
+
name = "Gateway Persistence Isolator"
|
|
457
|
+
role = "Configuration Engineer"
|
|
458
|
+
purpose = "Configure the llm-cli-gateway MCP server so its async job store, sessions, dedup state, and flight recorder are scoped to THIS repository instead of the per-user default at ~/.llm-cli-gateway/."
|
|
459
|
+
validation_type = "Structural + Runtime Verification"
|
|
460
|
+
workflow_initiator = false
|
|
461
|
+
description = "Writes a repo-local config.toml, registers an LLM_GATEWAY_CONFIG override in .claude/settings.local.json, restarts the MCP server, and confirms via llm_process_health that the gateway is now reading the repo-local config and writing to the repo-local SQLite path."
|
|
462
|
+
|
|
463
|
+
[policy.agent.orchestration]
|
|
464
|
+
consumes_events = ["PerProjectIsolationRequested"]
|
|
465
|
+
produces_events = ["PerProjectIsolationComplete"]
|
|
466
|
+
|
|
467
|
+
[policy.agent.responsibilities]
|
|
468
|
+
items = [
|
|
469
|
+
"Create the repo-local gateway data directory and add it to .gitignore.",
|
|
470
|
+
"Write a config.toml that pins backend=sqlite to a repo-local path.",
|
|
471
|
+
"Register the LLM_GATEWAY_CONFIG env override in .claude/settings.local.json (NOT .mcp.json — that file is committed and shared).",
|
|
472
|
+
"Trigger an MCP server reconnect.",
|
|
473
|
+
"Verify via llm_process_health that the resolved configFile and dbPath are the repo-local values.",
|
|
474
|
+
]
|
|
475
|
+
|
|
476
|
+
# ============================================================================
|
|
477
|
+
# [policy.instance] — concrete paths the agent fills in for THIS repo.
|
|
478
|
+
# Agent MUST replace <REPO_ABS_PATH> with the absolute path to the repo
|
|
479
|
+
# before emitting any artefact. Relative paths in config.toml MUST be
|
|
480
|
+
# expanded to absolute — the gateway does not re-resolve them per cwd.
|
|
481
|
+
# ============================================================================
|
|
482
|
+
|
|
483
|
+
[policy.instance]
|
|
484
|
+
repo_abs_path = "<REPO_ABS_PATH>" # e.g. /srv/repos/me/my-project
|
|
485
|
+
gateway_data_dir_relative = ".gateway" # repo-relative directory
|
|
486
|
+
config_toml_relative = ".gateway/config.toml"
|
|
487
|
+
sqlite_db_relative = ".gateway/logs.db"
|
|
488
|
+
claude_local_settings_relative = ".claude/settings.local.json"
|
|
489
|
+
gitignore_relative = ".gitignore"
|
|
490
|
+
mcp_server_name = "llm-gateway" # must match the entry in .mcp.json
|
|
491
|
+
|
|
492
|
+
# ============================================================================
|
|
493
|
+
# [policy.gates] — blocking checks. Any failure stops the workflow.
|
|
494
|
+
# ============================================================================
|
|
495
|
+
|
|
496
|
+
[policy.gates]
|
|
497
|
+
gate_repo_abs_path_resolved = "policy.instance.repo_abs_path must NOT be the literal string '<REPO_ABS_PATH>' when U01 starts."
|
|
498
|
+
gate_config_is_committed = "policy.instance.config_toml_relative MAY be committed. policy.instance.claude_local_settings_relative MUST NOT be committed (it is per-developer). Agent MUST verify that .gitignore covers .claude/settings.local.json, adding the entry if it is absent."
|
|
499
|
+
gate_no_legacy_env_leak = "Agent MUST grep the shell init files for LLM_GATEWAY_LOGS_DB / LLM_GATEWAY_JOBS_DB. If set, the legacy env var will override the new config and the deprecation warning will fire at every gateway boot. The agent reports this as a finding and asks the operator to unset before proceeding."
|
|
500
|
+
gate_health_confirms_isolation = "U05 MUST observe llm_process_health.persistence.sources.configFile == policy.instance.repo_abs_path + '/' + policy.instance.config_toml_relative AND llm_process_health.persistence.path == policy.instance.repo_abs_path + '/' + policy.instance.sqlite_db_relative. Anything else means the override did not take effect."
|
|
501
|
+
|
|
502
|
+
# ============================================================================
|
|
503
|
+
# [policy.evidence] — what each unit must emit so the work is auditable.
|
|
504
|
+
# ============================================================================
|
|
505
|
+
|
|
506
|
+
[policy.evidence]
|
|
507
|
+
per_unit_required_fields = [
|
|
508
|
+
"unit_id", # U01..U05
|
|
509
|
+
"status", # "completed" | "failed"
|
|
510
|
+
"artefact_paths", # files written / modified
|
|
511
|
+
"stdout_tail", # last 20 lines of any command output
|
|
512
|
+
"verification_quote", # for U05, the verbatim llm_process_health.persistence block
|
|
513
|
+
]
|
|
514
|
+
findings_required_fields = [
|
|
515
|
+
"gate_id", # which gate failed
|
|
516
|
+
"observed",
|
|
517
|
+
"expected",
|
|
518
|
+
"remediation",
|
|
519
|
+
]
|
|
520
|
+
|
|
521
|
+
# ============================================================================
|
|
522
|
+
# Units. Execute in layer order. U01..U03 modify the working tree; U04
|
|
523
|
+
# triggers a reconnect; U05 is the verification gate that decides success.
|
|
524
|
+
# ============================================================================
|
|
525
|
+
|
|
526
|
+
[units.U01]
|
|
527
|
+
name = "create-repo-local-data-dir"
|
|
528
|
+
summary = "mkdir -p <repo>/.gateway and append /.gateway/ to .gitignore (creating .gitignore if missing). The gateway will write logs.db, logs.db-wal, logs.db-shm here — none should be committed."
|
|
529
|
+
layer = 0
|
|
530
|
+
tier = 1
|
|
531
|
+
status = "pending"
|
|
532
|
+
depends_on = []
|
|
533
|
+
blocks = ["U02"]
|
|
534
|
+
estimated_loc = 5
|
|
535
|
+
files_modify = [".gitignore"]
|
|
536
|
+
produces = ["ART:gateway-data-dir"]
|
|
537
|
+
consumes = []
|
|
538
|
+
|
|
539
|
+
[units.U02]
|
|
540
|
+
name = "write-config-toml"
|
|
541
|
+
summary = "Write <repo>/.gateway/config.toml with [persistence] backend='sqlite' and path=<absolute-path-to-repo>/.gateway/logs.db. Path MUST be absolute. Do NOT use ~ — the gateway expands only a leading ~/ (any other form is read literally), and Claude Code may launch the gateway with a HOME that surprises you."
|
|
542
|
+
layer = 1
|
|
543
|
+
tier = 1
|
|
544
|
+
status = "pending"
|
|
545
|
+
depends_on = ["U01"]
|
|
546
|
+
blocks = ["U03"]
|
|
547
|
+
estimated_loc = 10
|
|
548
|
+
files_modify = [".gateway/config.toml"]
|
|
549
|
+
produces = ["ART:gateway-config"]
|
|
550
|
+
consumes = ["ART:gateway-data-dir"]
|
|
551
|
+
|
|
552
|
+
[units.U03]
|
|
553
|
+
name = "register-llm-gateway-config-env-in-claude-local-settings"
|
|
554
|
+
summary = "Add (or merge) an mcpServers.<mcp_server_name>.env entry in .claude/settings.local.json that sets LLM_GATEWAY_CONFIG to the absolute path of .gateway/config.toml. Do NOT modify .mcp.json — that file is committed and the path would be wrong for every other developer. If .claude/settings.local.json already has an mcpServers.<mcp_server_name> entry, the agent MUST merge into the existing env map (preserving other keys), not overwrite the whole entry."
|
|
555
|
+
layer = 2
|
|
556
|
+
tier = 1
|
|
557
|
+
status = "pending"
|
|
558
|
+
depends_on = ["U02"]
|
|
559
|
+
blocks = ["U04"]
|
|
560
|
+
estimated_loc = 20
|
|
561
|
+
files_modify = [".claude/settings.local.json"]
|
|
562
|
+
produces = ["ART:claude-local-settings"]
|
|
563
|
+
consumes = ["ART:gateway-config"]
|
|
564
|
+
|
|
565
|
+
[units.U04]
|
|
566
|
+
name = "trigger-mcp-reconnect"
|
|
567
|
+
summary = "Ask the operator to run /mcp in Claude Code (or restart Claude Code) so the gateway subprocess is re-spawned under the new env. The agent cannot do this itself — MCP server lifecycle is owned by the host."
|
|
568
|
+
layer = 3
|
|
569
|
+
tier = 1
|
|
570
|
+
status = "pending"
|
|
571
|
+
depends_on = ["U03"]
|
|
572
|
+
blocks = ["U05"]
|
|
573
|
+
estimated_loc = 0
|
|
574
|
+
files_modify = []
|
|
575
|
+
produces = ["OUT:mcp-reconnected"]
|
|
576
|
+
consumes = ["ART:claude-local-settings"]
|
|
577
|
+
|
|
578
|
+
[units.U05]
|
|
579
|
+
name = "verify-via-llm-process-health"
|
|
580
|
+
summary = "Call llm_process_health and assert the returned persistence block satisfies policy.gates.gate_health_confirms_isolation. Quote the verbatim persistence block in evidence. If the assertion fails, the agent MUST NOT mark the workflow complete — it must emit a finding under policy.evidence.findings_required_fields, naming the observed vs. expected configFile/path, and stop."
|
|
581
|
+
layer = 4
|
|
582
|
+
tier = 1
|
|
583
|
+
status = "pending"
|
|
584
|
+
depends_on = ["U04"]
|
|
585
|
+
blocks = []
|
|
586
|
+
estimated_loc = 5
|
|
587
|
+
files_modify = []
|
|
588
|
+
produces = ["ART:isolation-verification","OUT:per-project-isolation-complete"]
|
|
589
|
+
consumes = ["OUT:mcp-reconnected"]
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
**Why this matters for agents:** the gateway has multiple configuration surfaces (TOML file, env-var overrides, two different MCP settings files) and one easy mistake — editing the committed `.mcp.json` instead of the local-only `.claude/settings.local.json` — will silently break the per-project scope for every other developer on the repo. The DAG above encodes the correct sequence, the verification gate, and the failure modes explicitly so an agent can execute it without inference.
|
|
354
593
|
|
|
355
594
|
##### `mistral_request`
|
|
356
595
|
Run a Mistral Vibe agentic coding request. Like `grok_request` in shape, but with Vibe's specific surface:
|
|
@@ -582,11 +821,12 @@ await callTool("session_delete", {
|
|
|
582
821
|
```bash
|
|
583
822
|
LLM_GATEWAY_APPROVAL_POLICY=strict node dist/index.js
|
|
584
823
|
```
|
|
585
|
-
- `
|
|
824
|
+
- `LLM_GATEWAY_CONFIG`: Path to the gateway TOML config (default: `~/.llm-cli-gateway/config.toml`). See **Persistence configuration** above for the `[persistence]` schema.
|
|
825
|
+
- `LLM_GATEWAY_LOGS_DB`: **Deprecated** — overrides `[persistence].path` and selects `backend = "sqlite"` (or `backend = "none"` when set to `none`). Emits a deprecation warning at startup; migrate to `config.toml`.
|
|
586
826
|
```bash
|
|
587
827
|
# Custom path
|
|
588
828
|
LLM_GATEWAY_LOGS_DB=/var/log/gateway/logs.db node dist/index.js
|
|
589
|
-
# Disable
|
|
829
|
+
# Disable durable persistence (also disables *_request_async tools)
|
|
590
830
|
LLM_GATEWAY_LOGS_DB=none node dist/index.js
|
|
591
831
|
```
|
|
592
832
|
|
|
@@ -61,6 +61,14 @@ export declare class AsyncJobManager {
|
|
|
61
61
|
private processMonitor;
|
|
62
62
|
private store;
|
|
63
63
|
constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null);
|
|
64
|
+
/**
|
|
65
|
+
* True iff a durable (or memory) job store is attached. The MCP-tool
|
|
66
|
+
* registration layer ANDs this with persistence.asyncJobsEnabled when
|
|
67
|
+
* deciding whether to register the *_request_async / llm_job_* tools.
|
|
68
|
+
* Without a store, async tools must not be registered, otherwise we
|
|
69
|
+
* re-open the silent in-memory loss path the structural invariant closes.
|
|
70
|
+
*/
|
|
71
|
+
hasStore(): boolean;
|
|
64
72
|
private emitMetrics;
|
|
65
73
|
private evictCompletedJobs;
|
|
66
74
|
/**
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { spawn } from "child_process";
|
|
2
1
|
import { randomUUID } from "crypto";
|
|
3
|
-
import { getExtendedPath, killProcessGroup,
|
|
2
|
+
import { getExtendedPath, killProcessGroup, spawnCliProcess, unregisterProcessGroup, } from "./executor.js";
|
|
4
3
|
import { noopLogger } from "./logger.js";
|
|
5
4
|
import { ProcessMonitor } from "./process-monitor.js";
|
|
6
5
|
import { computeRequestKey } from "./job-store.js";
|
|
@@ -8,6 +7,19 @@ const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
|
|
|
8
7
|
const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
|
|
9
8
|
const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
|
|
10
9
|
const OUTPUT_FLUSH_INTERVAL_MS = 1000; // Throttle DB writes for streaming stdout/stderr
|
|
10
|
+
function describeProcessLaunchError(cli, error) {
|
|
11
|
+
const code = error.code;
|
|
12
|
+
if (code === "ENOENT") {
|
|
13
|
+
return {
|
|
14
|
+
exitCode: 127,
|
|
15
|
+
message: `The '${cli}' command was not found. Install the ${cli} CLI and make sure it is on PATH. (${error.message})`,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
return {
|
|
19
|
+
exitCode: 126,
|
|
20
|
+
message: `Failed to launch ${cli} CLI: ${error.message}`,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
11
23
|
/**
|
|
12
24
|
* U22 fix: deterministic canonicalisation of an env-var map for the dedup key.
|
|
13
25
|
* Returns "" when env is undefined or empty (preserves dedup key continuity for
|
|
@@ -62,6 +74,16 @@ export class AsyncJobManager {
|
|
|
62
74
|
this.evictionTimer.unref();
|
|
63
75
|
}
|
|
64
76
|
}
|
|
77
|
+
/**
|
|
78
|
+
* True iff a durable (or memory) job store is attached. The MCP-tool
|
|
79
|
+
* registration layer ANDs this with persistence.asyncJobsEnabled when
|
|
80
|
+
* deciding whether to register the *_request_async / llm_job_* tools.
|
|
81
|
+
* Without a store, async tools must not be registered, otherwise we
|
|
82
|
+
* re-open the silent in-memory loss path the structural invariant closes.
|
|
83
|
+
*/
|
|
84
|
+
hasStore() {
|
|
85
|
+
return this.store !== null;
|
|
86
|
+
}
|
|
65
87
|
emitMetrics(job) {
|
|
66
88
|
if (job.metricsRecorded)
|
|
67
89
|
return;
|
|
@@ -335,15 +357,11 @@ export class AsyncJobManager {
|
|
|
335
357
|
// Mistral Vibe ships as the `vibe` binary; the gateway uses `mistral` as the
|
|
336
358
|
// provider key but spawns `vibe` on the shell.
|
|
337
359
|
const command = cli === "mistral" ? "vibe" : cli;
|
|
338
|
-
const child =
|
|
360
|
+
const child = spawnCliProcess(command, args, {
|
|
339
361
|
cwd,
|
|
340
|
-
detached: true,
|
|
341
362
|
stdio: ["ignore", "pipe", "pipe"],
|
|
342
363
|
env: { ...process.env, PATH: getExtendedPath(), ...(extraEnv ?? {}) },
|
|
343
364
|
});
|
|
344
|
-
if (child.pid)
|
|
345
|
-
registerProcessGroup(child.pid);
|
|
346
|
-
child.unref();
|
|
347
365
|
// Single cleanup flag to prevent double-unregister
|
|
348
366
|
let groupCleaned = false;
|
|
349
367
|
const cleanupGroup = () => {
|
|
@@ -436,10 +454,13 @@ export class AsyncJobManager {
|
|
|
436
454
|
job.clearIdleTimer?.();
|
|
437
455
|
job.cleanupGroup?.();
|
|
438
456
|
if (job.status === "running") {
|
|
457
|
+
const launchError = describeProcessLaunchError(cli, error);
|
|
439
458
|
job.status = job.canceled ? "canceled" : "failed";
|
|
440
|
-
job.
|
|
459
|
+
job.exitCode = launchError.exitCode;
|
|
460
|
+
job.error = launchError.message;
|
|
461
|
+
job.stderr = job.stderr ? `${job.stderr}\n${launchError.message}` : launchError.message;
|
|
441
462
|
job.finishedAt = new Date().toISOString();
|
|
442
|
-
this.logger.error(`Job ${id} error: ${
|
|
463
|
+
this.logger.error(`Job ${id} error: ${launchError.message}`, { correlationId });
|
|
443
464
|
this.emitMetrics(job);
|
|
444
465
|
this.persistComplete(job);
|
|
445
466
|
this.fireOnComplete(job);
|
|
@@ -453,7 +474,7 @@ export class AsyncJobManager {
|
|
|
453
474
|
job.cleanupGroup?.();
|
|
454
475
|
}
|
|
455
476
|
if (job.status !== "running") {
|
|
456
|
-
job.exitCode = code ??
|
|
477
|
+
job.exitCode = job.exitCode ?? code ?? null;
|
|
457
478
|
if (!job.finishedAt) {
|
|
458
479
|
job.finishedAt = new Date().toISOString();
|
|
459
480
|
}
|
package/dist/config.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Logger } from "./logger.js";
|
|
1
2
|
export interface CacheTtl {
|
|
2
3
|
session: number;
|
|
3
4
|
activeSession: number;
|
|
@@ -33,3 +34,32 @@ export interface Config {
|
|
|
33
34
|
* Database and Redis fields are populated only when both env vars are set.
|
|
34
35
|
*/
|
|
35
36
|
export declare function loadConfig(): Config;
|
|
37
|
+
export declare const PERSISTENCE_BACKENDS: readonly ["sqlite", "postgres", "memory", "none"];
|
|
38
|
+
export type PersistenceBackend = (typeof PERSISTENCE_BACKENDS)[number];
|
|
39
|
+
export declare const DEFAULT_JOB_RETENTION_DAYS = 30;
|
|
40
|
+
export declare const DEFAULT_DEDUP_WINDOW_MS: number;
|
|
41
|
+
export interface PersistenceConfig {
|
|
42
|
+
backend: PersistenceBackend;
|
|
43
|
+
path: string | null;
|
|
44
|
+
dsn: string | null;
|
|
45
|
+
retentionDays: number;
|
|
46
|
+
dedupWindowMs: number;
|
|
47
|
+
acknowledgeEphemeral: boolean;
|
|
48
|
+
/** True iff async-job tools should be registered on the MCP server. */
|
|
49
|
+
asyncJobsEnabled: boolean;
|
|
50
|
+
/** Audit trail: which inputs (file, env vars) contributed to the resolved config. */
|
|
51
|
+
sources: PersistenceConfigSources;
|
|
52
|
+
}
|
|
53
|
+
export interface PersistenceConfigSources {
|
|
54
|
+
configFile: string | null;
|
|
55
|
+
envOverrides: string[];
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Load and validate the persistence config from (in order, last-write-wins):
|
|
59
|
+
* 1. Built-in defaults (backend=sqlite, default retention/dedup).
|
|
60
|
+
* 2. ~/.llm-cli-gateway/config.toml (or $LLM_GATEWAY_CONFIG).
|
|
61
|
+
* 3. Legacy env vars (with deprecation warning).
|
|
62
|
+
*
|
|
63
|
+
* Throws on incoherent configs (memory/none + asyncJobsEnabled without ack).
|
|
64
|
+
*/
|
|
65
|
+
export declare function loadPersistenceConfig(logger?: Logger): PersistenceConfig;
|