llm-cli-gateway 1.6.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +181 -0
- package/dist/async-job-manager.d.ts +70 -2
- package/dist/async-job-manager.js +166 -6
- package/dist/codex-json-parser.js +4 -1
- package/dist/index.d.ts +32 -0
- package/dist/index.js +152 -36
- package/dist/job-store.d.ts +43 -4
- package/dist/job-store.js +28 -2
- package/dist/mistral-meta-json-parser.d.ts +6 -0
- package/dist/mistral-meta-json-parser.js +175 -0
- package/dist/request-helpers.d.ts +14 -5
- package/dist/request-helpers.js +8 -5
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,187 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the llm-cli-gateway project.
|
|
4
4
|
|
|
5
|
+
## [1.8.0] - 2026-05-27 — Phase 4 openers (codex resume fix, mistral telemetry, headless trust flags)
|
|
6
|
+
|
|
7
|
+
Ships the first three slices of the Phase 4 provider-modernisation
|
|
8
|
+
backlog, one bug fix and two small features. Multi-LLM review surfaced
|
|
9
|
+
five additional bug classes during the cycle (path traversal, UUID→dir
|
|
10
|
+
resolution gap, sync usage ctx drop, retry-path flag drop, symlink
|
|
11
|
+
boundary bypass); all are addressed in the two follow-up fix commits.
|
|
12
|
+
|
|
13
|
+
### Fixed — Codex `--output-schema` + `-c/--config` on `exec resume`
|
|
14
|
+
|
|
15
|
+
- `prepareCodexRequest` previously dropped `outputSchema` and
|
|
16
|
+
`configOverrides` on the resume branch because the U26 audit assumed
|
|
17
|
+
`codex exec resume` rejected both flags. Live re-verification against
|
|
18
|
+
`codex exec resume --help` (codex-cli 0.133.0) confirms both ARE
|
|
19
|
+
accepted on resume; only `--search` remains resume-incompatible. The
|
|
20
|
+
resume branch now threads both fields through, reusing the existing
|
|
21
|
+
outputSchema temp-file materialisation + cleanup contract.
|
|
22
|
+
`CODEX_RESUME_FILTERED_FLAGS` no longer strips `--output-schema`.
|
|
23
|
+
|
|
24
|
+
### Added — Mistral Vibe `meta.json` usage / cost telemetry
|
|
25
|
+
|
|
26
|
+
- New `src/mistral-meta-json-parser.ts` reads
|
|
27
|
+
`~/.vibe/logs/session/session_<YYYYMMDD>_<HHMMSS>_<first8hex>/meta.json`
|
|
28
|
+
(the actual filename — an earlier TODO at `src/index.ts:750` said
|
|
29
|
+
`metadata.json`, which was incorrect). Maps `stats.session_prompt_tokens`,
|
|
30
|
+
`stats.session_completion_tokens`, and `stats.session_cost` onto the
|
|
31
|
+
gateway's `inputTokens`/`outputTokens`/`costUsd` flight-recorder
|
|
32
|
+
columns. Cache-token surfaces stay undefined — Vibe doesn't expose
|
|
33
|
+
them today.
|
|
34
|
+
- The gateway's mistral sessionId surface accepts the full UUID (to match
|
|
35
|
+
`vibe --resume <uuid>`), but Vibe persists telemetry under
|
|
36
|
+
`session_<ts>_<first8>` directories. The new resolver globs by the
|
|
37
|
+
leading 8-hex prefix and verifies each candidate's `session_id` field
|
|
38
|
+
before returning — required for every UUID input including
|
|
39
|
+
single-match cases, so two UUIDs sharing the leading 8 hex chars never
|
|
40
|
+
cross-attribute usage.
|
|
41
|
+
- `extractUsageAndCost` and `buildAsyncFlightRecorderHandoff` thread a
|
|
42
|
+
primitives-only `{ sessionId, home }` context so the AsyncJobRecord
|
|
43
|
+
retention stays O(constant). `buildCliResponse` passes the same ctx so
|
|
44
|
+
sync `mistral_request` resume calls populate structured usage in their
|
|
45
|
+
response (not just the flight-recorder row).
|
|
46
|
+
|
|
47
|
+
### Added — Headless trust-prompt bypass for Gemini + Mistral
|
|
48
|
+
|
|
49
|
+
- New optional `skipTrust?: boolean` field on `gemini_request` and
|
|
50
|
+
`gemini_request_async`, defaulting `false`. When set, emits
|
|
51
|
+
`--skip-trust` so fresh workspaces don't block headless invocations on
|
|
52
|
+
Gemini's interactive trust prompt.
|
|
53
|
+
- New optional `trust?: boolean` field on `mistral_request` and
|
|
54
|
+
`mistral_request_async`, defaulting `false`. When set, emits `--trust`
|
|
55
|
+
(per-invocation only, not persisted to `trusted_folders.toml`) so
|
|
56
|
+
fresh workspaces don't block headless Vibe runs. Preserved on the
|
|
57
|
+
stale-model recovery retry path so a fresh untrusted workspace can't
|
|
58
|
+
deadlock on the second attempt.
|
|
59
|
+
- Default `false` preserves existing prompt behaviour for legacy
|
|
60
|
+
callers.
|
|
61
|
+
|
|
62
|
+
### Security
|
|
63
|
+
|
|
64
|
+
- `parseVibeMetaJson` enforces a strict input charset (UUID-shape OR
|
|
65
|
+
`^session_\d{8}_\d{6}_[0-9a-f]{8}$` Vibe dir basename) before any
|
|
66
|
+
filesystem access.
|
|
67
|
+
- New `readInBase(realBase, candidate)` helper realpath-resolves both
|
|
68
|
+
ends and rejects targets whose final inode lives outside the session
|
|
69
|
+
log root. Both the resolver's disambiguation reads and the final
|
|
70
|
+
parser read route through it, so an in-tree symlink to an
|
|
71
|
+
out-of-tree directory (or symlinked meta.json) cannot leak file
|
|
72
|
+
contents outside `~/.vibe/logs/session/`.
|
|
73
|
+
- Test coverage: traversal inputs (`../`, absolute, control-char,
|
|
74
|
+
embedded `../`), single-candidate prefix-collision rejection,
|
|
75
|
+
symlink-to-outside-baseDir rejection.
|
|
76
|
+
|
|
77
|
+
## [1.7.0] - 2026-05-26 — cache-awareness slice 1.5 (async-path flight recorder + codex parser fix)
|
|
78
|
+
|
|
79
|
+
Closes the two telemetry gaps that v1.6.0 explicitly deferred: async-path
|
|
80
|
+
flight-recorder integration and Codex parser support for the actual
|
|
81
|
+
`cached_input_tokens` field the current Codex CLI emits. Both ship
|
|
82
|
+
together because they jointly close out `cache_state://*` completeness
|
|
83
|
+
for the async tools and the codex CLI.
|
|
84
|
+
|
|
85
|
+
### Added — async-path flight recorder writes
|
|
86
|
+
|
|
87
|
+
- `AsyncJobManager` now accepts a `FlightRecorderLike` constructor
|
|
88
|
+
dependency (defaults to `NoopFlightRecorder` for tests that don't
|
|
89
|
+
inject one). `StartJobOptions` extended with `writeFlightStart`,
|
|
90
|
+
`flightRecorderEntry`, and `extractUsage` — pure async tools
|
|
91
|
+
(`*_request_async`) pass `writeFlightStart: true` so the manager owns
|
|
92
|
+
the row. The legacy positional `startJob(...)` signature was extended
|
|
93
|
+
with trailing optional params so existing callers keep working.
|
|
94
|
+
- New private `writeFlightComplete` helper inside the manager fires on
|
|
95
|
+
every terminal-state code path (close handler, error handler, idle
|
|
96
|
+
timeout, output overflow, cancelJob, evictCompletedJobs dead-process
|
|
97
|
+
and exited-mismatch branches). Failure payload mirrors sync-helper
|
|
98
|
+
semantics: `response = stderr || stdout` on failure, `errorMessage`
|
|
99
|
+
falls back through override → `job.error` → `job.stderr` →
|
|
100
|
+
`"Exit code N"`. Single-shot guard set only on successful write so a
|
|
101
|
+
thrown `logComplete` can be retried by a later terminal callback.
|
|
102
|
+
- New public `armFlightCompleteForDeferral(jobId)` on AsyncJobManager.
|
|
103
|
+
Called by `awaitJobOrDefer` in `src/index.ts` immediately before
|
|
104
|
+
returning a `DeferredJobResponse` — this lets the sync handler keep
|
|
105
|
+
ownership of the rich-metadata `safeFlightComplete` write for
|
|
106
|
+
sync-inline completions, while still ensuring deferred-from-sync rows
|
|
107
|
+
get a terminal `logComplete` from the manager when the underlying job
|
|
108
|
+
finishes. Includes a race-mitigation immediate-write path if the job
|
|
109
|
+
already terminated before the arm signal landed.
|
|
110
|
+
- `JobStore.markOrphanedOnStartup()` return shape extended from `number`
|
|
111
|
+
to `{ count, orphaned: Array<{ id, correlationId, startedAt, stdout,
|
|
112
|
+
stderr, exitCode }> }` so the manager constructor can write FR
|
|
113
|
+
`logComplete` rows for previously orphaned jobs with proper audit data
|
|
114
|
+
(durationMs from `startedAt`, response from `stderr || stdout`,
|
|
115
|
+
errorMessage `"orphaned after gateway restart"`). `SqliteJobStore`
|
|
116
|
+
SELECTs the per-orphan fields before the orphan-flip UPDATE; no
|
|
117
|
+
transaction wrapper needed because gateway boot is single-threaded
|
|
118
|
+
before any new jobs can arrive. `MemoryJobStore` returns
|
|
119
|
+
`{ count: 0, orphaned: [] }` (in-process state can't be orphaned).
|
|
120
|
+
Breaking change to the `JobStore` interface; the `PostgresJobStore`
|
|
121
|
+
stub was updated to match (the impl is still not yet shipped).
|
|
122
|
+
- `cache_state://global`, `cache_state://session/{id}`, and
|
|
123
|
+
`cache_state://prefix/{hash}` aggregates now include async-job
|
|
124
|
+
activity. No query changes — `cache_state://*` already didn't filter
|
|
125
|
+
on `asyncJobId`, so the new rows participate naturally.
|
|
126
|
+
|
|
127
|
+
### Fixed — Codex parser accepts current CLI's cache-token field
|
|
128
|
+
|
|
129
|
+
- `src/codex-json-parser.ts` now reads `cached_input_tokens` (preferred,
|
|
130
|
+
what Codex CLI ≥0.133.0 emits) in addition to the legacy
|
|
131
|
+
`cache_read_input_tokens` and the bare `cache_read_tokens` fallback.
|
|
132
|
+
Live smoke-tested against Codex CLI on 2026-05-26 — see
|
|
133
|
+
`docs/personal-mcp/PROVIDER_CACHE_SURFACES.md` "Codex — field name
|
|
134
|
+
divergence" for the exact invocation. Cache hits on codex rows now
|
|
135
|
+
populate the FR's `cache_read_tokens` column.
|
|
136
|
+
|
|
137
|
+
### Known limitation — sync-deferred-dedup orphan rows
|
|
138
|
+
|
|
139
|
+
When a sync request dedup-hits an in-flight original job AND the sync
|
|
140
|
+
deadline expires before the original finishes, the dedup'd caller's
|
|
141
|
+
sync-side `logStart` row stays at `status='started'` forever. The
|
|
142
|
+
manager's `logComplete` writes to the ORIGINAL job's correlationId, not
|
|
143
|
+
the dedup'd caller's. This is a pre-existing limitation surfaced by the
|
|
144
|
+
slice's clearer accounting; it predates v1.7.0 and is not a regression.
|
|
145
|
+
A future slice can address it via per-request corrId fan-out.
|
|
146
|
+
|
|
147
|
+
### Cross-table asymmetry — `canceled` / `orphaned` jobs in the FR
|
|
148
|
+
|
|
149
|
+
`FlightLogResult.status` only carries `"completed" | "failed"`, so
|
|
150
|
+
canceled and orphaned async jobs are encoded as `"failed"` plus a
|
|
151
|
+
distinguishing `errorMessage`. The underlying `jobs` table in JobStore
|
|
152
|
+
retains the distinct `"canceled"` / `"orphaned"` statuses for
|
|
153
|
+
`getJobSnapshot` callers. External consumers of `~/.llm-cli-gateway/
|
|
154
|
+
logs.db` that filter `status='failed'` will count cancels and boot-time
|
|
155
|
+
orphans as errors; `cache_state://*` aggregation does not distinguish.
|
|
156
|
+
|
|
157
|
+
### No config or schema changes
|
|
158
|
+
|
|
159
|
+
No migration. No new opt-in flag. The new behaviour is gated solely on
|
|
160
|
+
whether the caller (handler or `awaitJobOrDefer`) supplies a
|
|
161
|
+
`flightRecorderEntry` to `startJobWithDedup`. Tests/callers that don't
|
|
162
|
+
opt in see no behaviour change (the constructor's default
|
|
163
|
+
`NoopFlightRecorder` short-circuits the FR writes).
|
|
164
|
+
|
|
165
|
+
### Migration impact
|
|
166
|
+
|
|
167
|
+
None. SQLite schema and TOML config surface are byte-identical to
|
|
168
|
+
v1.6.1. Rollback is non-destructive (revert the release commit).
|
|
169
|
+
|
|
170
|
+
### Documentation
|
|
171
|
+
|
|
172
|
+
- `docs/plans/async-flight-recorder.dag.toml` — new slice plan (Unit A
|
|
173
|
+
unanimously approved across Codex/Gemini/Grok/Mistral).
|
|
174
|
+
- `docs/plans/async-flight-recorder.pr-body.md` — new PR description.
|
|
175
|
+
- `docs/personal-mcp/ASYNC_FLIGHT_RECORDER_SURFACES.md` — new research
|
|
176
|
+
note documenting every terminal state, the data contract per FR write
|
|
177
|
+
site, the sync-path responsibility split table, and the cancel /
|
|
178
|
+
orphan / dedup limitations.
|
|
179
|
+
- `docs/personal-mcp/PROVIDER_CACHE_SURFACES.md` — Codex section updated
|
|
180
|
+
to reflect that the parser now accepts `cached_input_tokens`; slice 2
|
|
181
|
+
"Populated for **claude only** today" claim corrected to include
|
|
182
|
+
codex.
|
|
183
|
+
- `docs/launch/blog-cache-awareness.md` — slice 1.5 follow-up note in
|
|
184
|
+
the "What's next" section.
|
|
185
|
+
|
|
5
186
|
## [1.6.1] - 2026-05-26 — docs-only follow-up to 1.6.0
|
|
6
187
|
|
|
7
188
|
Pure documentation release; zero source-code changes since 1.6.0.
|
|
package/dist/async-job-manager.d.ts
CHANGED
|
@@ -1,8 +1,35 @@
|
|
|
1
1
|
import type { Logger } from "./logger.js";
|
|
2
2
|
import { type JobHealth } from "./process-monitor.js";
|
|
3
3
|
import { JobStore } from "./job-store.js";
|
|
4
|
+
import { type FlightRecorderLike } from "./flight-recorder.js";
|
|
4
5
|
export type LlmCli = "claude" | "codex" | "gemini" | "grok" | "mistral";
|
|
5
6
|
export type AsyncJobStatus = "running" | "completed" | "failed" | "canceled" | "orphaned";
|
|
7
|
+
/**
|
|
8
|
+
* Slice 1.5 flight-recorder payload supplied via StartJobOptions.
|
|
9
|
+
* Decomposed to primitive fields (no nested handler-locals) so retaining
|
|
10
|
+
* a reference on the in-memory job record doesn't pin large promptParts
|
|
11
|
+
* or attachments via closure scope.
|
|
12
|
+
*/
|
|
13
|
+
export interface AsyncJobFlightRecorderEntry {
|
|
14
|
+
model: string;
|
|
15
|
+
prompt: string;
|
|
16
|
+
sessionId?: string;
|
|
17
|
+
stablePrefixHash?: string;
|
|
18
|
+
stablePrefixTokens?: number;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Slice 1.5 usage-extraction callback. Closures MUST be constructed from
|
|
22
|
+
* primitive locals only (e.g. const fmt = params.outputFormat; closure
|
|
23
|
+
* captures fmt). Capturing the handler's full `params` object pins large
|
|
24
|
+
* promptParts/attachments for JOB_TTL_MS.
|
|
25
|
+
*/
|
|
26
|
+
export type AsyncJobUsageExtractor = (stdout: string) => {
|
|
27
|
+
inputTokens?: number;
|
|
28
|
+
outputTokens?: number;
|
|
29
|
+
cacheReadTokens?: number;
|
|
30
|
+
cacheCreationTokens?: number;
|
|
31
|
+
costUsd?: number;
|
|
32
|
+
};
|
|
6
33
|
export interface AsyncJobSnapshot {
|
|
7
34
|
id: string;
|
|
8
35
|
cli: LlmCli;
|
|
@@ -45,6 +72,23 @@ export interface StartJobOptions {
|
|
|
45
72
|
* etc.) that must persist for the lifetime of the spawned CLI process.
|
|
46
73
|
*/
|
|
47
74
|
onComplete?: () => void;
|
|
75
|
+
/**
|
|
76
|
+
* Slice 1.5: when true, AsyncJobManager writes a flight-recorder logStart
|
|
77
|
+
* row at startJob entry using `flightRecorderEntry`. Pure async handlers
|
|
78
|
+
* (handle*RequestAsync) pass true because they have no upstream
|
|
79
|
+
* safeFlightStart writer. The sync-deferred path (awaitJobOrDefer) passes
|
|
80
|
+
* false because the upstream sync handler already wrote logStart keyed on
|
|
81
|
+
* the same correlationId — a second INSERT would crash on the PK.
|
|
82
|
+
*/
|
|
83
|
+
writeFlightStart?: boolean;
|
|
84
|
+
/** Slice 1.5: payload for the FR logStart and the terminal logComplete. */
|
|
85
|
+
flightRecorderEntry?: AsyncJobFlightRecorderEntry;
|
|
86
|
+
/**
|
|
87
|
+
* Slice 1.5: invoked only on terminal `completed` to populate token-usage
|
|
88
|
+
* fields in the FR logComplete payload. Construct from primitive locals
|
|
89
|
+
* only (see AsyncJobUsageExtractor doc).
|
|
90
|
+
*/
|
|
91
|
+
extractUsage?: AsyncJobUsageExtractor;
|
|
48
92
|
}
|
|
49
93
|
export interface StartJobOutcome {
|
|
50
94
|
snapshot: AsyncJobSnapshot;
|
|
@@ -60,7 +104,8 @@ export declare class AsyncJobManager {
|
|
|
60
104
|
private evictionTimer;
|
|
61
105
|
private processMonitor;
|
|
62
106
|
private store;
|
|
63
|
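-
constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null);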
|
|
107
|
+
private flightRecorder;
|
|
108
|
+
constructor(logger?: Logger, onJobComplete?: ((cli: LlmCli, durationMs: number, success: boolean) => void) | undefined, store?: JobStore | null, flightRecorder?: FlightRecorderLike);
|
|
64
109
|
/**
|
|
65
110
|
* True iff a durable (or memory) job store is attached. The MCP-tool
|
|
66
111
|
* registration layer ANDs this with persistence.asyncJobsEnabled when
|
|
@@ -81,6 +126,29 @@ export declare class AsyncJobManager {
|
|
|
81
126
|
*/
|
|
82
127
|
private buildRequestKey;
|
|
83
128
|
private fireOnComplete;
|
|
129
|
+
/**
|
|
130
|
+
* Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
|
|
131
|
+
* failure semantics (response = stderr||stdout on failure, errorMessage
|
|
132
|
+
* falls back through overrideErrorMessage → job.error → job.stderr →
|
|
133
|
+
* "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
|
|
134
|
+
* thrown logComplete can be retried by a later terminal callback; the
|
|
135
|
+
* FR's WHERE status='started' UPDATE guard remains the actual
|
|
136
|
+
* idempotency mechanism for the common "retry succeeds, original
|
|
137
|
+
* succeeded too" case.
|
|
138
|
+
*/
|
|
139
|
+
private writeFlightComplete;
|
|
140
|
+
private safeExtractUsage;
|
|
141
|
+
/**
|
|
142
|
+
* R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
|
|
143
|
+
* deferred response. From this point on the sync handler will not write
|
|
144
|
+
* its own safeFlightComplete, so the manager takes over.
|
|
145
|
+
*
|
|
146
|
+
* Race mitigation: if the job already terminated between the sync
|
|
147
|
+
* deadline expiring and this method firing, write logComplete
|
|
148
|
+
* synchronously here so the previously-skipped terminal callback's
|
|
149
|
+
* write isn't lost.
|
|
150
|
+
*/
|
|
151
|
+
armFlightCompleteForDeferral(jobId: string): void;
|
|
84
152
|
private safeStoreCall;
|
|
85
153
|
/**
|
|
86
154
|
* Flush in-memory stdout/stderr to the durable store if anything changed
|
|
@@ -100,7 +168,7 @@ export declare class AsyncJobManager {
|
|
|
100
168
|
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
101
169
|
* optional param for the dedup-aware path.
|
|
102
170
|
*/
|
|
103
|
-
startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void): AsyncJobSnapshot;
|
|
171
|
+
startJob(cli: LlmCli, args: string[], correlationId: string, cwd?: string, idleTimeoutMs?: number, outputFormat?: string, forceRefresh?: boolean, env?: Record<string, string>, onComplete?: () => void, flightRecorderEntry?: AsyncJobFlightRecorderEntry, extractUsage?: AsyncJobUsageExtractor, writeFlightStart?: boolean): AsyncJobSnapshot;
|
|
104
172
|
/**
|
|
105
173
|
* Start a job, with optional dedup against recent identical requests.
|
|
106
174
|
* Returns `{ snapshot, deduped }` so callers can log/report the short-circuit.
|
|
package/dist/async-job-manager.js
CHANGED
|
@@ -3,6 +3,7 @@ import { envWithExtendedPath, getExtendedPath, killProcessGroup, spawnCliProcess
|
|
|
3
3
|
import { noopLogger } from "./logger.js";
|
|
4
4
|
import { ProcessMonitor } from "./process-monitor.js";
|
|
5
5
|
import { computeRequestKey } from "./job-store.js";
|
|
6
|
+
import { NoopFlightRecorder } from "./flight-recorder.js";
|
|
6
7
|
const MAX_OUTPUT_SIZE = 50 * 1024 * 1024;
|
|
7
8
|
const JOB_TTL_MS = 60 * 60 * 1000; // 1 hour in-memory retention; durable store has its own (longer) retention
|
|
8
9
|
const EVICTION_INTERVAL_MS = 5 * 60 * 1000; // Check every 5 minutes
|
|
@@ -61,16 +62,40 @@ export class AsyncJobManager {
|
|
|
61
62
|
evictionTimer = null;
|
|
62
63
|
processMonitor;
|
|
63
64
|
store;
|
|
64
|
-
constructor(logger = noopLogger, onJobComplete, store = null) {
|
|
65
|
+
flightRecorder;
|
|
66
|
+
constructor(logger = noopLogger, onJobComplete, store = null, flightRecorder = new NoopFlightRecorder()) {
|
|
65
67
|
this.logger = logger;
|
|
66
68
|
this.onJobComplete = onJobComplete;
|
|
67
69
|
this.processMonitor = new ProcessMonitor(logger);
|
|
68
70
|
this.store = store;
|
|
71
|
+
this.flightRecorder = flightRecorder;
|
|
69
72
|
if (this.store) {
|
|
70
73
|
try {
|
|
71
|
-
const orphaned = this.store.markOrphanedOnStartup();
|
|
72
|
-
if (orphaned > 0) {
|
|
73
|
-
this.logger.info(`Marked ${orphaned} in-flight job(s) as orphaned after gateway restart`);
|
|
74
|
+
const { count, orphaned } = this.store.markOrphanedOnStartup();
|
|
75
|
+
if (count > 0) {
|
|
76
|
+
this.logger.info(`Marked ${count} in-flight job(s) as orphaned after gateway restart`);
|
|
77
|
+
}
|
|
78
|
+
// Slice 1.5: close out the FR row for each orphaned job. The FR
|
|
79
|
+
// logComplete UPDATE has WHERE status='started' so pre-1.7.0 rows
|
|
80
|
+
// (where the prior gateway never wrote a logStart) silently
|
|
81
|
+
// no-op. Wrapped per-orphan so a single bad row can't tank boot.
|
|
82
|
+
for (const orphan of orphaned) {
|
|
83
|
+
try {
|
|
84
|
+
const durationMs = Math.max(0, Date.now() - new Date(orphan.startedAt).getTime());
|
|
85
|
+
this.flightRecorder.logComplete(orphan.correlationId, {
|
|
86
|
+
response: orphan.stderr || orphan.stdout,
|
|
87
|
+
durationMs,
|
|
88
|
+
retryCount: 0,
|
|
89
|
+
circuitBreakerState: "closed",
|
|
90
|
+
optimizationApplied: false,
|
|
91
|
+
exitCode: orphan.exitCode ?? 1,
|
|
92
|
+
errorMessage: "orphaned after gateway restart",
|
|
93
|
+
status: "failed",
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
catch (err) {
|
|
97
|
+
this.logger.error(`Async-path FR logComplete for orphaned job ${orphan.id} failed`, err);
|
|
98
|
+
}
|
|
74
99
|
}
|
|
75
100
|
}
|
|
76
101
|
catch (err) {
|
|
@@ -129,6 +154,7 @@ export class AsyncJobManager {
|
|
|
129
154
|
this.logger.error(`Job ${id} process ${job.process.pid} no longer exists, marking as failed`);
|
|
130
155
|
this.emitMetrics(job);
|
|
131
156
|
this.persistComplete(job);
|
|
157
|
+
this.writeFlightComplete(job, "failed");
|
|
132
158
|
this.fireOnComplete(job);
|
|
133
159
|
}
|
|
134
160
|
// EPERM: process exists but we can't signal it — ignore
|
|
@@ -144,6 +170,7 @@ export class AsyncJobManager {
|
|
|
144
170
|
this.logger.error(`Job ${id} has exited flag but was still in running state, marking as failed`);
|
|
145
171
|
this.emitMetrics(job);
|
|
146
172
|
this.persistComplete(job);
|
|
173
|
+
this.writeFlightComplete(job, "failed");
|
|
147
174
|
this.fireOnComplete(job);
|
|
148
175
|
}
|
|
149
176
|
}
|
|
@@ -196,6 +223,96 @@ export class AsyncJobManager {
|
|
|
196
223
|
this.logger.error(`Job ${job.id} onComplete hook threw`, err);
|
|
197
224
|
}
|
|
198
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Slice 1.5: write the terminal flight-recorder row. Mirrors sync-path
|
|
228
|
+
* failure semantics (response = stderr||stdout on failure, errorMessage
|
|
229
|
+
* falls back through overrideErrorMessage → job.error → job.stderr →
|
|
230
|
+
* "Exit code N"). Single-shot guard set only on SUCCESSFUL write so a
|
|
231
|
+
* thrown logComplete can be retried by a later terminal callback; the
|
|
232
|
+
* FR's WHERE status='started' UPDATE guard remains the actual
|
|
233
|
+
* idempotency mechanism for the common "retry succeeds, original
|
|
234
|
+
* succeeded too" case.
|
|
235
|
+
*/
|
|
236
|
+
writeFlightComplete(job, finalStatus, overrideErrorMessage) {
|
|
237
|
+
if (!job.flightRecorderEntry)
|
|
238
|
+
return; // never opted in
|
|
239
|
+
// R2 Codex-Unit-B F1: only write when armed. Sync-inline requests are
|
|
240
|
+
// NOT armed at startJob — the sync handler owns the rich-metadata
|
|
241
|
+
// safeFlightComplete write. Pure async + sync-deferred ARE armed.
|
|
242
|
+
if (!job.flightCompleteArmed)
|
|
243
|
+
return;
|
|
244
|
+
if (job.flightRecorderComplete)
|
|
245
|
+
return; // already wrote successfully
|
|
246
|
+
const durationMs = Math.max(0, Date.now() - new Date(job.startedAt).getTime());
|
|
247
|
+
const usage = finalStatus === "completed" && job.extractUsage ? this.safeExtractUsage(job) : {};
|
|
248
|
+
const isFailure = finalStatus === "failed";
|
|
249
|
+
const response = isFailure ? job.stderr || job.stdout : job.stdout;
|
|
250
|
+
const exitCode = job.exitCode ?? (finalStatus === "completed" ? 0 : 1);
|
|
251
|
+
const errorMessage = isFailure
|
|
252
|
+
? (overrideErrorMessage ?? job.error ?? job.stderr ?? `Exit code ${exitCode}`)
|
|
253
|
+
: undefined;
|
|
254
|
+
try {
|
|
255
|
+
this.flightRecorder.logComplete(job.correlationId, {
|
|
256
|
+
response,
|
|
257
|
+
durationMs,
|
|
258
|
+
retryCount: 0,
|
|
259
|
+
circuitBreakerState: "closed",
|
|
260
|
+
optimizationApplied: false,
|
|
261
|
+
exitCode,
|
|
262
|
+
errorMessage,
|
|
263
|
+
status: finalStatus,
|
|
264
|
+
inputTokens: usage.inputTokens,
|
|
265
|
+
outputTokens: usage.outputTokens,
|
|
266
|
+
cacheReadTokens: usage.cacheReadTokens,
|
|
267
|
+
cacheCreationTokens: usage.cacheCreationTokens,
|
|
268
|
+
costUsd: usage.costUsd,
|
|
269
|
+
});
|
|
270
|
+
// Only mark complete on successful write so a thrown logComplete
|
|
271
|
+
// can be retried by the next terminal callback.
|
|
272
|
+
job.flightRecorderComplete = true;
|
|
273
|
+
// Clear retained references so the GC can reclaim anything the
|
|
274
|
+
// extractUsage closure captured.
|
|
275
|
+
job.flightRecorderEntry = undefined;
|
|
276
|
+
job.extractUsage = undefined;
|
|
277
|
+
}
|
|
278
|
+
catch (err) {
|
|
279
|
+
this.logger.error("Async-path flight recorder logComplete failed", err);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
safeExtractUsage(job) {
|
|
283
|
+
try {
|
|
284
|
+
return job.extractUsage?.(job.stdout) ?? {};
|
|
285
|
+
}
|
|
286
|
+
catch (err) {
|
|
287
|
+
this.logger.error(`Job ${job.id} extractUsage threw`, err);
|
|
288
|
+
return {};
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* R2 Codex-Unit-B F1: awaitJobOrDefer calls this when returning a
|
|
293
|
+
* deferred response. From this point on the sync handler will not write
|
|
294
|
+
* its own safeFlightComplete, so the manager takes over.
|
|
295
|
+
*
|
|
296
|
+
* Race mitigation: if the job already terminated between the sync
|
|
297
|
+
* deadline expiring and this method firing, write logComplete
|
|
298
|
+
* synchronously here so the previously-skipped terminal callback's
|
|
299
|
+
* write isn't lost.
|
|
300
|
+
*/
|
|
301
|
+
armFlightCompleteForDeferral(jobId) {
|
|
302
|
+
const job = this.jobs.get(jobId);
|
|
303
|
+
if (!job)
|
|
304
|
+
return;
|
|
305
|
+
if (job.flightCompleteArmed)
|
|
306
|
+
return; // pure async already armed
|
|
307
|
+
job.flightCompleteArmed = true;
|
|
308
|
+
if (job.status === "running")
|
|
309
|
+
return;
|
|
310
|
+
// Job already terminal — the close handler's writeFlightComplete
|
|
311
|
+
// saw flightCompleteArmed=false and skipped. Write now to recover.
|
|
312
|
+
const finalStatus = job.status === "completed" ? "completed" : "failed";
|
|
313
|
+
const override = job.canceled ? "canceled by caller" : undefined;
|
|
314
|
+
this.writeFlightComplete(job, finalStatus, override);
|
|
315
|
+
}
|
|
199
316
|
safeStoreCall(label, fn) {
|
|
200
317
|
if (!this.store)
|
|
201
318
|
return;
|
|
@@ -300,7 +417,7 @@ export class AsyncJobManager {
|
|
|
300
417
|
* Existing callers keep working unchanged; forceRefresh is exposed as a trailing
|
|
301
418
|
* optional param for the dedup-aware path.
|
|
302
419
|
*/
|
|
303
|
-
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete) {
|
|
420
|
+
startJob(cli, args, correlationId, cwd, idleTimeoutMs, outputFormat, forceRefresh, env, onComplete, flightRecorderEntry, extractUsage, writeFlightStart) {
|
|
304
421
|
return this.startJobWithDedup(cli, args, correlationId, {
|
|
305
422
|
cwd,
|
|
306
423
|
idleTimeoutMs,
|
|
@@ -308,6 +425,9 @@ export class AsyncJobManager {
|
|
|
308
425
|
forceRefresh,
|
|
309
426
|
env,
|
|
310
427
|
onComplete,
|
|
428
|
+
flightRecorderEntry,
|
|
429
|
+
extractUsage,
|
|
430
|
+
writeFlightStart,
|
|
311
431
|
}).snapshot;
|
|
312
432
|
}
|
|
313
433
|
/**
|
|
@@ -319,7 +439,7 @@ export class AsyncJobManager {
|
|
|
319
439
|
* is returned without spawning a new process. forceRefresh skips dedup entirely.
|
|
320
440
|
*/
|
|
321
441
|
startJobWithDedup(cli, args, correlationId, opts = {}) {
|
|
322
|
-
const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, onComplete } = opts;
|
|
442
|
+
const { cwd, idleTimeoutMs, outputFormat, forceRefresh, env: extraEnv, onComplete, flightRecorderEntry, extractUsage, writeFlightStart, } = opts;
|
|
323
443
|
const requestKey = this.buildRequestKey(cli, args, extraEnv);
|
|
324
444
|
if (!forceRefresh && this.store) {
|
|
325
445
|
try {
|
|
@@ -405,6 +525,14 @@ export class AsyncJobManager {
|
|
|
405
525
|
onCompleteFired: false,
|
|
406
526
|
outputDirty: false,
|
|
407
527
|
lastOutputFlushAt: Date.now(),
|
|
528
|
+
flightRecorderEntry,
|
|
529
|
+
extractUsage,
|
|
530
|
+
flightRecorderComplete: false,
|
|
531
|
+
// R2 Codex-Unit-B F1: pure async path arms now (writeFlightStart=true
|
|
532
|
+
// means the manager is the only FR writer). Sync-deferred path
|
|
533
|
+
// arrives with writeFlightStart=false and arms later via
|
|
534
|
+
// armFlightCompleteForDeferral when awaitJobOrDefer decides to defer.
|
|
535
|
+
flightCompleteArmed: writeFlightStart === true,
|
|
408
536
|
};
|
|
409
537
|
this.jobs.set(id, job);
|
|
410
538
|
this.safeStoreCall("recordStart", () => this.store.recordStart({
|
|
@@ -417,6 +545,27 @@ export class AsyncJobManager {
|
|
|
417
545
|
startedAt,
|
|
418
546
|
pid: child.pid ?? null,
|
|
419
547
|
}));
|
|
548
|
+
// Slice 1.5: only opt-in callers (pure async handlers) write logStart
|
|
549
|
+
// here. The sync-deferred path passes writeFlightStart=false because
|
|
550
|
+
// the upstream sync handler already wrote a logStart row keyed on the
|
|
551
|
+
// same correlationId; a duplicate INSERT would crash on the PK.
|
|
552
|
+
if (writeFlightStart && flightRecorderEntry) {
|
|
553
|
+
try {
|
|
554
|
+
this.flightRecorder.logStart({
|
|
555
|
+
correlationId,
|
|
556
|
+
cli,
|
|
557
|
+
model: flightRecorderEntry.model,
|
|
558
|
+
prompt: flightRecorderEntry.prompt,
|
|
559
|
+
sessionId: flightRecorderEntry.sessionId,
|
|
560
|
+
asyncJobId: id,
|
|
561
|
+
stablePrefixHash: flightRecorderEntry.stablePrefixHash,
|
|
562
|
+
stablePrefixTokens: flightRecorderEntry.stablePrefixTokens,
|
|
563
|
+
});
|
|
564
|
+
}
|
|
565
|
+
catch (err) {
|
|
566
|
+
this.logger.error("Async-path flight recorder logStart failed", err);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
420
569
|
this.logger.info(`Job ${id} started for ${cli}`, { correlationId });
|
|
421
570
|
// Idle timeout: kill process if no output activity for idleTimeoutMs
|
|
422
571
|
let idleTimerId;
|
|
@@ -439,6 +588,7 @@ export class AsyncJobManager {
|
|
|
439
588
|
});
|
|
440
589
|
this.emitMetrics(job);
|
|
441
590
|
this.persistComplete(job);
|
|
591
|
+
this.writeFlightComplete(job, "failed");
|
|
442
592
|
this.fireOnComplete(job);
|
|
443
593
|
setTimeout(() => {
|
|
444
594
|
if (!job.exited && job.process)
|
|
@@ -473,6 +623,7 @@ export class AsyncJobManager {
|
|
|
473
623
|
this.logger.error(`Job ${id} error: ${launchError.message}`, { correlationId });
|
|
474
624
|
this.emitMetrics(job);
|
|
475
625
|
this.persistComplete(job);
|
|
626
|
+
this.writeFlightComplete(job, "failed");
|
|
476
627
|
this.fireOnComplete(job);
|
|
477
628
|
}
|
|
478
629
|
});
|
|
@@ -490,6 +641,12 @@ export class AsyncJobManager {
|
|
|
490
641
|
}
|
|
491
642
|
// Ensure terminal state reaches the durable store (idle-timeout/output-overflow already persisted).
|
|
492
643
|
this.persistComplete(job);
|
|
644
|
+
// Slice 1.5: retry the FR complete write iff the earlier terminal
|
|
645
|
+
// callback's logComplete threw. The single-shot guard in
|
|
646
|
+
// writeFlightComplete makes this a no-op in the common case.
|
|
647
|
+
const fallbackFlightStatus = job.status === "completed" ? "completed" : "failed";
|
|
648
|
+
const fallbackOverride = job.status === "canceled" ? "canceled by caller" : undefined;
|
|
649
|
+
this.writeFlightComplete(job, fallbackFlightStatus, fallbackOverride);
|
|
493
650
|
this.fireOnComplete(job);
|
|
494
651
|
return;
|
|
495
652
|
}
|
|
@@ -512,6 +669,7 @@ export class AsyncJobManager {
|
|
|
512
669
|
}
|
|
513
670
|
this.emitMetrics(job);
|
|
514
671
|
this.persistComplete(job);
|
|
672
|
+
this.writeFlightComplete(job, job.status === "completed" ? "completed" : "failed", job.status === "canceled" ? "canceled by caller" : undefined);
|
|
515
673
|
this.fireOnComplete(job);
|
|
516
674
|
});
|
|
517
675
|
return { snapshot: this.snapshot(job), deduped: false };
|
|
@@ -567,6 +725,7 @@ export class AsyncJobManager {
|
|
|
567
725
|
killProcessGroup(job.process, "SIGTERM");
|
|
568
726
|
this.logger.info(`Job ${jobId} canceled`, { correlationId: job.correlationId });
|
|
569
727
|
this.persistComplete(job);
|
|
728
|
+
this.writeFlightComplete(job, "failed", "canceled by caller");
|
|
570
729
|
this.fireOnComplete(job);
|
|
571
730
|
setTimeout(() => {
|
|
572
731
|
if (!job.exited && job.process)
|
|
@@ -639,6 +798,7 @@ export class AsyncJobManager {
|
|
|
639
798
|
});
|
|
640
799
|
this.emitMetrics(job);
|
|
641
800
|
this.persistComplete(job);
|
|
801
|
+
this.writeFlightComplete(job, "failed", "Output exceeded maximum size (50MB)");
|
|
642
802
|
this.fireOnComplete(job);
|
|
643
803
|
setTimeout(() => {
|
|
644
804
|
if (!job.exited && job.process)
|
|
@@ -47,7 +47,10 @@ export function parseCodexJsonStream(stdout) {
|
|
|
47
47
|
input_tokens: typeof u.input_tokens === "number" ? u.input_tokens : 0,
|
|
48
48
|
output_tokens: typeof u.output_tokens === "number" ? u.output_tokens : 0,
|
|
49
49
|
};
|
|
50
|
-
if (typeof u.cache_read_input_tokens === "number") {
|
|
50
|
+
if (typeof u.cached_input_tokens === "number") {
|
|
51
|
+
usage.cache_read_tokens = u.cached_input_tokens;
|
|
52
|
+
}
|
|
53
|
+
else if (typeof u.cache_read_input_tokens === "number") {
|
|
51
54
|
usage.cache_read_tokens = u.cache_read_input_tokens;
|
|
52
55
|
}
|
|
53
56
|
else if (typeof u.cache_read_tokens === "number") {
|
package/dist/index.d.ts
CHANGED
|
@@ -81,6 +81,23 @@ interface GatewayServerRuntime {
|
|
|
81
81
|
persistence: PersistenceConfig;
|
|
82
82
|
cacheAwareness: CacheAwarenessConfig;
|
|
83
83
|
}
|
|
84
|
+
export declare function extractUsageAndCost(cli: "claude" | "codex" | "gemini" | "grok" | "mistral", output: string, outputFormat?: string,
|
|
85
|
+
/**
|
|
86
|
+
* Optional context for off-stdout telemetry sources. Today only Mistral
|
|
87
|
+
* uses this — its meta.json lives on disk keyed by sessionId. Threading
|
|
88
|
+
* this in keeps the closure built by `buildAsyncFlightRecorderHandoff`
|
|
89
|
+
* primitives-only (no `params`/`prep` retention on AsyncJobRecord).
|
|
90
|
+
*/
|
|
91
|
+
ctx?: {
|
|
92
|
+
sessionId?: string;
|
|
93
|
+
home?: string;
|
|
94
|
+
}): {
|
|
95
|
+
inputTokens?: number;
|
|
96
|
+
outputTokens?: number;
|
|
97
|
+
cacheReadTokens?: number;
|
|
98
|
+
cacheCreationTokens?: number;
|
|
99
|
+
costUsd?: number;
|
|
100
|
+
};
|
|
84
101
|
interface CliRequestPrep {
|
|
85
102
|
corrId: string;
|
|
86
103
|
effectivePrompt: string;
|
|
@@ -191,6 +208,12 @@ export declare function prepareGeminiRequest(params: {
|
|
|
191
208
|
policyFiles?: string[];
|
|
192
209
|
adminPolicyFiles?: string[];
|
|
193
210
|
attachments?: string[];
|
|
211
|
+
/**
|
|
212
|
+
* Phase 4 slice γ: emit `--skip-trust` so first-run workspaces don't
|
|
213
|
+
* block headless invocations on the interactive trust prompt. Default
|
|
214
|
+
* is undefined (preserves current prompt behaviour for legacy callers).
|
|
215
|
+
*/
|
|
216
|
+
skipTrust?: boolean;
|
|
194
217
|
}, runtime?: GatewayServerRuntime): CliRequestPrep | ExtendedToolResponse;
|
|
195
218
|
export declare function prepareMistralRequest(params: {
|
|
196
219
|
prompt?: string;
|
|
@@ -208,6 +231,11 @@ export declare function prepareMistralRequest(params: {
|
|
|
208
231
|
correlationId?: string;
|
|
209
232
|
optimizePrompt: boolean;
|
|
210
233
|
operation: string;
|
|
234
|
+
/**
|
|
235
|
+
* Phase 4 slice γ: emit `--trust` to bypass Vibe's interactive trust
|
|
236
|
+
* prompt for this invocation only (not persisted). Default undefined.
|
|
237
|
+
*/
|
|
238
|
+
trust?: boolean;
|
|
211
239
|
}, runtime?: GatewayServerRuntime): (CliRequestPrep & {
|
|
212
240
|
mistralEnv: Record<string, string>;
|
|
213
241
|
}) | ExtendedToolResponse;
|
|
@@ -235,6 +263,8 @@ export interface GeminiRequestParams {
|
|
|
235
263
|
policyFiles?: string[];
|
|
236
264
|
adminPolicyFiles?: string[];
|
|
237
265
|
attachments?: string[];
|
|
266
|
+
/** Phase 4 slice γ: emit `--skip-trust` for fresh-workspace headless runs. */
|
|
267
|
+
skipTrust?: boolean;
|
|
238
268
|
}
|
|
239
269
|
export interface HandlerDeps {
|
|
240
270
|
sessionManager: ISessionManager;
|
|
@@ -297,6 +327,8 @@ export interface MistralRequestParams {
|
|
|
297
327
|
optimizeResponse?: boolean;
|
|
298
328
|
idleTimeoutMs?: number;
|
|
299
329
|
forceRefresh?: boolean;
|
|
330
|
+
/** Phase 4 slice γ: emit `--trust` for fresh-workspace headless runs. */
|
|
331
|
+
trust?: boolean;
|
|
300
332
|
}
|
|
301
333
|
export declare function handleMistralRequest(deps: HandlerDeps, params: MistralRequestParams): Promise<ExtendedToolResponse>;
|
|
302
334
|
export declare function handleMistralRequestAsync(deps: AsyncHandlerDeps, params: Omit<MistralRequestParams, "optimizeResponse">): Promise<ExtendedToolResponse>;
|