@bookedsolid/rea 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/serve.d.ts +8 -0
- package/dist/cli/serve.js +32 -6
- package/dist/cli/status.d.ts +25 -0
- package/dist/cli/status.js +86 -1
- package/dist/gateway/circuit-breaker.d.ts +8 -2
- package/dist/gateway/downstream-pool.d.ts +13 -1
- package/dist/gateway/downstream-pool.js +30 -2
- package/dist/gateway/downstream.d.ts +157 -0
- package/dist/gateway/downstream.js +307 -5
- package/dist/gateway/live-state.d.ts +252 -0
- package/dist/gateway/live-state.js +504 -0
- package/dist/gateway/server.d.ts +44 -1
- package/dist/gateway/server.js +101 -1
- package/dist/gateway/session-blocker.d.ts +132 -0
- package/dist/gateway/session-blocker.js +163 -0
- package/hooks/_lib/push-review-core.sh +52 -8
- package/hooks/push-review-gate-git.sh +8 -6
- package/hooks/push-review-gate.sh +32 -17
- package/package.json +1 -1
package/dist/gateway/server.js
CHANGED
|
@@ -53,6 +53,8 @@ import { executeChain } from './middleware/chain.js';
|
|
|
53
53
|
import { RateLimiter } from './rate-limiter.js';
|
|
54
54
|
import { CircuitBreaker } from './circuit-breaker.js';
|
|
55
55
|
import { currentSessionId } from './session.js';
|
|
56
|
+
import { SessionBlockerTracker } from './session-blocker.js';
|
|
57
|
+
import { LiveStatePublisher } from './live-state.js';
|
|
56
58
|
import { InvocationStatus, Tier } from '../policy/types.js';
|
|
57
59
|
import { log } from '../cli/utils.js';
|
|
58
60
|
import { createLogger } from './log.js';
|
|
@@ -127,6 +129,36 @@ export function createGateway(opts) {
|
|
|
127
129
|
const pool = new DownstreamPool(registry, logger);
|
|
128
130
|
const gatewayVersion = getPkgVersion();
|
|
129
131
|
const startedAtMs = Date.now();
|
|
132
|
+
// 0.9.0 — SESSION_BLOCKER tracker. One per gateway process. The audit
|
|
133
|
+
// sink wraps `appendAuditRecord` so a fired record lands in the hash
|
|
134
|
+
// chain for forensic inspection.
|
|
135
|
+
const sessionBlocker = new SessionBlockerTracker(currentSessionId(), {}, logger, async (event) => {
|
|
136
|
+
try {
|
|
137
|
+
await appendAuditRecord(baseDir, {
|
|
138
|
+
tool_name: 'session_blocker',
|
|
139
|
+
server_name: event.server_name,
|
|
140
|
+
status: InvocationStatus.Error,
|
|
141
|
+
tier: Tier.Read,
|
|
142
|
+
autonomy_level: String(policy.autonomy_level),
|
|
143
|
+
session_id: event.session_id,
|
|
144
|
+
duration_ms: 0,
|
|
145
|
+
metadata: {
|
|
146
|
+
event: event.event,
|
|
147
|
+
open_transitions: event.open_transitions,
|
|
148
|
+
threshold: event.threshold,
|
|
149
|
+
emitted_at: event.emitted_at,
|
|
150
|
+
},
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
catch (err) {
|
|
154
|
+
logger.error({
|
|
155
|
+
event: 'session_blocker.audit_failed',
|
|
156
|
+
server_name: event.server_name,
|
|
157
|
+
message: 'failed to append SESSION_BLOCKER audit record — log remains the sole record',
|
|
158
|
+
error: err instanceof Error ? err.message : String(err),
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
});
|
|
130
162
|
// BUG-011 (0.6.2) — process-lifetime counter of failed audit appends from
|
|
131
163
|
// the `__rea__health` short-circuit. Exposed on the health snapshot as
|
|
132
164
|
// `summary.audit_fail_count` so operators can detect the silent-audit-gap
|
|
@@ -135,6 +167,9 @@ export function createGateway(opts) {
|
|
|
135
167
|
const server = new Server({ name: 'rea', version: gatewayVersion }, { capabilities: { tools: {} } });
|
|
136
168
|
// Build the circuit breaker with observability hooks wired in — state
|
|
137
169
|
// transitions log a structured record AND update the Prometheus gauge.
|
|
170
|
+
// 0.9.0: also feed SESSION_BLOCKER tracker and live-state publisher so
|
|
171
|
+
// `rea status` and the audit chain surface per-session outages.
|
|
172
|
+
let livePublisher = null;
|
|
138
173
|
const breaker = new CircuitBreaker({
|
|
139
174
|
onStateChange: (event) => {
|
|
140
175
|
const level = event.to === 'open' ? 'warn' : 'info';
|
|
@@ -155,9 +190,56 @@ export function createGateway(opts) {
|
|
|
155
190
|
metrics?.setCircuitState(event.server, CIRCUIT_GAUGE.open);
|
|
156
191
|
break;
|
|
157
192
|
}
|
|
193
|
+
sessionBlocker.recordCircuitTransition({
|
|
194
|
+
server: event.server,
|
|
195
|
+
from: event.from,
|
|
196
|
+
to: event.to,
|
|
197
|
+
});
|
|
198
|
+
livePublisher?.scheduleUpdate();
|
|
158
199
|
},
|
|
159
200
|
});
|
|
160
201
|
const staticChain = buildMiddlewareChain(opts, { breaker });
|
|
202
|
+
// Pool supervisor events → live-state publisher. Covers three kinds:
|
|
203
|
+
// - `child_died_unexpectedly` — child exited outside a caller-initiated
|
|
204
|
+
// close(). Session-blocker counts this indirectly through the breaker
|
|
205
|
+
// transition it eventually triggers.
|
|
206
|
+
// - `respawned` — successful reconnect. Forwarded to session-blocker as
|
|
207
|
+
// an intentional no-op (see `recordRespawn` JSDoc): respawn is NOT
|
|
208
|
+
// equivalent to circuit recovery, so we do NOT clear blocker state
|
|
209
|
+
// on reconnect. The method exists to make the wiring site obvious
|
|
210
|
+
// on the call graph and to give us one place to change if the
|
|
211
|
+
// semantics ever shift — but today it deliberately records nothing.
|
|
212
|
+
// - `health_changed` — a non-transition mutation of a field surfaced in
|
|
213
|
+
// `rea status` (health, last_error, tools_count). Codex 0.9.0 pass-2
|
|
214
|
+
// P2a: without this, the first failure below the breaker threshold
|
|
215
|
+
// or a successful `listTools` count change never reached the
|
|
216
|
+
// publisher, leaving `rea status` showing stale downstream data.
|
|
217
|
+
// `scheduleUpdate()` is debounced (250 ms default) so storm bursts
|
|
218
|
+
// coalesce to one write.
|
|
219
|
+
pool.onSupervisorEvent((event) => {
|
|
220
|
+
if (event.kind === 'respawned')
|
|
221
|
+
sessionBlocker.recordRespawn(event.server);
|
|
222
|
+
livePublisher?.scheduleUpdate();
|
|
223
|
+
});
|
|
224
|
+
if (opts.liveStateFilePath !== undefined) {
|
|
225
|
+
// Build options defensively — exactOptionalPropertyTypes refuses
|
|
226
|
+
// `lastErrorRedactor: undefined` against `lastErrorRedactor?: FieldRedactor`.
|
|
227
|
+
const publisherOpts = {
|
|
228
|
+
baseDir,
|
|
229
|
+
stateFilePath: opts.liveStateFilePath,
|
|
230
|
+
sessionId: opts.liveStateSessionId ?? currentSessionId(),
|
|
231
|
+
startedAt: opts.liveStateStartedAt ?? new Date(startedAtMs).toISOString(),
|
|
232
|
+
metricsPort: opts.liveStateMetricsPort ?? null,
|
|
233
|
+
pool,
|
|
234
|
+
breaker,
|
|
235
|
+
sessionBlocker,
|
|
236
|
+
logger,
|
|
237
|
+
...(opts.liveStateLastErrorRedactor !== undefined
|
|
238
|
+
? { lastErrorRedactor: opts.liveStateLastErrorRedactor }
|
|
239
|
+
: {}),
|
|
240
|
+
};
|
|
241
|
+
livePublisher = new LiveStatePublisher(publisherOpts);
|
|
242
|
+
}
|
|
161
243
|
// Read `.rea/HALT` without ever throwing. Returns `{halt, reason}` where
|
|
162
244
|
// `reason` is the (trimmed) file contents or null when the file is absent
|
|
163
245
|
// / unreadable. The meta-tool never surfaces I/O errors — health is the one
|
|
@@ -462,12 +544,21 @@ export function createGateway(opts) {
|
|
|
462
544
|
}
|
|
463
545
|
const activeTransport = transport ?? new StdioServerTransport();
|
|
464
546
|
await server.connect(activeTransport);
|
|
547
|
+
// Publish the initial live-state snapshot so `rea status` sees the
|
|
548
|
+
// `downstreams` block from the first moment the gateway is up, not
|
|
549
|
+
// only after the first circuit transition.
|
|
550
|
+
livePublisher?.flushNow();
|
|
465
551
|
}
|
|
466
552
|
async function stop() {
|
|
467
553
|
if (stopping)
|
|
468
554
|
return;
|
|
469
555
|
stopping = true;
|
|
470
556
|
logger.info({ event: 'gateway.shutdown', message: 'gateway stop requested' });
|
|
557
|
+
// Final flush BEFORE we drop the publisher so any last-moment transition
|
|
558
|
+
// (e.g. a circuit closing as pool.close() quiesces it) is reflected on
|
|
559
|
+
// disk for the very last `rea status` after shutdown.
|
|
560
|
+
livePublisher?.flushNow();
|
|
561
|
+
livePublisher?.stop();
|
|
471
562
|
try {
|
|
472
563
|
await server.close();
|
|
473
564
|
}
|
|
@@ -476,5 +567,14 @@ export function createGateway(opts) {
|
|
|
476
567
|
}
|
|
477
568
|
await pool.close();
|
|
478
569
|
}
|
|
479
|
-
return {
|
|
570
|
+
return {
|
|
571
|
+
server,
|
|
572
|
+
start,
|
|
573
|
+
stop,
|
|
574
|
+
pool,
|
|
575
|
+
logger,
|
|
576
|
+
metrics,
|
|
577
|
+
livePublisher,
|
|
578
|
+
sessionBlocker,
|
|
579
|
+
};
|
|
480
580
|
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SESSION_BLOCKER tracker (BUG-004, 0.9.0).
|
|
3
|
+
*
|
|
4
|
+
* When a downstream MCP server fails repeatedly in a single session the
|
|
5
|
+
* operator needs one LOUD signal — not a log stream full of identical
|
|
6
|
+
* circuit-open records. This module owns the per-(session_id, server_name)
|
|
7
|
+
* counter and emits exactly one `SESSION_BLOCKER` event once a threshold is
|
|
8
|
+
* crossed; the event is replayed neither on continued failure nor on a
|
|
9
|
+
* circuit-breaker flap. Recovery (downstream returns to healthy) resets the
|
|
10
|
+
* counter and re-arms the emission.
|
|
11
|
+
*
|
|
12
|
+
* ## Why this lives separately from CircuitBreaker
|
|
13
|
+
*
|
|
14
|
+
* The circuit breaker tracks CONSECUTIVE CALL-LEVEL failures per server —
|
|
15
|
+
* it is wire-hot and opens/closes many times across a long session. The
|
|
16
|
+
* session blocker tracks OPEN-LEVEL failures per session: every
|
|
17
|
+
* circuit-open transition counts as ONE. A downstream that flaps
|
|
18
|
+
* open→half-open→open three times in ten minutes is already a blocker from an
|
|
19
|
+
* operator perspective — it should be surfaced once, not muted by the
|
|
20
|
+
* breaker's own internal recoveries.
|
|
21
|
+
*
|
|
22
|
+
* ## Emission semantics
|
|
23
|
+
*
|
|
24
|
+
* - Increment on every circuit transition to `open`.
|
|
25
|
+
* - When the counter for (session, server) crosses `threshold`, fire
|
|
26
|
+
* ONE `SESSION_BLOCKER` record (structured log + audit append). The
|
|
27
|
+
* counter keeps incrementing — subsequent opens do NOT re-fire.
|
|
28
|
+
* - On circuit recovery (transition to `closed`) the counter resets and
|
|
29
|
+
* the "already emitted" flag clears; the next threshold crossing will
|
|
30
|
+
* fire a fresh record.
|
|
31
|
+
* - On session change (new session_id) every counter is dropped — a new
|
|
32
|
+
* `rea serve` instance starts fresh.
|
|
33
|
+
*
|
|
34
|
+
* ## Why audit
|
|
35
|
+
*
|
|
36
|
+
* The hash-chained audit log is the single place an operator can look for a
|
|
37
|
+
* forensic record of persistent downstream outages. A `SESSION_BLOCKER`
|
|
38
|
+
* record in the audit trail pinpoints the session + downstream that went
|
|
39
|
+
* dark, independent of whichever log sink the operator had configured.
|
|
40
|
+
*
|
|
41
|
+
* Audit appends are best-effort; a failure to write never breaks the
|
|
42
|
+
* gateway. The log-side emission happens first and unconditionally.
|
|
43
|
+
*/
|
|
44
|
+
import type { Logger } from './log.js';
|
|
45
|
+
/**
|
|
46
|
+
* Event shape observed by the tracker. Only `from` → `to` and `server` are
|
|
47
|
+
* needed; the tracker does not care about retryAt/reason.
|
|
48
|
+
*/
|
|
49
|
+
export interface CircuitTransitionEvent {
|
|
50
|
+
server: string;
|
|
51
|
+
from: 'closed' | 'open' | 'half-open';
|
|
52
|
+
to: 'closed' | 'open' | 'half-open';
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Structured record emitted when a session-level block threshold is
|
|
56
|
+
* crossed. Exposed so tests and audit-append helpers can construct the
|
|
57
|
+
* canonical shape without re-declaring the fields.
|
|
58
|
+
*/
|
|
59
|
+
export interface SessionBlockerEvent {
|
|
60
|
+
event: 'SESSION_BLOCKER';
|
|
61
|
+
session_id: string;
|
|
62
|
+
server_name: string;
|
|
63
|
+
open_transitions: number;
|
|
64
|
+
threshold: number;
|
|
65
|
+
/** ISO timestamp at emission. */
|
|
66
|
+
emitted_at: string;
|
|
67
|
+
message: string;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Callback the tracker invokes when a SESSION_BLOCKER fires. The gateway
|
|
71
|
+
* wires this to `appendAuditRecord` so forensic capture survives logger
|
|
72
|
+
* downtime. Errors raised by the sink are swallowed — a broken audit
|
|
73
|
+
* pipeline must never break state tracking.
|
|
74
|
+
*/
|
|
75
|
+
export type SessionBlockerAuditSink = (event: SessionBlockerEvent) => Promise<void> | void;
|
|
76
|
+
export interface SessionBlockerOptions {
|
|
77
|
+
/**
|
|
78
|
+
* Number of open-transitions required to fire the event. Default: 3 —
|
|
79
|
+
* matches Jake's "after N consecutive same-downstream failures in one
|
|
80
|
+
* session" from the bug report. Low enough to catch real outages quickly,
|
|
81
|
+
* high enough that a single noisy reconnect doesn't spuriously fire.
|
|
82
|
+
*/
|
|
83
|
+
threshold?: number;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Per-(session_id, server_name) SESSION_BLOCKER tracker.
|
|
87
|
+
*
|
|
88
|
+
* Stateful and single-instance per gateway process. The circuit breaker's
|
|
89
|
+
* `onStateChange` listener plus the pool's respawn events feed it; the
|
|
90
|
+
* tracker decides whether to emit.
|
|
91
|
+
*/
|
|
92
|
+
export declare class SessionBlockerTracker {
|
|
93
|
+
private readonly threshold;
|
|
94
|
+
private readonly logger;
|
|
95
|
+
private readonly auditSink;
|
|
96
|
+
private sessionId;
|
|
97
|
+
private readonly entries;
|
|
98
|
+
constructor(sessionId: string, options?: SessionBlockerOptions, logger?: Logger, auditSink?: SessionBlockerAuditSink);
|
|
99
|
+
/**
|
|
100
|
+
* Replace the tracked session id and clear all counters. Called from the
|
|
101
|
+
* serve entry when a fresh session boots. In practice `session_id` is
|
|
102
|
+
* assigned once per process — this is here for test determinism and
|
|
103
|
+
* future multi-session transports.
|
|
104
|
+
*/
|
|
105
|
+
resetForSession(sessionId: string): void;
|
|
106
|
+
/**
|
|
107
|
+
* Feed a circuit-breaker transition. Fires a SESSION_BLOCKER record when
|
|
108
|
+
* the threshold is crossed for the first time. Subsequent opens increment
|
|
109
|
+
* the counter but do NOT re-fire until recovery resets.
|
|
110
|
+
*/
|
|
111
|
+
recordCircuitTransition(event: CircuitTransitionEvent): void;
|
|
112
|
+
/**
|
|
113
|
+
* Feed a respawn event from the supervisor. A successful respawn is NOT
|
|
114
|
+
* the same as circuit recovery — the circuit closes only after a
|
|
115
|
+
* successful probe tool call, not just after reconnect. We intentionally
|
|
116
|
+
* do nothing here so the respawn path does not mask a live outage.
|
|
117
|
+
* Exposed as a method so the wiring site is obvious at the call graph.
|
|
118
|
+
*/
|
|
119
|
+
recordRespawn(_server: string): void;
|
|
120
|
+
/**
|
|
121
|
+
* Snapshot for observability / status — the `rea status` JSON output
|
|
122
|
+
* surfaces per-server transition counts so operators can see "this one
|
|
123
|
+
* has failed twice but hasn't crossed threshold yet".
|
|
124
|
+
*/
|
|
125
|
+
snapshot(): Array<{
|
|
126
|
+
server: string;
|
|
127
|
+
open_transitions: number;
|
|
128
|
+
emitted: boolean;
|
|
129
|
+
}>;
|
|
130
|
+
private getOrCreate;
|
|
131
|
+
private fire;
|
|
132
|
+
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SESSION_BLOCKER tracker (BUG-004, 0.9.0).
|
|
3
|
+
*
|
|
4
|
+
* When a downstream MCP server fails repeatedly in a single session the
|
|
5
|
+
* operator needs one LOUD signal — not a log stream full of identical
|
|
6
|
+
* circuit-open records. This module owns the per-(session_id, server_name)
|
|
7
|
+
* counter and emits exactly one `SESSION_BLOCKER` event once a threshold is
|
|
8
|
+
* crossed; the event is replayed neither on continued failure nor on a
|
|
9
|
+
* circuit-breaker flap. Recovery (downstream returns to healthy) resets the
|
|
10
|
+
* counter and re-arms the emission.
|
|
11
|
+
*
|
|
12
|
+
* ## Why this lives separately from CircuitBreaker
|
|
13
|
+
*
|
|
14
|
+
* The circuit breaker tracks CONSECUTIVE CALL-LEVEL failures per server —
|
|
15
|
+
* it is wire-hot and opens/closes many times across a long session. The
|
|
16
|
+
* session blocker tracks OPEN-LEVEL failures per session: every
|
|
17
|
+
* circuit-open transition counts as ONE. A downstream that flaps
|
|
18
|
+
* open→half-open→open three times in ten minutes is already a blocker from an
|
|
19
|
+
* operator perspective — it should be surfaced once, not muted by the
|
|
20
|
+
* breaker's own internal recoveries.
|
|
21
|
+
*
|
|
22
|
+
* ## Emission semantics
|
|
23
|
+
*
|
|
24
|
+
* - Increment on every circuit transition to `open`.
|
|
25
|
+
* - When the counter for (session, server) crosses `threshold`, fire
|
|
26
|
+
* ONE `SESSION_BLOCKER` record (structured log + audit append). The
|
|
27
|
+
* counter keeps incrementing — subsequent opens do NOT re-fire.
|
|
28
|
+
* - On circuit recovery (transition to `closed`) the counter resets and
|
|
29
|
+
* the "already emitted" flag clears; the next threshold crossing will
|
|
30
|
+
* fire a fresh record.
|
|
31
|
+
* - On session change (new session_id) every counter is dropped — a new
|
|
32
|
+
* `rea serve` instance starts fresh.
|
|
33
|
+
*
|
|
34
|
+
* ## Why audit
|
|
35
|
+
*
|
|
36
|
+
* The hash-chained audit log is the single place an operator can look for a
|
|
37
|
+
* forensic record of persistent downstream outages. A `SESSION_BLOCKER`
|
|
38
|
+
* record in the audit trail pinpoints the session + downstream that went
|
|
39
|
+
* dark, independent of whichever log sink the operator had configured.
|
|
40
|
+
*
|
|
41
|
+
* Audit appends are best-effort; a failure to write never breaks the
|
|
42
|
+
* gateway. The log-side emission happens first and unconditionally.
|
|
43
|
+
*/
|
|
44
|
+
/**
|
|
45
|
+
* Per-(session_id, server_name) SESSION_BLOCKER tracker.
|
|
46
|
+
*
|
|
47
|
+
* Stateful and single-instance per gateway process. The circuit breaker's
|
|
48
|
+
* `onStateChange` listener plus the pool's respawn events feed it; the
|
|
49
|
+
* tracker decides whether to emit.
|
|
50
|
+
*/
|
|
51
|
+
export class SessionBlockerTracker {
    /** Number of open-transitions that triggers the event. Always >= 1 and never NaN. */
    threshold;
    /** Optional structured logger; only `error(record)` is used. */
    logger;
    /** Optional callback invoked (fire-and-forget) with each emitted event. */
    auditSink;
    /** Session id stamped onto every emitted event. */
    sessionId;
    /** server name -> { openTransitions, alreadyEmitted } */
    entries = new Map();
    /**
     * @param {string} sessionId - id stamped onto emitted events.
     * @param {{ threshold?: number }} [options] - `threshold` defaults to 3
     *   and is floored at 1. A NaN threshold falls back to the default:
     *   `Math.max(1, NaN)` is NaN and `count >= NaN` is always false, which
     *   would silently disable the blocker.
     * @param {{ error: (record: object) => void }} [logger]
     * @param {(event: object) => Promise<void> | void} [auditSink]
     */
    constructor(sessionId, options = {}, logger, auditSink) {
        const fallbackThreshold = 3;
        const requested = Number(options?.threshold ?? fallbackThreshold);
        this.threshold = Number.isNaN(requested)
            ? fallbackThreshold
            : Math.max(1, requested);
        this.logger = logger;
        this.auditSink = auditSink;
        this.sessionId = sessionId;
    }
    /**
     * Replace the tracked session id and drop every per-server counter.
     *
     * @param {string} sessionId - the new session id.
     */
    resetForSession(sessionId) {
        this.sessionId = sessionId;
        this.entries.clear();
    }
    /**
     * Feed a circuit-breaker transition.
     *
     * - `to === 'closed'`: recovery. The counter and the "already emitted"
     *   flag are both reset, so a later threshold crossing fires again.
     * - `to === 'open'`: the counter is incremented; the first time it
     *   reaches `threshold` exactly one SESSION_BLOCKER is fired. Further
     *   opens keep counting but do not re-fire until a reset.
     * - any other target state (e.g. `half-open`) leaves the counters alone.
     *
     * An entry for `event.server` is created on every call, so the server
     * shows up in `snapshot()` even before its first open.
     *
     * @param {{ server: string, from: string, to: string }} event
     */
    recordCircuitTransition(event) {
        const entry = this.getOrCreate(event.server);
        if (event.to === 'closed') {
            entry.openTransitions = 0;
            entry.alreadyEmitted = false;
            return;
        }
        if (event.to !== 'open') {
            return;
        }
        entry.openTransitions += 1;
        if (!entry.alreadyEmitted && entry.openTransitions >= this.threshold) {
            // Flip the flag before firing so the event is emitted once only.
            entry.alreadyEmitted = true;
            this.fire(event.server, entry.openTransitions);
        }
    }
    /**
     * Feed a respawn event from the supervisor. Deliberately a no-op: a
     * respawn does not reset counters (only a transition to `closed` does),
     * so reconnecting cannot mask an ongoing outage.
     *
     * @param {string} _server - unused.
     */
    recordRespawn(_server) {
        // Intentional no-op. See JSDoc.
    }
    /**
     * Point-in-time view of every tracked server.
     *
     * @returns {Array<{ server: string, open_transitions: number, emitted: boolean }>}
     *   one row per server, in first-seen order.
     */
    snapshot() {
        const out = [];
        for (const [server, state] of this.entries) {
            out.push({
                server,
                open_transitions: state.openTransitions,
                emitted: state.alreadyEmitted,
            });
        }
        return out;
    }
    /**
     * Return the state record for `server`, creating a zeroed one on first use.
     *
     * @param {string} server
     * @returns {{ openTransitions: number, alreadyEmitted: boolean }}
     */
    getOrCreate(server) {
        let entry = this.entries.get(server);
        if (entry === undefined) {
            entry = { openTransitions: 0, alreadyEmitted: false };
            this.entries.set(server, entry);
        }
        return entry;
    }
    /**
     * Emit one SESSION_BLOCKER: a synchronous error-level log record first,
     * then a fire-and-forget call to the audit sink (if one was supplied).
     *
     * @param {string} server - downstream that crossed the threshold.
     * @param {number} count - open-transition count at emission time.
     */
    fire(server, count) {
        const event = {
            event: 'SESSION_BLOCKER',
            session_id: this.sessionId,
            server_name: server,
            open_transitions: count,
            threshold: this.threshold,
            emitted_at: new Date().toISOString(),
            message: `downstream "${server}" has opened the circuit ${count} time(s) in this session ` +
                `(threshold ${this.threshold}). This is a SESSION_BLOCKER — the gateway will keep ` +
                `routing around it, but operator attention is required to restore capacity.`,
        };
        // The log record is written before (and regardless of) the audit sink.
        this.logger?.error({
            event: 'session_blocker',
            server_name: server,
            message: event.message,
            session_id: this.sessionId,
            open_transitions: count,
            threshold: this.threshold,
        });
        if (this.auditSink === undefined) {
            return;
        }
        // Fire-and-forget: the sink runs on a later microtask so a slow or
        // throwing sink cannot block or break the circuit-transition path.
        void Promise.resolve()
            .then(() => this.auditSink(event))
            .catch((err) => {
                // The sink is expected to swallow its own errors. If it does
                // not, report the lost audit record instead of hiding it.
                try {
                    this.logger?.error({
                        event: 'session_blocker.audit_sink_rejected',
                        server_name: server,
                        session_id: this.sessionId,
                        message: 'SESSION_BLOCKER audit sink threw or rejected; the audit record may be missing',
                        error: err instanceof Error ? err.message : String(err),
                    });
                }
                catch {
                    // A failing logger must not become an unhandled rejection.
                }
            });
    }
}
|
|
@@ -213,6 +213,14 @@ pr_core_run() {
|
|
|
213
213
|
# typically unset. Default to `origin` for BUG-008 sniff consistency.
|
|
214
214
|
local argv_remote="${1:-origin}"
|
|
215
215
|
|
|
216
|
+
# 0.8.0 (#85): when REA_SKIP_CODEX_REVIEW is set, this flag flips to 1
|
|
217
|
+
# in section 5c. The protected-path Codex-audit check (section 7) then
|
|
218
|
+
# treats the requirement as satisfied — but every other gate (HALT,
|
|
219
|
+
# cross-repo guard, ref-resolution, push-review cache)
|
|
220
|
+
# still runs. Full-gate bypass moved to REA_SKIP_PUSH_REVIEW a release
|
|
221
|
+
# cycle ago; this narrows REA_SKIP_CODEX_REVIEW to what its name implies.
|
|
222
|
+
local CODEX_WAIVER_ACTIVE=0
|
|
223
|
+
|
|
216
224
|
# ── 1a. Cross-repo guard (must come FIRST — before any rea-scoped check) ──
|
|
217
225
|
# BUG-012 (0.6.2) — anchor the install to the SCRIPT'S OWN LOCATION on disk.
|
|
218
226
|
# The hook knows where it lives: installed at `<root>/.claude/hooks/<name>.sh`,
|
|
@@ -653,16 +661,23 @@ pr_core_run() {
|
|
|
653
661
|
|
|
654
662
|
{
|
|
655
663
|
printf '\n'
|
|
656
|
-
printf '== CODEX REVIEW
|
|
664
|
+
printf '== CODEX REVIEW WAIVER active (REA_SKIP_CODEX_REVIEW)\n'
|
|
657
665
|
printf ' Reason: %s\n' "$SKIP_REASON"
|
|
658
666
|
printf ' Actor: %s\n' "$SKIP_ACTOR"
|
|
659
667
|
printf ' Head SHA: %s\n' "${SKIP_HEAD:-<unknown>}"
|
|
660
668
|
printf ' Audited: .rea/audit.jsonl (tool_name=codex.review.skipped)\n'
|
|
661
669
|
printf '\n'
|
|
662
|
-
printf '
|
|
670
|
+
printf ' Scope: waives the protected-path Codex-audit requirement only.\n'
|
|
671
|
+
printf ' Still active: HALT, cross-repo guard, ref-resolution,\n'
|
|
672
|
+
printf ' push-review cache. For a full-gate bypass\n'
|
|
673
|
+
# shellcheck disable=SC2016 # backticks are literal markdown in user-facing message
|
|
674
|
+
printf ' use `REA_SKIP_PUSH_REVIEW=<reason>`.\n'
|
|
675
|
+
printf '\n'
|
|
676
|
+
printf ' This is a gate weakening. The waiver receipt is written BEFORE\n'
|
|
677
|
+
printf ' this banner — seeing this banner means the audit is durable.\n'
|
|
663
678
|
printf '\n'
|
|
664
679
|
} >&2
|
|
665
|
-
|
|
680
|
+
CODEX_WAIVER_ACTIVE=1
|
|
666
681
|
fi
|
|
667
682
|
|
|
668
683
|
# ── 6. Determine source/target commits for each refspec ───────────────────
|
|
@@ -853,7 +868,13 @@ pr_core_run() {
|
|
|
853
868
|
'; then
|
|
854
869
|
local _audit="${REA_ROOT}/.rea/audit.jsonl"
|
|
855
870
|
local _codex_ok=0
|
|
856
|
-
|
|
871
|
+
# 0.8.0 (#85): Codex-only waiver satisfies this check without a real
|
|
872
|
+
# audit entry. Every other gate still ran — HALT, cross-repo guard,
|
|
873
|
+
# ref-resolution, push-review cache — and the waiver itself is
|
|
874
|
+
# already recorded in .rea/audit.jsonl as tool_name=codex.review.skipped.
|
|
875
|
+
if [[ "$CODEX_WAIVER_ACTIVE" == "1" ]]; then
|
|
876
|
+
_codex_ok=1
|
|
877
|
+
elif [[ -f "$_audit" ]]; then
|
|
857
878
|
if jq -e --arg sha "$local_sha" '
|
|
858
879
|
select(
|
|
859
880
|
.tool_name == "codex.review"
|
|
@@ -977,15 +998,38 @@ pr_core_run() {
|
|
|
977
998
|
REA_CLI_ARGS=(node "${REA_ROOT}/dist/cli/index.js")
|
|
978
999
|
fi
|
|
979
1000
|
|
|
1001
|
+
# Cache-branch derivation (Codex 0.8.0 pass-2 finding #2, pass-3 finding #1):
|
|
1002
|
+
# Use the PUSHED source ref (from pre-push stdin / bootstrap walk), not the
|
|
1003
|
+
# checkout branch. `git push origin hotfix:main` from a `feature` checkout
|
|
1004
|
+
# must look up a cache entry keyed on `hotfix`, not `feature`. Strip the
|
|
1005
|
+
# `refs/heads/` prefix.
|
|
1006
|
+
#
|
|
1007
|
+
# Fall back to the checkout branch when SOURCE_REF is:
|
|
1008
|
+
# • unset (defence-in-depth, not reached on any observed path), or
|
|
1009
|
+
# • the literal string "HEAD" — emitted by pr_resolve_argv_refspecs for a
|
|
1010
|
+
# bare `git push` with no explicit refspec. Keying a cache lookup on
|
|
1011
|
+
# "HEAD" would force a miss on every bare push; the checkout branch
|
|
1012
|
+
# name is the right lookup key for that workflow.
|
|
1013
|
+
local SOURCE_BRANCH="${SOURCE_REF#refs/heads/}"
|
|
1014
|
+
if [[ -z "$SOURCE_BRANCH" || "$SOURCE_BRANCH" == "HEAD" ]]; then
|
|
1015
|
+
SOURCE_BRANCH="$CURRENT_BRANCH"
|
|
1016
|
+
fi
|
|
1017
|
+
|
|
980
1018
|
if [[ -n "$PUSH_SHA" ]] && [[ ${#REA_CLI_ARGS[@]} -gt 0 ]]; then
|
|
981
1019
|
local CACHE_RESULT
|
|
982
|
-
CACHE_RESULT=$("${REA_CLI_ARGS[@]}" cache check "$PUSH_SHA" --branch "$
|
|
983
|
-
|
|
1020
|
+
CACHE_RESULT=$("${REA_CLI_ARGS[@]}" cache check "$PUSH_SHA" --branch "$SOURCE_BRANCH" --base "$TARGET_BRANCH" 2>/dev/null || echo '{"hit":false}')
|
|
1021
|
+
# Require BOTH hit == true AND result == "pass". A cached `fail` verdict
|
|
1022
|
+
# (Codex 0.8.0 pass-2 finding #1) must NOT satisfy the gate — cache.ts
|
|
1023
|
+
# serializes `result` verbatim, so a negative verdict would otherwise
|
|
1024
|
+
# slip through. Under the #85 narrowed semantic the cache is the ONLY
|
|
1025
|
+
# way a waiver-using operator reaches exit 0, so a permissive predicate
|
|
1026
|
+
# here would be a real security regression.
|
|
1027
|
+
if printf '%s' "$CACHE_RESULT" | jq -e '.hit == true and .result == "pass"' >/dev/null 2>&1; then
|
|
984
1028
|
local DISCORD_LIB="${REA_ROOT}/hooks/_lib/discord.sh"
|
|
985
1029
|
if [ -f "$DISCORD_LIB" ]; then
|
|
986
1030
|
# shellcheck source=/dev/null
|
|
987
1031
|
source "$DISCORD_LIB"
|
|
988
|
-
discord_notify "dev" "Push passed quality gates on \`${
|
|
1032
|
+
discord_notify "dev" "Push passed quality gates on \`${SOURCE_BRANCH}\` -- $(cd "$REA_ROOT" && git log -1 --oneline 2>/dev/null)" "green"
|
|
989
1033
|
fi
|
|
990
1034
|
exit 0
|
|
991
1035
|
fi
|
|
@@ -1006,7 +1050,7 @@ pr_core_run() {
|
|
|
1006
1050
|
printf ' 1. Spawn a code-reviewer agent to review: git diff %s..%s\n' "$MERGE_BASE" "$SOURCE_SHA"
|
|
1007
1051
|
printf ' 2. Spawn a security-engineer agent for security review\n'
|
|
1008
1052
|
printf ' 3. After both pass, cache the result:\n'
|
|
1009
|
-
printf ' rea cache set %s pass --branch %s --base %s\n' "$PUSH_SHA" "$
|
|
1053
|
+
printf ' rea cache set %s pass --branch %s --base %s\n' "$PUSH_SHA" "$SOURCE_BRANCH" "$TARGET_BRANCH"
|
|
1010
1054
|
printf '\n'
|
|
1011
1055
|
} >&2
|
|
1012
1056
|
exit 2
|
|
@@ -45,13 +45,15 @@
|
|
|
45
45
|
# generic Claude Code adapter.
|
|
46
46
|
#
|
|
47
47
|
# ── Escape hatches ────────────────────────────────────────────────────────────
|
|
48
|
-
# REA_SKIP_CODEX_REVIEW=<reason> —
|
|
49
|
-
#
|
|
48
|
+
# REA_SKIP_CODEX_REVIEW=<reason> — Codex-only waiver. Since 0.8.0 (#85)
|
|
49
|
+
# this ONLY satisfies the protected-path
|
|
50
|
+
# Codex-audit requirement. HALT, cross-
|
|
51
|
+
# repo guard, ref-resolution, and the
|
|
52
|
+
# push-review cache still run. See the
|
|
53
|
+
# authoritative docstring in
|
|
54
|
+
# `push-review-gate.sh` for the full
|
|
55
|
+
# scope description. Audit record
|
|
50
56
|
# `tool_name: "codex.review.skipped"`.
|
|
51
|
-
# Currently a whole-gate bypass (see
|
|
52
|
-
# task #85); the distinct audit tool_name
|
|
53
|
-
# keeps it from satisfying the Codex-
|
|
54
|
-
# review jq predicate.
|
|
55
57
|
# REA_SKIP_PUSH_REVIEW=<reason> — bypass the WHOLE gate for this push.
|
|
56
58
|
# Audit record
|
|
57
59
|
# `tool_name: "push.review.skipped"`.
|
|
@@ -22,24 +22,39 @@
|
|
|
22
22
|
# so in practice a consumer can wire THIS file into `.husky/pre-push` and it
|
|
23
23
|
# just works. The `-git` adapter exists for clarity of install intent.
|
|
24
24
|
#
|
|
25
|
-
# ──
|
|
26
|
-
# Env var `REA_SKIP_CODEX_REVIEW=<reason>`
|
|
27
|
-
# review requirement. Set to any non-empty
|
|
28
|
-
# recorded in the audit record (no default
|
|
29
|
-
# operator sets `REA_SKIP_CODEX_REVIEW=1` the
|
|
25
|
+
# ── Codex-only waiver: REA_SKIP_CODEX_REVIEW ─────────────────────────────────
|
|
26
|
+
# Env var `REA_SKIP_CODEX_REVIEW=<reason>` waives the Codex adversarial-
|
|
27
|
+
# review requirement (section 7 protected-path check). Set to any non-empty
|
|
28
|
+
# value; the value IS the reason recorded in the audit record (no default
|
|
29
|
+
# reason is supplied — if the operator sets `REA_SKIP_CODEX_REVIEW=1` the
|
|
30
|
+
# reason is literally "1").
|
|
30
31
|
#
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
39
|
-
#
|
|
40
|
-
#
|
|
41
|
-
#
|
|
42
|
-
#
|
|
32
|
+
# SCOPE (0.8.0, #85): Codex-only. The waiver only satisfies the
|
|
33
|
+
# protected-path Codex-audit requirement. Every other gate this hook
|
|
34
|
+
# runs still runs:
|
|
35
|
+
# • HALT (.rea/HALT) — still blocks.
|
|
36
|
+
# • Cross-repo guard — still blocks.
|
|
37
|
+
# • Ref-resolution failures — still block.
|
|
38
|
+
# • Push-review cache — a miss still falls through to section 9's general
|
|
39
|
+
# review-required block.
|
|
40
|
+
# (Blocked-paths enforcement is a separate hook on Edit/Write tiers, not
|
|
41
|
+
# this push hook — it was never gated by REA_SKIP_CODEX_REVIEW.)
|
|
42
|
+
#
|
|
43
|
+
# For a full-gate bypass, use `REA_SKIP_PUSH_REVIEW=<reason>` (section 5a).
|
|
44
|
+
# The 0.7.0 semantic (whole-gate bypass via the Codex hatch) was misleading
|
|
45
|
+
# — operators reached for REA_SKIP_CODEX_REVIEW to silence a transient
|
|
46
|
+
# Codex unavailability and accidentally bypassed every other check too.
|
|
47
|
+
# 0.8.0 narrows it to what the name implies.
|
|
48
|
+
#
|
|
49
|
+
# ORDERING: the waiver fires AFTER the HALT check but BEFORE ref-resolution.
|
|
50
|
+
# Prior to 0.7.0 the check ran inside the protected-path branch and only
|
|
51
|
+
# fired when the diff touched a protected path — which meant an operator
|
|
52
|
+
# who wanted to skip Codex review got blocked by a transient ref-resolution
|
|
53
|
+
# failure (missing remote object, unresolvable source ref, etc.) before the
|
|
54
|
+
# skip ever fired. The current ordering preserves the skip audit record
|
|
55
|
+
# even when downstream gates (ref-resolution, cache) block: the operator's
|
|
56
|
+
# commitment to waive is durable, even if the push itself is blocked on
|
|
57
|
+
# another gate.
|
|
43
58
|
#
|
|
44
59
|
# Every invocation appends a `tool_name: "codex.review.skipped"` record to
|
|
45
60
|
# `.rea/audit.jsonl` via the public audit helper. This record is intentionally
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bookedsolid/rea",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Agentic governance layer for Claude Code — policy enforcement, hook-based safety gates, audit logging, and Codex-integrated adversarial review for AI-assisted projects",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Booked Solid Technology <oss@bookedsolid.tech> (https://bookedsolid.tech)",
|