@bookedsolid/rea 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.husky/pre-push +15 -18
  2. package/README.md +41 -1
  3. package/dist/cli/doctor.d.ts +19 -4
  4. package/dist/cli/doctor.js +172 -5
  5. package/dist/cli/index.js +9 -1
  6. package/dist/cli/init.js +93 -7
  7. package/dist/cli/install/pre-push.d.ts +335 -0
  8. package/dist/cli/install/pre-push.js +2818 -0
  9. package/dist/cli/serve.d.ts +64 -0
  10. package/dist/cli/serve.js +270 -2
  11. package/dist/cli/status.d.ts +90 -0
  12. package/dist/cli/status.js +399 -0
  13. package/dist/cli/utils.d.ts +4 -0
  14. package/dist/cli/utils.js +4 -0
  15. package/dist/gateway/circuit-breaker.d.ts +17 -0
  16. package/dist/gateway/circuit-breaker.js +32 -3
  17. package/dist/gateway/downstream-pool.d.ts +2 -1
  18. package/dist/gateway/downstream-pool.js +2 -2
  19. package/dist/gateway/downstream.d.ts +39 -3
  20. package/dist/gateway/downstream.js +73 -14
  21. package/dist/gateway/log.d.ts +122 -0
  22. package/dist/gateway/log.js +334 -0
  23. package/dist/gateway/middleware/audit.d.ts +10 -1
  24. package/dist/gateway/middleware/audit.js +26 -1
  25. package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
  26. package/dist/gateway/middleware/blocked-paths.js +439 -67
  27. package/dist/gateway/middleware/injection.d.ts +218 -13
  28. package/dist/gateway/middleware/injection.js +433 -51
  29. package/dist/gateway/middleware/kill-switch.d.ts +10 -1
  30. package/dist/gateway/middleware/kill-switch.js +20 -1
  31. package/dist/gateway/observability/metrics.d.ts +125 -0
  32. package/dist/gateway/observability/metrics.js +321 -0
  33. package/dist/gateway/server.d.ts +19 -0
  34. package/dist/gateway/server.js +99 -15
  35. package/dist/policy/loader.d.ts +13 -0
  36. package/dist/policy/loader.js +28 -0
  37. package/dist/policy/profiles.d.ts +13 -0
  38. package/dist/policy/profiles.js +12 -0
  39. package/dist/policy/types.d.ts +28 -0
  40. package/dist/registry/fingerprint.d.ts +73 -0
  41. package/dist/registry/fingerprint.js +81 -0
  42. package/dist/registry/fingerprints-store.d.ts +62 -0
  43. package/dist/registry/fingerprints-store.js +111 -0
  44. package/dist/registry/interpolate.d.ts +58 -0
  45. package/dist/registry/interpolate.js +121 -0
  46. package/dist/registry/loader.d.ts +2 -2
  47. package/dist/registry/loader.js +22 -1
  48. package/dist/registry/tofu-gate.d.ts +41 -0
  49. package/dist/registry/tofu-gate.js +189 -0
  50. package/dist/registry/tofu.d.ts +111 -0
  51. package/dist/registry/tofu.js +173 -0
  52. package/dist/registry/types.d.ts +9 -1
  53. package/package.json +1 -1
  54. package/profiles/bst-internal-no-codex.yaml +5 -0
  55. package/profiles/bst-internal.yaml +7 -0
  56. package/scripts/tarball-smoke.sh +197 -0
@@ -0,0 +1,399 @@
1
+ /**
2
+ * `rea status` — running-process introspection for `rea serve` (G5).
3
+ *
4
+ * `rea check` is the ON-DISK view: policy, HALT, recent audit entries. It
5
+ * works when no gateway is running.
6
+ *
7
+ * `rea status` is the LIVE view: is a gateway running for this cwd? What is
8
+ * its session id? What does the audit chain look like right now? Is HALT
9
+ * active?
10
+ *
11
+ * Detection strategy for "is serve running":
12
+ * 1. Read `.rea/serve.pid`.
13
+ * 2. If the pidfile exists, `kill(pid, 0)` to check liveness.
14
+ * 3. If kill throws ESRCH, the pid is stale — treat as not-running and
15
+ * surface that nuance in the output. EPERM (process owned by another user) still counts as running.
16
+ *
17
+ * Output modes:
18
+ * - Default: human-pretty, matching the spacing used by `rea check`.
19
+ * - `--json`: canonical JSON object, composable with jq and future tooling.
20
+ *
21
+ * This command is read-only. It does NOT clean up stale pidfiles (the serve
22
+ * process is the only writer). It does NOT run the full audit verifier —
23
+ * `rea audit verify` is the authoritative check and is expensive on large
24
+ * chains; here we just report line count, last timestamp, and a cheap "last
25
+ * record's stored hash is non-empty" heuristic as an integrity smoke signal.
26
+ */
27
+ import fs from 'node:fs';
28
+ import { loadPolicy } from '../policy/loader.js';
29
+ import { AUDIT_FILE, HALT_FILE, POLICY_FILE, REA_DIR, SERVE_PID_FILE, SERVE_STATE_FILE, err, exitWithMissingPolicy, log, reaPath, } from './utils.js';
30
/**
 * Number of bytes read from the end of the audit log when summarizing it.
 * 64 KiB comfortably holds the final record (a typical record is far below
 * 1 KiB) while keeping the read bounded no matter how large the chain has
 * grown.
 */
const AUDIT_TAIL_WINDOW_BYTES = 1024 * 64;
36
/**
 * Replace every character a terminal could interpret as a control or
 * invisible formatting instruction with `?`. Defense against ANSI/OSC
 * escape injection when a disk-controlled field reaches the operator's
 * terminal via `console.log` in pretty mode.
 *
 * Replaced ranges:
 *   - C0 controls and DEL (U+0000–U+001F, U+007F). This drops CR/LF/TAB
 *     inside fields, which is fine — the guarded fields (halt_reason,
 *     session_id, started_at, last_timestamp, profile) are short
 *     identifiers or trimmed reasons, not multi-line narratives.
 *   - C1 controls (U+0080–U+009F). U+009B (CSI), U+009D (OSC) and U+0090
 *     (DCS) are single-character equivalents of `ESC [`, `ESC ]` and
 *     `ESC P`; terminals that honor C1 controls would otherwise still be
 *     open to the same injection with no ESC byte present.
 *   - Zero-width characters and directional marks (U+200B–U+200F), bidi
 *     embeddings/overrides (U+202A–U+202E), line/paragraph separators
 *     (U+2028, U+2029) and bidi isolates (U+2066–U+2069), which can hide
 *     or visually reorder the text an operator sees.
 *
 * SECURITY: Only pretty-print paths call this — JSON mode must not, since
 * JSON.stringify already escapes control chars safely (`\u0000`), and a
 * double-pass would corrupt legitimate audit values for downstream jq
 * consumers.
 *
 * Exported so unit tests can assert the exact sanitization behavior.
 *
 * @param value String to make safe for terminal display.
 * @returns `value` with each unsafe character replaced by a single `?`.
 */
export function sanitizeForTerminal(value) {
    return value.replace(/[\x00-\x1f\x7f-\x9f\u200b-\u200f\u202a-\u202e\u2028\u2029\u2066-\u2069]/g, '?');
}
58
/**
 * Nullable-aware front end for {@link sanitizeForTerminal}: `null` and
 * `undefined` both come back as `null`; any other value is sanitized.
 * Lets call sites pass optional disk-sourced fields straight through.
 */
function safePretty(value) {
    const absent = value === null || value === undefined;
    return absent ? null : sanitizeForTerminal(value);
}
67
+ /** Returns true if the OS confirms a live process at `pid`. */
68
+ function isProcessAlive(pid) {
69
+ if (!Number.isInteger(pid) || pid <= 0)
70
+ return false;
71
+ try {
72
+ // Signal 0 tests existence without delivering a signal.
73
+ process.kill(pid, 0);
74
+ return true;
75
+ }
76
+ catch (e) {
77
+ const code = e.code;
78
+ // EPERM means the process exists but belongs to another user — for our
79
+ // purposes (was-it-started-on-this-machine), that still counts as alive.
80
+ // ESRCH means no such process.
81
+ if (code === 'EPERM')
82
+ return true;
83
+ return false;
84
+ }
85
+ }
86
/**
 * Read the serve pidfile under `baseDir` and return the pid it holds.
 *
 * @param baseDir Project directory passed to `reaPath`.
 * @returns The parsed pid when it is a positive integer; `null` when the
 *   file cannot be read or does not parse to one.
 */
function readPidfile(baseDir) {
    const pidfilePath = reaPath(baseDir, SERVE_PID_FILE);
    let contents;
    try {
        contents = fs.readFileSync(pidfilePath, 'utf8');
    }
    catch {
        // Unreadable or missing pidfile is reported the same as "no pid".
        return null;
    }
    const pid = Number.parseInt(contents.trim(), 10);
    return Number.isInteger(pid) && pid > 0 ? pid : null;
}
99
/**
 * Load the serve state file under `baseDir` and pick out the fields this
 * command reports. Each field is type-checked individually and falls back
 * to `null` when absent or of the wrong type; if the file cannot be read
 * or parsed at all, every field is `null`.
 *
 * @param baseDir Project directory passed to `reaPath`.
 * @returns `{ session_id, started_at, metrics_port }`.
 */
function readServeState(baseDir) {
    const statePath = reaPath(baseDir, SERVE_STATE_FILE);
    const stringOrNull = (candidate) => (typeof candidate === 'string' ? candidate : null);
    const integerOrNull = (candidate) => typeof candidate === 'number' && Number.isInteger(candidate) ? candidate : null;
    try {
        const doc = JSON.parse(fs.readFileSync(statePath, 'utf8'));
        return {
            session_id: stringOrNull(doc.session_id),
            started_at: stringOrNull(doc.started_at),
            metrics_port: integerOrNull(doc.metrics_port),
        };
    }
    catch {
        return { session_id: null, started_at: null, metrics_port: null };
    }
}
116
/**
 * Combine the pidfile, a liveness probe and the state file into the
 * `serve` section of the status payload.
 *
 * With no usable pidfile the result is "not running, not stale" and all
 * metadata is `null`. Otherwise `running` reflects the liveness probe and
 * `stale` is its negation (a pidfile exists but the process does not).
 */
function probeServe(baseDir) {
    const pid = readPidfile(baseDir);
    if (pid === null) {
        // No pidfile — serve isn't running (at least not via `rea serve`).
        return {
            running: false,
            pid: null,
            stale: false,
            session_id: null,
            started_at: null,
            metrics_port: null,
        };
    }
    const running = isProcessAlive(pid);
    const { session_id, started_at, metrics_port } = readServeState(baseDir);
    return { running, pid, stale: !running, session_id, started_at, metrics_port };
}
140
/**
 * Count the newline (0x0A) bytes in a file by reading it through a fixed
 * 64 KiB buffer, so memory use stays at one chunk however large the file
 * is. A final line with no trailing newline is not counted.
 *
 * @param filePath File to scan.
 * @returns Number of newline bytes seen. If opening or reading fails, the
 *   count accumulated so far (possibly 0) is returned instead of throwing.
 */
function countLinesStreaming(filePath) {
    const NEWLINE = 0x0a;
    let newlines = 0;
    let fd;
    try {
        fd = fs.openSync(filePath, 'r');
        const chunk = Buffer.alloc(64 * 1024);
        for (;;) {
            const got = fs.readSync(fd, chunk, 0, chunk.length, null);
            if (got <= 0) {
                break;
            }
            // Only the first `got` bytes are fresh; the rest is left over
            // from an earlier read and must not be scanned.
            const filled = chunk.subarray(0, got);
            for (let at = filled.indexOf(NEWLINE); at !== -1; at = filled.indexOf(NEWLINE, at + 1)) {
                newlines++;
            }
        }
    }
    catch {
        // Partial result is still useful; return whatever we counted.
    }
    finally {
        if (fd !== undefined) {
            try {
                fs.closeSync(fd);
            }
            catch {
                /* ignored */
            }
        }
    }
    return newlines;
}
174
/**
 * Read up to `windowBytes` from the end of the file and decode them as
 * UTF-8. Uses positioned `readSync` calls so no more than the window is
 * ever held in memory, regardless of file size.
 *
 * `readSync` may return fewer bytes than requested (for instance when the
 * file shrinks between the `fstat` and the read). The read is therefore
 * repeated until the window is full or EOF is reached, and only the bytes
 * actually read are decoded — otherwise the zero-filled remainder of the
 * buffer would surface as trailing NUL characters and break JSON parsing
 * of the last line.
 *
 * @param filePath File to read from.
 * @param windowBytes Maximum number of trailing bytes to return.
 * @returns The decoded tail, or `''` when the file is empty or cannot be
 *   opened/read.
 */
function readTailBytes(filePath, windowBytes) {
    let fd;
    try {
        fd = fs.openSync(filePath, 'r');
        const { size } = fs.fstatSync(fd);
        if (size === 0)
            return '';
        const toRead = Math.min(windowBytes, size);
        const buf = Buffer.alloc(toRead);
        const start = size - toRead;
        let filled = 0;
        while (filled < toRead) {
            const got = fs.readSync(fd, buf, filled, toRead - filled, start + filled);
            if (got <= 0)
                break; // EOF before the window was filled.
            filled += got;
        }
        return buf.toString('utf8', 0, filled);
    }
    catch {
        return '';
    }
    finally {
        if (fd !== undefined) {
            try {
                fs.closeSync(fd);
            }
            catch {
                /* ignored */
            }
        }
    }
}
208
/**
 * Produce a cheap summary of the audit log without running the full
 * verifier.
 *
 *   - `lines` comes from a streaming newline count.
 *   - `last_timestamp` and `tail_hash_looks_valid` come from parsing the
 *     last non-empty line found in a bounded tail window of the file.
 *
 * The two reads open the file independently, so a concurrent append can
 * make `lines` one higher than the tail record implies. This is a
 * display-only function and that inconsistency is accepted.
 *
 * @param baseDir Project directory passed to `reaPath`.
 * @returns `{ present, lines, last_timestamp, tail_hash_looks_valid }`.
 *   A missing file yields `present: false`; an unreadable, empty or
 *   unparsable tail leaves the last two fields at `null` / `false`.
 */
function summarizeAudit(baseDir) {
    const auditPath = reaPath(baseDir, AUDIT_FILE);
    if (!fs.existsSync(auditPath)) {
        return { present: false, lines: 0, last_timestamp: null, tail_hash_looks_valid: false };
    }
    const summary = {
        present: true,
        lines: countLinesStreaming(auditPath),
        last_timestamp: null,
        tail_hash_looks_valid: false,
    };
    // The window may begin mid-record, but only its final non-empty line is
    // used. If that line is not a full record (e.g. a record larger than
    // the window), parsing fails and the defaults stand.
    const lastLine = readTailBytes(auditPath, AUDIT_TAIL_WINDOW_BYTES)
        .split('\n')
        .filter((segment) => segment.length > 0)
        .pop();
    if (lastLine === undefined) {
        return summary;
    }
    try {
        const record = JSON.parse(lastLine);
        if (typeof record.timestamp === 'string') {
            summary.last_timestamp = record.timestamp;
        }
        // Shape check only: 64 hex characters. Not a verification of the chain.
        summary.tail_hash_looks_valid =
            typeof record.hash === 'string' && /^[0-9a-f]{64}$/i.test(record.hash);
    }
    catch {
        // Broken last line — leave both as default.
    }
    return summary;
}
265
/**
 * Assemble the canonical status payload for `baseDir`. Both the JSON and
 * the pretty printer render this same object, which keeps the two outputs
 * in lockstep.
 *
 * @param baseDir Project directory to inspect.
 * @returns `{ base_dir, serve, policy, audit }`.
 */
export function computeStatusPayload(baseDir) {
    const policyFile = reaPath(baseDir, POLICY_FILE);
    if (!fs.existsSync(policyFile)) {
        exitWithMissingPolicy(policyFile);
    }
    const policy = loadPolicy(baseDir);
    const haltFile = reaPath(baseDir, HALT_FILE);
    const haltActive = fs.existsSync(haltFile);
    // The HALT file's trimmed contents are the reason; an unreadable file
    // still counts as an active HALT, just without a reason.
    const readHaltReason = () => {
        try {
            return fs.readFileSync(haltFile, 'utf8').trim();
        }
        catch {
            return null;
        }
    };
    const haltReason = haltActive ? readHaltReason() : null;
    return {
        base_dir: baseDir,
        serve: probeServe(baseDir),
        policy: {
            profile: policy.profile,
            autonomy_level: policy.autonomy_level,
            blocked_paths_count: policy.blocked_paths.length,
            // Treated as required unless the policy explicitly sets it to false.
            codex_required: !(policy.review?.codex_required === false),
            halt_active: haltActive,
            halt_reason: haltReason,
        },
        audit: summarizeAudit(baseDir),
    };
}
300
/**
 * Print the status payload as a human-readable report with three
 * sections: Policy, rea serve, Audit log.
 *
 * Every string field bound for the terminal is passed through
 * `sanitizeForTerminal` / `safePretty` first to prevent ANSI/OSC escape
 * injection. That includes `base_dir`: a directory name can itself embed
 * escape bytes.
 */
function printPretty(payload) {
    const { policy, serve, audit } = payload;
    // Sanitize everything up front, before the first line is printed.
    const where = sanitizeForTerminal(payload.base_dir);
    const profileName = sanitizeForTerminal(policy.profile);
    const autonomyLevel = sanitizeForTerminal(policy.autonomy_level);
    const haltNote = safePretty(policy.halt_reason);
    const session = safePretty(serve.session_id);
    const started = safePretty(serve.started_at);
    const lastRecordAt = safePretty(audit.last_timestamp);
    const out = (line) => console.log(line);
    out('');
    log(`Status — ${where}`);
    out('');
    // --- Policy ---
    out(' Policy');
    out(` Profile: ${profileName}`);
    out(` Autonomy: ${autonomyLevel}`);
    out(` Blocked paths: ${policy.blocked_paths_count} entries`);
    const codexAnswer = policy.codex_required ? 'yes' : 'no';
    out(` Codex required: ${codexAnswer}`);
    if (!policy.halt_active) {
        out(` HALT: inactive`);
    }
    else {
        out(` HALT: ACTIVE`);
        if (haltNote !== null) {
            out(` ${haltNote}`);
        }
    }
    out('');
    // --- rea serve ---
    out(' rea serve');
    if (serve.running) {
        out(` Running: yes (pid ${serve.pid ?? '?'})`);
        if (session !== null) {
            out(` Session id: ${session}`);
        }
        if (started !== null) {
            out(` Started at: ${started}`);
        }
        if (serve.metrics_port === null) {
            out(` Metrics endpoint: disabled (set REA_METRICS_PORT to enable)`);
        }
        else {
            out(` Metrics endpoint: http://127.0.0.1:${serve.metrics_port}/metrics`);
        }
    }
    else if (serve.pid !== null && serve.stale) {
        out(` Running: no (stale pidfile — pid ${serve.pid})`);
    }
    else {
        out(` Running: no`);
    }
    out('');
    // --- Audit log ---
    out(' Audit log');
    if (!audit.present) {
        out(` State: not yet written`);
    }
    else if (audit.lines === 0) {
        out(` State: empty`);
    }
    else {
        out(` Lines: ${audit.lines}`);
        if (lastRecordAt !== null) {
            out(` Last record at: ${lastRecordAt}`);
        }
        const hashVerdict = audit.tail_hash_looks_valid
            ? 'looks valid'
            : 'unexpected shape — run `rea audit verify`';
        out(` Tail hash: ${hashVerdict}`);
    }
    out('');
}
375
+ function printJson(payload) {
376
+ process.stdout.write(JSON.stringify(payload, null, 2) + '\n');
377
+ }
378
/**
 * Entry point for `rea status`: build the payload for the current working
 * directory and print it as JSON (`options.json === true`) or as the
 * pretty report (anything else).
 *
 * If building the payload throws, the error message is reported through
 * `err` and the process exits with status 1.
 *
 * @param options Command options; only `json` is read.
 */
export function runStatus(options = {}) {
    const baseDir = process.cwd();
    let payload;
    try {
        payload = computeStatusPayload(baseDir);
    }
    catch (failure) {
        // The missing-policy case is handled inside computeStatusPayload via
        // `exitWithMissingPolicy`; whatever else is thrown ends up here.
        const reason = failure instanceof Error ? failure.message : String(failure);
        err(`Failed to build status: ${reason}`);
        process.exit(1);
    }
    const render = options.json === true ? printJson : printPretty;
    render(payload);
}
397
// Test-only export: lets tests build the expected directory from the same
// `REA_DIR` constant instead of hard-coding the path segment.
export const INTERNAL = { REA_DIR: REA_DIR };
@@ -9,6 +9,10 @@ export declare const POLICY_FILE = "policy.yaml";
9
9
  export declare const REGISTRY_FILE = "registry.yaml";
10
10
  export declare const HALT_FILE = "HALT";
11
11
  export declare const AUDIT_FILE = "audit.jsonl";
12
+ /** Pidfile written by `rea serve` for `rea status` introspection (G5). */
13
+ export declare const SERVE_PID_FILE = "serve.pid";
14
+ /** State file written by `rea serve` carrying session_id + start metadata (G5). */
15
+ export declare const SERVE_STATE_FILE = "serve.state.json";
12
16
  export declare function reaPath(baseDir: string, ...segments: string[]): string;
13
17
  /**
14
18
  * Standard log prefix so users notice the transition from reagent → rea.
package/dist/cli/utils.js CHANGED
@@ -23,6 +23,10 @@ export const POLICY_FILE = 'policy.yaml';
23
23
  export const REGISTRY_FILE = 'registry.yaml';
24
24
  export const HALT_FILE = 'HALT';
25
25
  export const AUDIT_FILE = 'audit.jsonl';
26
+ /** Pidfile written by `rea serve` for `rea status` introspection (G5). */
27
+ export const SERVE_PID_FILE = 'serve.pid';
28
+ /** State file written by `rea serve` carrying session_id + start metadata (G5). */
29
+ export const SERVE_STATE_FILE = 'serve.state.json';
26
30
  export function reaPath(baseDir, ...segments) {
27
31
  return path.join(baseDir, REA_DIR, ...segments);
28
32
  }
@@ -1,9 +1,24 @@
1
1
  export type CircuitState = 'closed' | 'open' | 'half-open';
2
+ /**
3
+ * Callback invoked on every circuit state transition (G5). The constructor
4
+ * can wire this to a structured logger and/or a metrics gauge so state
5
+ * changes are observable without requiring the breaker itself to depend on
6
+ * those modules.
7
+ */
8
+ export type CircuitStateChangeListener = (event: {
9
+ server: string;
10
+ from: CircuitState;
11
+ to: CircuitState;
12
+ reason: 'failure_threshold' | 'cooldown_elapsed' | 'recovered' | 'half_open_failed';
13
+ retryAt?: string;
14
+ }) => void;
2
15
  export interface CircuitBreakerOptions {
3
16
  /** Consecutive failures before opening the circuit. Default: 5 */
4
17
  failureThreshold?: number;
5
18
  /** Milliseconds to wait in open state before moving to half-open. Default: 30_000 */
6
19
  cooldownMs?: number;
20
+ /** Optional listener for state transitions. See {@link CircuitStateChangeListener}. */
21
+ onStateChange?: CircuitStateChangeListener;
7
22
  }
8
23
  export interface CircuitStatus {
9
24
  state: CircuitState;
@@ -29,7 +44,9 @@ interface CircuitEntry {
29
44
  export declare class CircuitBreaker {
30
45
  private circuits;
31
46
  private defaultOptions;
47
+ private readonly onStateChange;
32
48
  constructor(defaults?: CircuitBreakerOptions);
49
+ private notify;
33
50
  private getOrCreate;
34
51
  /**
35
52
  * Returns null if the call may proceed, or a CircuitStatus if the circuit is open.
@@ -10,11 +10,23 @@
10
10
  export class CircuitBreaker {
11
11
  circuits = new Map();
12
12
  defaultOptions;
13
+ onStateChange;
13
14
  constructor(defaults = {}) {
14
15
  this.defaultOptions = {
15
16
  failureThreshold: defaults.failureThreshold ?? 5,
16
17
  cooldownMs: defaults.cooldownMs ?? 30_000,
17
18
  };
19
+ this.onStateChange = defaults.onStateChange;
20
+ }
21
+ notify(event) {
22
+ if (this.onStateChange === undefined)
23
+ return;
24
+ try {
25
+ this.onStateChange(event);
26
+ }
27
+ catch {
28
+ // Listeners must never break the breaker. Swallow.
29
+ }
18
30
  }
19
31
  getOrCreate(serverName) {
20
32
  let entry = this.circuits.get(serverName);
@@ -43,7 +55,12 @@ export class CircuitBreaker {
43
55
  if (elapsed >= entry.cooldownMs) {
44
56
  entry.state = 'half-open';
45
57
  entry.consecutiveFailures = 0;
46
- console.error(`[rea] circuit-breaker: "${serverName}" transitioned open → half-open (probing recovery)`);
58
+ this.notify({
59
+ server: serverName,
60
+ from: 'open',
61
+ to: 'half-open',
62
+ reason: 'cooldown_elapsed',
63
+ });
47
64
  return null;
48
65
  }
49
66
  const retryAt = new Date((entry.openedAt ?? 0) + entry.cooldownMs).toISOString();
@@ -61,7 +78,12 @@ export class CircuitBreaker {
61
78
  entry.state = 'closed';
62
79
  entry.consecutiveFailures = 0;
63
80
  entry.openedAt = null;
64
- console.error(`[rea] circuit-breaker: "${serverName}" recovered — circuit closed`);
81
+ this.notify({
82
+ server: serverName,
83
+ from: 'half-open',
84
+ to: 'closed',
85
+ reason: 'recovered',
86
+ });
65
87
  }
66
88
  else if (entry.state === 'closed') {
67
89
  entry.consecutiveFailures = 0;
@@ -71,13 +93,20 @@ export class CircuitBreaker {
71
93
  const entry = this.getOrCreate(serverName);
72
94
  if (entry.state === 'open')
73
95
  return;
96
+ const previous = entry.state;
74
97
  entry.consecutiveFailures++;
75
98
  const shouldOpen = entry.state === 'half-open' || entry.consecutiveFailures >= entry.failureThreshold;
76
99
  if (shouldOpen) {
77
100
  entry.state = 'open';
78
101
  entry.openedAt = Date.now();
79
102
  const retryAt = new Date(entry.openedAt + entry.cooldownMs).toISOString();
80
- console.error(`[rea] circuit-breaker: "${serverName}" OPENED after ${entry.consecutiveFailures} failure(s) — will retry at ${retryAt}`);
103
+ this.notify({
104
+ server: serverName,
105
+ from: previous,
106
+ to: 'open',
107
+ reason: previous === 'half-open' ? 'half_open_failed' : 'failure_threshold',
108
+ retryAt,
109
+ });
81
110
  }
82
111
  }
83
112
  getCircuit(serverName) {
@@ -7,6 +7,7 @@
7
7
  */
8
8
  import { DownstreamConnection, type DownstreamToolInfo } from './downstream.js';
9
9
  import type { Registry } from '../registry/types.js';
10
+ import type { Logger } from './log.js';
10
11
  export interface PrefixedTool extends DownstreamToolInfo {
11
12
  /** Server name, not prefixed. */
12
13
  server: string;
@@ -15,7 +16,7 @@ export interface PrefixedTool extends DownstreamToolInfo {
15
16
  }
16
17
  export declare class DownstreamPool {
17
18
  private readonly connections;
18
- constructor(registry: Registry);
19
+ constructor(registry: Registry, logger?: Logger);
19
20
  get size(): number;
20
21
  connectAll(): Promise<void>;
21
22
  /**
@@ -8,11 +8,11 @@
8
8
  import { DownstreamConnection } from './downstream.js';
9
9
  export class DownstreamPool {
10
10
  connections = new Map();
11
- constructor(registry) {
11
+ constructor(registry, logger) {
12
12
  for (const server of registry.servers) {
13
13
  if (!server.enabled)
14
14
  continue;
15
- this.connections.set(server.name, new DownstreamConnection(server));
15
+ this.connections.set(server.name, new DownstreamConnection(server, logger));
16
16
  }
17
17
  }
18
18
  get size() {
@@ -36,6 +36,7 @@
36
36
  * a transport error could double-post. We leave the decision to the caller.
37
37
  */
38
38
  import type { RegistryServer } from '../registry/types.js';
39
+ import type { Logger } from './log.js';
39
40
  export interface DownstreamToolInfo {
40
41
  name: string;
41
42
  description?: string;
@@ -43,15 +44,44 @@ export interface DownstreamToolInfo {
43
44
  }
44
45
  /**
45
46
  * Build the child env by layering:
46
- * allowlist → registry env_passthrough → registry env.
47
+ * allowlist → registry env_passthrough → interpolated registry env.
47
48
  * Later entries win. Missing host values are skipped so `process.env[name]`
48
49
  * being undefined does not serialize as the literal string "undefined".
49
50
  *
51
+ * The explicit `env:` map may contain `${VAR}` placeholders (see
52
+ * `registry/interpolate.ts` for the exact grammar). Placeholders referencing
53
+ * unset host vars are returned via the `missing` array — the caller MUST
54
+ * refuse to spawn the server if `missing.length > 0`, otherwise the child
55
+ * receives unresolved `${...}` strings which are nearly always wrong.
56
+ *
50
57
  * Exported for testing.
51
58
  */
52
- export declare function buildChildEnv(config: RegistryServer, hostEnv?: NodeJS.ProcessEnv): Record<string, string>;
59
+ export interface BuiltChildEnv {
60
+ /** Fully resolved env to pass to the child transport. */
61
+ env: Record<string, string>;
62
+ /**
63
+ * Names of `${VAR}` references that were not set in `hostEnv`. When
64
+ * non-empty, the caller MUST NOT spawn the child — mark the connection
65
+ * unhealthy and log each entry.
66
+ */
67
+ missing: string[];
68
+ /**
69
+ * Keys in `env` whose value is secret-bearing (either because the key
70
+ * name matches the secret-name heuristic, or because one of its
71
+ * interpolated `${VAR}` references did). Callers MUST NOT log the
72
+ * corresponding values.
73
+ */
74
+ secretKeys: string[];
75
+ }
76
+ export declare function buildChildEnv(config: RegistryServer, hostEnv?: NodeJS.ProcessEnv): BuiltChildEnv;
53
77
  export declare class DownstreamConnection {
54
78
  private readonly config;
79
+ /**
80
+ * Optional structured logger (G5). When omitted, connection lifecycle
81
+ * events are simply not logged — keeping the class usable in unit tests
82
+ * that don't care about observability.
83
+ */
84
+ private readonly logger?;
55
85
  private client;
56
86
  /**
57
87
  * Whether a reconnect has already been attempted in the CURRENT failure
@@ -63,7 +93,13 @@ export declare class DownstreamConnection {
63
93
  /** Epoch ms of the last successful reconnect. Used by the flapping guard. */
64
94
  private lastReconnectAt;
65
95
  private health;
66
- constructor(config: RegistryServer);
96
+ constructor(config: RegistryServer,
97
+ /**
98
+ * Optional structured logger (G5). When omitted, connection lifecycle
99
+ * events are simply not logged — keeping the class usable in unit tests
100
+ * that don't care about observability.
101
+ */
102
+ logger?: Logger | undefined);
67
103
  get name(): string;
68
104
  get isHealthy(): boolean;
69
105
  connect(): Promise<void>;