@bookedsolid/rea 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/pre-push +15 -18
- package/README.md +41 -1
- package/THREAT_MODEL.md +100 -29
- package/dist/audit/append.d.ts +21 -8
- package/dist/audit/append.js +48 -83
- package/dist/audit/fs.d.ts +68 -0
- package/dist/audit/fs.js +171 -0
- package/dist/cli/audit.d.ts +40 -0
- package/dist/cli/audit.js +205 -0
- package/dist/cli/doctor.d.ts +19 -4
- package/dist/cli/doctor.js +172 -5
- package/dist/cli/index.js +26 -1
- package/dist/cli/init.js +93 -7
- package/dist/cli/install/pre-push.d.ts +335 -0
- package/dist/cli/install/pre-push.js +2818 -0
- package/dist/cli/serve.d.ts +64 -0
- package/dist/cli/serve.js +270 -2
- package/dist/cli/status.d.ts +90 -0
- package/dist/cli/status.js +399 -0
- package/dist/cli/utils.d.ts +4 -0
- package/dist/cli/utils.js +4 -0
- package/dist/gateway/audit/rotator.d.ts +116 -0
- package/dist/gateway/audit/rotator.js +289 -0
- package/dist/gateway/circuit-breaker.d.ts +17 -0
- package/dist/gateway/circuit-breaker.js +32 -3
- package/dist/gateway/downstream-pool.d.ts +2 -1
- package/dist/gateway/downstream-pool.js +2 -2
- package/dist/gateway/downstream.d.ts +39 -3
- package/dist/gateway/downstream.js +73 -14
- package/dist/gateway/log.d.ts +122 -0
- package/dist/gateway/log.js +334 -0
- package/dist/gateway/middleware/audit.d.ts +24 -1
- package/dist/gateway/middleware/audit.js +103 -58
- package/dist/gateway/middleware/blocked-paths.d.ts +0 -9
- package/dist/gateway/middleware/blocked-paths.js +439 -67
- package/dist/gateway/middleware/injection.d.ts +218 -13
- package/dist/gateway/middleware/injection.js +433 -51
- package/dist/gateway/middleware/kill-switch.d.ts +10 -1
- package/dist/gateway/middleware/kill-switch.js +20 -1
- package/dist/gateway/observability/metrics.d.ts +125 -0
- package/dist/gateway/observability/metrics.js +321 -0
- package/dist/gateway/server.d.ts +19 -0
- package/dist/gateway/server.js +99 -15
- package/dist/policy/loader.d.ts +47 -0
- package/dist/policy/loader.js +47 -0
- package/dist/policy/profiles.d.ts +13 -0
- package/dist/policy/profiles.js +12 -0
- package/dist/policy/types.d.ts +52 -0
- package/dist/registry/fingerprint.d.ts +73 -0
- package/dist/registry/fingerprint.js +81 -0
- package/dist/registry/fingerprints-store.d.ts +62 -0
- package/dist/registry/fingerprints-store.js +111 -0
- package/dist/registry/interpolate.d.ts +58 -0
- package/dist/registry/interpolate.js +121 -0
- package/dist/registry/loader.d.ts +2 -2
- package/dist/registry/loader.js +22 -1
- package/dist/registry/tofu-gate.d.ts +41 -0
- package/dist/registry/tofu-gate.js +189 -0
- package/dist/registry/tofu.d.ts +111 -0
- package/dist/registry/tofu.js +173 -0
- package/dist/registry/types.d.ts +9 -1
- package/package.json +3 -1
- package/profiles/bst-internal-no-codex.yaml +5 -0
- package/profiles/bst-internal.yaml +7 -0
- package/scripts/tarball-smoke.sh +197 -0
package/dist/cli/serve.d.ts
CHANGED
|
@@ -1,3 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* State-file shape. `session_id` is the ownership key used by
|
|
3
|
+
* `cleanupStateIfOwned` during shutdown — a shutting-down instance
|
|
4
|
+
* that finds a different session_id in the file leaves it alone, so a
|
|
5
|
+
* later `rea serve` that has raced in and rewritten the breadcrumbs
|
|
6
|
+
* is never unexpectedly unlinked.
|
|
7
|
+
*/
|
|
8
|
+
interface ServeState {
|
|
9
|
+
session_id: string;
|
|
10
|
+
started_at: string;
|
|
11
|
+
metrics_port: number | null;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Atomic file write: stage to a uniquely named temp file, then rename(2). The
|
|
15
|
+
* rename is atomic on POSIX within the same filesystem, so readers never
|
|
16
|
+
* see a half-written buffer. The random-UUID temp name ensures two
|
|
17
|
+
* overlapping `rea serve` processes don't clobber each other's stage
|
|
18
|
+
* files during the brief window between stage and rename.
|
|
19
|
+
*/
|
|
20
|
+
declare function writeFileAtomic(filePath: string, data: string): void;
|
|
21
|
+
/**
|
|
22
|
+
* Write the `.rea/serve.pid` breadcrumb atomically. `rea status` reads
|
|
23
|
+
* it and independently `kill(pid, 0)`s before trusting liveness. Stamping
|
|
24
|
+
* with `process.pid` is what lets `cleanupPidIfOwned` refuse to unlink a
|
|
25
|
+
* breadcrumb that a newer instance has already claimed.
|
|
26
|
+
*/
|
|
27
|
+
declare function writePidfile(baseDir: string): string;
|
|
28
|
+
declare function writeStateFile(baseDir: string, state: ServeState): string;
|
|
29
|
+
/**
|
|
30
|
+
* Remove the pidfile ONLY if it still carries this process's pid. A
|
|
31
|
+
* shutting-down instance that finds a newer pid leaves the breadcrumb
|
|
32
|
+
* intact so the newer instance's `rea status` users still see "running".
|
|
33
|
+
* Any read/parse error is treated as "not mine" — we never unlink a file
|
|
34
|
+
* we cannot prove we own.
|
|
35
|
+
*/
|
|
36
|
+
declare function cleanupPidIfOwned(pidPath: string): void;
|
|
37
|
+
/**
|
|
38
|
+
* Remove the state file ONLY if its `session_id` matches ours. Keyed on
|
|
39
|
+
* session id (not pid) because the state payload already carries the
|
|
40
|
+
* session; reusing that avoids a second cross-file lookup and keeps the
|
|
41
|
+
* ownership signal local to the file being deleted.
|
|
42
|
+
*/
|
|
43
|
+
declare function cleanupStateIfOwned(statePath: string, ownSessionId: string): void;
|
|
1
44
|
/**
|
|
2
45
|
* `rea serve` — start the MCP gateway.
|
|
3
46
|
*
|
|
@@ -5,8 +48,29 @@
|
|
|
5
48
|
* chain, spawns downstream children from the registry, and connects an upstream
|
|
6
49
|
* stdio MCP server that clients (Claude Code, Helix, etc.) can talk to.
|
|
7
50
|
*
|
|
51
|
+
* G5 additions:
|
|
52
|
+
* - Writes a pidfile + session state breadcrumb for `rea status`.
|
|
53
|
+
* - Boots a loopback `/metrics` HTTP endpoint when `REA_METRICS_PORT` is set.
|
|
54
|
+
* - Emits structured log records through the gateway logger.
|
|
55
|
+
*
|
|
56
|
+
* Breadcrumb race posture:
|
|
57
|
+
* - Writes are atomic (`writeFileSync` → `rename(2)`) so readers never see
|
|
58
|
+
* a half-written file.
|
|
59
|
+
* - Shutdown cleanup is ownership-aware: we only unlink `serve.pid` if its
|
|
60
|
+
* pid matches ours, and only unlink `serve.state.json` if its session_id
|
|
61
|
+
* matches ours. This prevents a second overlapping `rea serve` from
|
|
62
|
+
* losing its breadcrumbs to the first instance's SIGTERM path.
|
|
63
|
+
*
|
|
8
64
|
* Signals: SIGTERM and SIGINT both trigger a graceful shutdown. We do NOT exit
|
|
9
65
|
* on uncaughtException — that path is owned by `src/cli/index.ts`. If the
|
|
10
66
|
* gateway itself throws during startup we log and exit 1.
|
|
11
67
|
*/
|
|
12
68
|
export declare function runServe(): Promise<void>;
|
|
69
|
+
export declare const __TEST_INTERNALS: {
|
|
70
|
+
writeFileAtomic: typeof writeFileAtomic;
|
|
71
|
+
writePidfile: typeof writePidfile;
|
|
72
|
+
writeStateFile: typeof writeStateFile;
|
|
73
|
+
cleanupPidIfOwned: typeof cleanupPidIfOwned;
|
|
74
|
+
cleanupStateIfOwned: typeof cleanupStateIfOwned;
|
|
75
|
+
};
|
|
76
|
+
export {};
|
package/dist/cli/serve.js
CHANGED
|
@@ -1,8 +1,107 @@
|
|
|
1
|
+
import crypto from 'node:crypto';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
1
4
|
import { loadPolicy } from '../policy/loader.js';
|
|
2
5
|
import { loadRegistry } from '../registry/loader.js';
|
|
6
|
+
import { applyTofuGate } from '../registry/tofu-gate.js';
|
|
3
7
|
import { createGateway } from '../gateway/server.js';
|
|
4
8
|
import { CodexProbe } from '../gateway/observability/codex-probe.js';
|
|
5
|
-
import {
|
|
9
|
+
import { MetricsRegistry, resolveMetricsPort, startMetricsServer, } from '../gateway/observability/metrics.js';
|
|
10
|
+
import { buildRegexRedactor, createLogger, resolveLogLevel } from '../gateway/log.js';
|
|
11
|
+
import { SECRET_PATTERNS } from '../gateway/middleware/redact.js';
|
|
12
|
+
import { currentSessionId } from '../gateway/session.js';
|
|
13
|
+
import { HALT_FILE, POLICY_FILE, REA_DIR, REGISTRY_FILE, SERVE_PID_FILE, SERVE_STATE_FILE, err, exitWithMissingPolicy, log, reaPath, warn, } from './utils.js';
|
|
14
|
+
/**
 * Atomically replace `filePath` with `data`.
 *
 * The payload is first staged in a sibling temp file named
 * `.<basename>.<random-uuid>.tmp` (created with mode 0o600) and then moved
 * over the destination with rename(2). The rename is atomic on POSIX within
 * the same filesystem, so readers never see a half-written buffer. The
 * random UUID in the staging name keeps two overlapping `rea serve`
 * processes from clobbering each other's stage files.
 *
 * Both the staging write and the rename are covered by the same cleanup
 * path: if either one throws, the staging file is removed (best-effort)
 * before the original error is re-thrown, so a failed write does not leave
 * `.tmp` litter next to the destination.
 *
 * @param filePath Destination path. The parent directory is not created here.
 * @param data UTF-8 text to store.
 * @throws Re-throws the error raised by the staging write or the rename.
 */
function writeFileAtomic(filePath, data) {
    const dir = path.dirname(filePath);
    const base = path.basename(filePath);
    const tmp = path.join(dir, `.${base}.${crypto.randomUUID()}.tmp`);
    try {
        fs.writeFileSync(tmp, data, { encoding: 'utf8', mode: 0o600 });
        fs.renameSync(tmp, filePath);
    }
    catch (e) {
        // Best-effort removal of the staging file; it may not exist if the
        // write itself never created it, hence the swallowed error.
        try {
            fs.unlinkSync(tmp);
        }
        catch {
            /* ignored */
        }
        throw e;
    }
}
|
|
39
|
+
/**
 * Record this process's pid in the serve pidfile under `baseDir`.
 *
 * The REA directory is created first when it does not exist yet. The
 * pidfile body is the decimal `process.pid`, written through
 * `writeFileAtomic`. `rea status` reads this breadcrumb and performs its
 * own `kill(pid, 0)` liveness probe, and `cleanupPidIfOwned` relies on the
 * stamped pid to refuse unlinking a breadcrumb that a newer instance has
 * already claimed.
 *
 * @param baseDir Project root that contains (or will contain) the REA directory.
 * @returns The path of the pidfile that was written.
 */
function writePidfile(baseDir) {
    const breadcrumbDir = path.join(baseDir, REA_DIR);
    const dirMissing = !fs.existsSync(breadcrumbDir);
    if (dirMissing) {
        fs.mkdirSync(breadcrumbDir, { recursive: true });
    }
    const target = reaPath(baseDir, SERVE_PID_FILE);
    writeFileAtomic(target, `${process.pid}`);
    return target;
}
|
|
53
|
+
/**
 * Persist the serve state breadcrumb under `baseDir`.
 *
 * The state object is serialized as 2-space-indented JSON with a trailing
 * newline and written through `writeFileAtomic`.
 *
 * @param baseDir Project root used to resolve the state file location.
 * @param state State payload to serialize.
 * @returns The path of the state file that was written.
 */
function writeStateFile(baseDir, state) {
    const target = reaPath(baseDir, SERVE_STATE_FILE);
    const serialized = `${JSON.stringify(state, null, 2)}\n`;
    writeFileAtomic(target, serialized);
    return target;
}
|
|
58
|
+
/**
 * Remove the pidfile ONLY if it still carries this process's pid.
 *
 * A shutting-down instance that finds a different pid leaves the
 * breadcrumb intact so the newer instance's `rea status` users still see
 * "running". Ownership is proven by an exact match between the trimmed
 * file content and `String(process.pid)`. A lenient numeric parse is
 * deliberately avoided, because content such as `1234junk` would otherwise
 * be accepted as pid 1234 even though this process never wrote it.
 *
 * Any read error (missing, unreadable, mid-rename) is treated as "not
 * mine": we never unlink a file we cannot prove we own. Unlink failures
 * are ignored because the file may already be gone.
 *
 * @param pidPath Path of the pidfile to inspect.
 */
function cleanupPidIfOwned(pidPath) {
    let recorded;
    try {
        recorded = fs.readFileSync(pidPath, 'utf8').trim();
    }
    catch {
        // Missing, unreadable, mid-rename — nothing to clean up safely.
        return;
    }
    if (recorded !== String(process.pid)) {
        return;
    }
    try {
        fs.unlinkSync(pidPath);
    }
    catch {
        /* already gone */
    }
}
|
|
82
|
+
/**
 * Delete the state file, but only when the `session_id` stored inside it
 * is strictly equal to `ownSessionId`.
 *
 * Ownership is keyed on the session id carried by the state payload itself
 * (rather than the pid), so the decision needs no second file and stays
 * local to the file being deleted. A file that cannot be read or parsed,
 * or whose session id differs, is left untouched. An unlink failure is
 * ignored because the file may already be gone.
 *
 * @param statePath Path of the state file to inspect.
 * @param ownSessionId Session id of the calling instance.
 */
function cleanupStateIfOwned(statePath, ownSessionId) {
    let recordedSession;
    try {
        recordedSession = JSON.parse(fs.readFileSync(statePath, 'utf8')).session_id;
    }
    catch {
        // Missing, unreadable, mid-rename — leave alone.
        return;
    }
    if (recordedSession !== ownSessionId) {
        return;
    }
    try {
        fs.unlinkSync(statePath);
    }
    catch {
        /* already gone */
    }
}
|
|
6
105
|
/**
|
|
7
106
|
* `rea serve` — start the MCP gateway.
|
|
8
107
|
*
|
|
@@ -10,6 +109,19 @@ import { POLICY_FILE, REGISTRY_FILE, err, exitWithMissingPolicy, log, reaPath, w
|
|
|
10
109
|
* chain, spawns downstream children from the registry, and connects an upstream
|
|
11
110
|
* stdio MCP server that clients (Claude Code, Helix, etc.) can talk to.
|
|
12
111
|
*
|
|
112
|
+
* G5 additions:
|
|
113
|
+
* - Writes a pidfile + session state breadcrumb for `rea status`.
|
|
114
|
+
* - Boots a loopback `/metrics` HTTP endpoint when `REA_METRICS_PORT` is set.
|
|
115
|
+
* - Emits structured log records through the gateway logger.
|
|
116
|
+
*
|
|
117
|
+
* Breadcrumb race posture:
|
|
118
|
+
* - Writes are atomic (`writeFileSync` → `rename(2)`) so readers never see
|
|
119
|
+
* a half-written file.
|
|
120
|
+
* - Shutdown cleanup is ownership-aware: we only unlink `serve.pid` if its
|
|
121
|
+
* pid matches ours, and only unlink `serve.state.json` if its session_id
|
|
122
|
+
* matches ours. This prevents a second overlapping `rea serve` from
|
|
123
|
+
* losing its breadcrumbs to the first instance's SIGTERM path.
|
|
124
|
+
*
|
|
13
125
|
* Signals: SIGTERM and SIGINT both trigger a graceful shutdown. We do NOT exit
|
|
14
126
|
* on uncaughtException — that path is owned by `src/cli/index.ts`. If the
|
|
15
127
|
* gateway itself throws during startup we log and exit 1.
|
|
@@ -45,7 +157,113 @@ export async function runServe() {
|
|
|
45
157
|
err(`Failed to load registry: ${message}`);
|
|
46
158
|
process.exit(1);
|
|
47
159
|
}
|
|
48
|
-
|
|
160
|
+
// ── Observability setup (G5) ─────────────────────────────────────────────
|
|
161
|
+
const sessionId = currentSessionId();
|
|
162
|
+
// Build the log redactor from both built-in SECRET_PATTERNS and any
|
|
163
|
+
// operator-defined policy.redact.patterns. This is safe because
|
|
164
|
+
// applyRedactor in log.ts hard-caps every string field to
|
|
165
|
+
// MAX_LOG_FIELD_BYTES (4096 bytes) BEFORE running any regex — so
|
|
166
|
+
// attacker-influenced error strings are already bounded when the patterns
|
|
167
|
+
// execute. The earlier exclusion of policy patterns was motivated by the
|
|
168
|
+
// risk of applying operator regexes to unbounded strings; the 4096-byte
|
|
169
|
+
// cap in applyRedactor eliminates that risk. Policy patterns are validated
|
|
170
|
+
// as safe-regex at load time (G3), so catastrophic backtracking is already
|
|
171
|
+
// prevented at the source.
|
|
172
|
+
const policyLogPatterns = (policy.redact?.patterns ?? []).map((p) => ({
|
|
173
|
+
name: p.name,
|
|
174
|
+
pattern: new RegExp(p.regex, p.flags ?? 'g'),
|
|
175
|
+
}));
|
|
176
|
+
const logRedactor = buildRegexRedactor([...SECRET_PATTERNS, ...policyLogPatterns]);
|
|
177
|
+
const logger = createLogger({
|
|
178
|
+
level: resolveLogLevel(process.env['REA_LOG_LEVEL']),
|
|
179
|
+
base: { session_id: sessionId },
|
|
180
|
+
redactField: logRedactor,
|
|
181
|
+
});
|
|
182
|
+
const metricsRegistry = new MetricsRegistry();
|
|
183
|
+
metricsRegistry.markHaltCheck();
|
|
184
|
+
const metricsPort = resolveMetricsPort(process.env['REA_METRICS_PORT'], logger);
|
|
185
|
+
let metricsServer;
|
|
186
|
+
if (metricsPort !== null) {
|
|
187
|
+
try {
|
|
188
|
+
metricsServer = await startMetricsServer({
|
|
189
|
+
port: metricsPort,
|
|
190
|
+
registry: metricsRegistry,
|
|
191
|
+
logger,
|
|
192
|
+
});
|
|
193
|
+
}
|
|
194
|
+
catch (e) {
|
|
195
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
196
|
+
// We do NOT fail gateway startup because of a metrics-bind failure —
|
|
197
|
+
// observability is best-effort. Log loudly so the operator notices.
|
|
198
|
+
logger.error({
|
|
199
|
+
event: 'metrics.bind_failed',
|
|
200
|
+
message: `failed to start /metrics on port ${metricsPort}: ${message}`,
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
// G7: TOFU fingerprint gate. Runs BEFORE we build the downstream pool so
|
|
205
|
+
// drifted servers are filtered out at the edge. First-seen and accepted
|
|
206
|
+
// drift fire LOUD stderr + audit + log; the gateway stays up either way.
|
|
207
|
+
//
|
|
208
|
+
// We pass the FULL declared server list (enabled AND disabled) to the
|
|
209
|
+
// gate so every entry gets a fingerprint baseline on first sight.
|
|
210
|
+
// Disabled-entry escape was a real bypass: an attacker who tampered
|
|
211
|
+
// with a disabled entry got no baseline recorded, so the
|
|
212
|
+
// disabled→enabled transition always looked benign first-seen on the
|
|
213
|
+
// next boot. Fingerprinting is a pure canonicalize+sha256 operation on
|
|
214
|
+
// the registry config (no spawn), so including disabled entries is
|
|
215
|
+
// cheap and safe.
|
|
216
|
+
//
|
|
217
|
+
// The `enabled` filter is applied AFTER the gate: only enabled servers
|
|
218
|
+
// that passed the gate are handed to the downstream pool for spawn.
|
|
219
|
+
//
|
|
220
|
+
// When the registry declares zero servers there is nothing to
|
|
221
|
+
// fingerprint — skip the gate entirely to avoid a redundant disk write
|
|
222
|
+
// on zero-server installs.
|
|
223
|
+
let gatedRegistry = registry;
|
|
224
|
+
try {
|
|
225
|
+
if (registry.servers.length > 0) {
|
|
226
|
+
const { accepted } = await applyTofuGate(baseDir, registry.servers, logger);
|
|
227
|
+
const acceptedNames = new Set(accepted.map((s) => s.name));
|
|
228
|
+
gatedRegistry = {
|
|
229
|
+
...registry,
|
|
230
|
+
// Keep only entries that passed the TOFU gate. The `enabled`
|
|
231
|
+
// filter is applied downstream when the pool decides what to
|
|
232
|
+
// spawn — disabled entries that passed the gate stay in the
|
|
233
|
+
// registry so a future enable uses the already-recorded
|
|
234
|
+
// baseline instead of looking like a fresh first-seen.
|
|
235
|
+
servers: registry.servers.filter((s) => acceptedNames.has(s.name)),
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
catch (e) {
|
|
240
|
+
// Fail-closed on TOFU errors (e.g. corrupt fingerprint store). An attacker
|
|
241
|
+
// who can corrupt the store must not be able to downgrade drift detection
|
|
242
|
+
// by forcing the gateway into a "first-run" fallback. Surface the error
|
|
243
|
+
// and exit — operator can delete the store deliberately to re-bootstrap.
|
|
244
|
+
err(`TOFU gate failed: ${e instanceof Error ? e.message : e}`);
|
|
245
|
+
console.error('');
|
|
246
|
+
console.error(' To intentionally re-bootstrap the fingerprint store:');
|
|
247
|
+
console.error(' 1. Inspect .rea/fingerprints.json for tampering');
|
|
248
|
+
console.error(' 2. If safe, delete it and re-run `rea serve`');
|
|
249
|
+
console.error('');
|
|
250
|
+
process.exit(1);
|
|
251
|
+
}
|
|
252
|
+
const handle = createGateway({
|
|
253
|
+
baseDir,
|
|
254
|
+
policy,
|
|
255
|
+
registry: gatedRegistry,
|
|
256
|
+
logger,
|
|
257
|
+
metrics: metricsRegistry,
|
|
258
|
+
});
|
|
259
|
+
// ── HALT acknowledgement at startup (G5) ─────────────────────────────────
|
|
260
|
+
const haltPath = reaPath(baseDir, HALT_FILE);
|
|
261
|
+
if (fs.existsSync(haltPath)) {
|
|
262
|
+
logger.info({
|
|
263
|
+
event: 'halt.acknowledged_at_startup',
|
|
264
|
+
message: 'HALT present at startup — every tool call will be denied until `.rea/HALT` is removed',
|
|
265
|
+
});
|
|
266
|
+
}
|
|
49
267
|
// G11.3 — Codex availability probe. Observational only: a failed probe
|
|
50
268
|
// NEVER fail-closes the gateway at startup. When the policy explicitly
|
|
51
269
|
// opts out of Codex (`review.codex_required: false`), skip the probe
|
|
@@ -61,7 +279,22 @@ export async function runServe() {
|
|
|
61
279
|
}
|
|
62
280
|
codexProbe.start();
|
|
63
281
|
}
|
|
282
|
+
// ── Pidfile + state (AFTER metrics boot so we persist the real port) ─────
|
|
283
|
+
const startedAt = new Date().toISOString();
|
|
284
|
+
const pidPath = writePidfile(baseDir);
|
|
285
|
+
const statePath = writeStateFile(baseDir, {
|
|
286
|
+
session_id: sessionId,
|
|
287
|
+
started_at: startedAt,
|
|
288
|
+
metrics_port: metricsServer?.port() ?? null,
|
|
289
|
+
});
|
|
290
|
+
let shuttingDown = false;
|
|
64
291
|
const shutdown = async (signal) => {
|
|
292
|
+
// A second signal (e.g. SIGTERM then SIGINT) must NOT re-enter cleanup —
|
|
293
|
+
// `handle.stop()` is idempotent but `process.exit(0)` racing against
|
|
294
|
+
// still-running unlink calls would be messy. One-shot guard.
|
|
295
|
+
if (shuttingDown)
|
|
296
|
+
return;
|
|
297
|
+
shuttingDown = true;
|
|
65
298
|
log(`rea serve: received ${signal} — draining and shutting down`);
|
|
66
299
|
codexProbe?.stop();
|
|
67
300
|
try {
|
|
@@ -70,6 +303,20 @@ export async function runServe() {
|
|
|
70
303
|
catch (e) {
|
|
71
304
|
err(`shutdown error: ${e instanceof Error ? e.message : e}`);
|
|
72
305
|
}
|
|
306
|
+
if (metricsServer !== undefined) {
|
|
307
|
+
try {
|
|
308
|
+
await metricsServer.close();
|
|
309
|
+
}
|
|
310
|
+
catch {
|
|
311
|
+
// Best-effort
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
// Remove the breadcrumbs LAST and ONLY if we still own them. Another
|
|
315
|
+
// `rea serve` in the same baseDir may have rewritten them — in that
|
|
316
|
+
// case the newer instance's `rea status` users should keep seeing
|
|
317
|
+
// "running".
|
|
318
|
+
cleanupPidIfOwned(pidPath);
|
|
319
|
+
cleanupStateIfOwned(statePath, sessionId);
|
|
73
320
|
process.exit(0);
|
|
74
321
|
};
|
|
75
322
|
process.on('SIGTERM', () => void shutdown('SIGTERM'));
|
|
@@ -80,6 +327,27 @@ export async function runServe() {
|
|
|
80
327
|
}
|
|
81
328
|
catch (e) {
|
|
82
329
|
err(`gateway start failed: ${e instanceof Error ? e.message : e}`);
|
|
330
|
+
// Clean up breadcrumbs before exit — a failed startup should not leave
|
|
331
|
+
// a stale pidfile claiming we're up. Ownership-aware so we don't nuke
|
|
332
|
+
// a sibling's breadcrumbs that raced in during our failing startup.
|
|
333
|
+
cleanupPidIfOwned(pidPath);
|
|
334
|
+
cleanupStateIfOwned(statePath, sessionId);
|
|
335
|
+
if (metricsServer !== undefined) {
|
|
336
|
+
try {
|
|
337
|
+
await metricsServer.close();
|
|
338
|
+
}
|
|
339
|
+
catch {
|
|
340
|
+
/* ignored */
|
|
341
|
+
}
|
|
342
|
+
}
|
|
83
343
|
process.exit(1);
|
|
84
344
|
}
|
|
85
345
|
}
|
|
346
|
+
// Exported for unit testing (the serve entry point itself is process-global).
// Bundles the module-private breadcrumb helpers (atomic write, pidfile/state
// writers, and the ownership-aware cleanup functions) so tests can call them
// directly without going through `runServe`.
|
|
347
|
+
export const __TEST_INTERNALS = {
|
|
348
|
+
writeFileAtomic,
|
|
349
|
+
writePidfile,
|
|
350
|
+
writeStateFile,
|
|
351
|
+
cleanupPidIfOwned,
|
|
352
|
+
cleanupStateIfOwned,
|
|
353
|
+
};
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `rea status` — running-process introspection for `rea serve` (G5).
|
|
3
|
+
*
|
|
4
|
+
* `rea check` is the ON-DISK view: policy, HALT, recent audit entries. It
|
|
5
|
+
* works when no gateway is running.
|
|
6
|
+
*
|
|
7
|
+
* `rea status` is the LIVE view: is a gateway running for this cwd? What is
|
|
8
|
+
* its session id? What does the audit chain look like right now? Is HALT
|
|
9
|
+
* active?
|
|
10
|
+
*
|
|
11
|
+
* Detection strategy for "is serve running":
|
|
12
|
+
* 1. Read `.rea/serve.pid`.
|
|
13
|
+
* 2. If the pidfile exists, `kill(pid, 0)` to check liveness.
|
|
14
|
+
* 3. If kill throws ESRCH or EPERM, the pid is stale — treat as not-running
|
|
15
|
+
* and surface that nuance in the output.
|
|
16
|
+
*
|
|
17
|
+
* Output modes:
|
|
18
|
+
* - Default: human-pretty, matching the spacing used by `rea check`.
|
|
19
|
+
* - `--json`: canonical JSON object, composable with jq and future tooling.
|
|
20
|
+
*
|
|
21
|
+
* This command is read-only. It does NOT clean up stale pidfiles (the serve
|
|
22
|
+
* process is the only writer). It does NOT run the full audit verifier —
|
|
23
|
+
* `rea audit verify` is the authoritative check and is expensive on large
|
|
24
|
+
* chains; here we just report line count, last timestamp, and a cheap "last
|
|
25
|
+
* record's stored hash is non-empty" heuristic as an integrity smoke signal.
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* Strip every ASCII control code (C0 plus DEL) from a string. Defense
|
|
29
|
+
* against ANSI/OSC escape injection when a disk-controlled field reaches
|
|
30
|
+
* the operator's terminal via `console.log` in pretty mode.
|
|
31
|
+
*
|
|
32
|
+
* This is strict: every byte in 0x00-0x1F plus 0x7F is replaced with `?`.
|
|
33
|
+
* That drops CR/LF/TAB inside fields, which is fine — the fields this
|
|
34
|
+
* helper guards (halt_reason, session_id, started_at, last_timestamp,
|
|
35
|
+
* profile) are short identifiers or trimmed reasons, not multi-line
|
|
36
|
+
* narratives. Preserving TAB/LF would reopen the ESC+... attack surface
|
|
37
|
+
* because ANSI sequences begin with ESC (0x1B).
|
|
38
|
+
*
|
|
39
|
+
* SECURITY: Only pretty-print paths call this — JSON mode must not, since
|
|
40
|
+
* JSON.stringify already escapes control chars safely (`\u0000`), and a
|
|
41
|
+
* double-pass would corrupt legitimate audit values for downstream jq
|
|
42
|
+
* consumers.
|
|
43
|
+
*
|
|
44
|
+
* Exported so unit tests can assert the exact sanitization behavior.
|
|
45
|
+
*/
|
|
46
|
+
export declare function sanitizeForTerminal(value: string): string;
|
|
47
|
+
export interface StatusOptions {
|
|
48
|
+
json?: boolean | undefined;
|
|
49
|
+
}
|
|
50
|
+
interface ServeLiveness {
|
|
51
|
+
running: boolean;
|
|
52
|
+
pid: number | null;
|
|
53
|
+
/** When pidfile exists but the process isn't responsive. */
|
|
54
|
+
stale: boolean;
|
|
55
|
+
/** From `.rea/serve.state.json`, when present. */
|
|
56
|
+
session_id: string | null;
|
|
57
|
+
started_at: string | null;
|
|
58
|
+
metrics_port: number | null;
|
|
59
|
+
}
|
|
60
|
+
interface AuditStats {
|
|
61
|
+
present: boolean;
|
|
62
|
+
lines: number;
|
|
63
|
+
last_timestamp: string | null;
|
|
64
|
+
/** Cheap chain smoke: last record has a 64-char hex hash. NOT a full verify. */
|
|
65
|
+
tail_hash_looks_valid: boolean;
|
|
66
|
+
}
|
|
67
|
+
interface PolicySummary {
|
|
68
|
+
profile: string;
|
|
69
|
+
autonomy_level: string;
|
|
70
|
+
blocked_paths_count: number;
|
|
71
|
+
codex_required: boolean;
|
|
72
|
+
halt_active: boolean;
|
|
73
|
+
halt_reason: string | null;
|
|
74
|
+
}
|
|
75
|
+
interface StatusPayload {
|
|
76
|
+
base_dir: string;
|
|
77
|
+
serve: ServeLiveness;
|
|
78
|
+
policy: PolicySummary;
|
|
79
|
+
audit: AuditStats;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Build the canonical payload. Separate from print paths so the JSON and
|
|
83
|
+
* pretty outputs stay in lockstep.
|
|
84
|
+
*/
|
|
85
|
+
export declare function computeStatusPayload(baseDir: string): StatusPayload;
|
|
86
|
+
export declare function runStatus(options?: StatusOptions): void;
|
|
87
|
+
export declare const INTERNAL: {
|
|
88
|
+
REA_DIR: string;
|
|
89
|
+
};
|
|
90
|
+
export {};
|