@bookedsolid/rea 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audit/append.js +12 -1
- package/dist/cache/review-cache.d.ts +115 -0
- package/dist/cache/review-cache.js +200 -0
- package/dist/cli/cache.d.ts +52 -0
- package/dist/cli/cache.js +112 -0
- package/dist/cli/doctor.d.ts +27 -0
- package/dist/cli/doctor.js +85 -3
- package/dist/cli/index.js +41 -0
- package/dist/cli/init.js +16 -0
- package/dist/cli/install/gitignore.d.ts +114 -0
- package/dist/cli/install/gitignore.js +356 -0
- package/dist/cli/upgrade.js +20 -0
- package/dist/gateway/downstream-pool.d.ts +34 -0
- package/dist/gateway/downstream-pool.js +37 -0
- package/dist/gateway/downstream.d.ts +11 -0
- package/dist/gateway/downstream.js +36 -5
- package/dist/gateway/meta/health.d.ts +117 -0
- package/dist/gateway/meta/health.js +108 -0
- package/dist/gateway/server.js +109 -12
- package/dist/policy/loader.d.ts +10 -0
- package/dist/policy/loader.js +2 -0
- package/dist/policy/types.d.ts +20 -0
- package/hooks/push-review-gate.sh +185 -1
- package/package.json +1 -1
|
@@ -8,6 +8,13 @@
|
|
|
8
8
|
import { DownstreamConnection } from './downstream.js';
|
|
9
9
|
export class DownstreamPool {
|
|
10
10
|
connections = new Map();
|
|
11
|
+
/**
|
|
12
|
+
* Cached tool counts from the most recent successful `listAllTools` cycle,
|
|
13
|
+
* keyed by server name. Surfaced via `healthSnapshot()` so the meta-tool
|
|
14
|
+
* can report per-server counts even when the current listing pass fails
|
|
15
|
+
* or is skipped. Stale but truthful > absent.
|
|
16
|
+
*/
|
|
17
|
+
lastToolsCount = new Map();
|
|
11
18
|
constructor(registry, logger) {
|
|
12
19
|
for (const server of registry.servers) {
|
|
13
20
|
if (!server.enabled)
|
|
@@ -45,6 +52,7 @@ export class DownstreamPool {
|
|
|
45
52
|
continue;
|
|
46
53
|
try {
|
|
47
54
|
const tools = await conn.listTools();
|
|
55
|
+
this.lastToolsCount.set(server, tools.length);
|
|
48
56
|
for (const t of tools) {
|
|
49
57
|
const prefixed = {
|
|
50
58
|
...t,
|
|
@@ -60,6 +68,35 @@ export class DownstreamPool {
|
|
|
60
68
|
}
|
|
61
69
|
return out;
|
|
62
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Snapshot per-server connection state for the `__rea__health` meta-tool.
|
|
73
|
+
* Pure / non-blocking — no MCP I/O — so it can be called while HALT is
|
|
74
|
+
* active or while other tool calls are in-flight.
|
|
75
|
+
*/
|
|
76
|
+
healthSnapshot() {
|
|
77
|
+
const out = [];
|
|
78
|
+
for (const [name, conn] of this.connections) {
|
|
79
|
+
const cached = this.lastToolsCount.get(name);
|
|
80
|
+
const connected = conn.isConnected;
|
|
81
|
+
const healthy = conn.isHealthy;
|
|
82
|
+
// Only surface the cached tool count when the connection is BOTH
|
|
83
|
+
// connected AND healthy right now. Codex F1 caught that a dead
|
|
84
|
+
// downstream was showing its last-successful count alongside
|
|
85
|
+
// `healthy: false`, which is a worse-than-null diagnostic — operators
|
|
86
|
+
// would read "5 tools reachable" from a server that is reachable
|
|
87
|
+
// through exactly zero tools.
|
|
88
|
+
const tools_count = connected && healthy && typeof cached === 'number' ? cached : null;
|
|
89
|
+
out.push({
|
|
90
|
+
name,
|
|
91
|
+
enabled: true,
|
|
92
|
+
connected,
|
|
93
|
+
healthy,
|
|
94
|
+
last_error: conn.lastError,
|
|
95
|
+
tools_count,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
return out;
|
|
99
|
+
}
|
|
63
100
|
/**
|
|
64
101
|
* Split a prefixed tool name and dispatch. Returns the raw result from the
|
|
65
102
|
* downstream (the gateway response handler shapes it for the upstream reply).
|
|
@@ -93,6 +93,13 @@ export declare class DownstreamConnection {
|
|
|
93
93
|
/** Epoch ms of the last successful reconnect. Used by the flapping guard. */
|
|
94
94
|
private lastReconnectAt;
|
|
95
95
|
private health;
|
|
96
|
+
/**
|
|
97
|
+
* The most recent error observed on this connection (connect or call
|
|
98
|
+
* failure). Surfaced via `__rea__health` so callers can diagnose an empty
|
|
99
|
+
* tool catalog without digging through stderr logs. Set to `null` after a
|
|
100
|
+
* successful connect/reconnect.
|
|
101
|
+
*/
|
|
102
|
+
private lastErrorMessage;
|
|
96
103
|
constructor(config: RegistryServer,
|
|
97
104
|
/**
|
|
98
105
|
* Optional structured logger (G5). When omitted, connection lifecycle
|
|
@@ -102,6 +109,10 @@ export declare class DownstreamConnection {
|
|
|
102
109
|
logger?: Logger | undefined);
|
|
103
110
|
get name(): string;
|
|
104
111
|
get isHealthy(): boolean;
|
|
112
|
+
/** True iff the underlying MCP client is currently connected. */
|
|
113
|
+
get isConnected(): boolean;
|
|
114
|
+
/** Last error observed, or null if the connection has never failed (or fully recovered). */
|
|
115
|
+
get lastError(): string | null;
|
|
105
116
|
connect(): Promise<void>;
|
|
106
117
|
listTools(): Promise<DownstreamToolInfo[]>;
|
|
107
118
|
/**
|
|
@@ -107,6 +107,13 @@ export class DownstreamConnection {
|
|
|
107
107
|
/** Epoch ms of the last successful reconnect. Used by the flapping guard. */
|
|
108
108
|
lastReconnectAt = 0;
|
|
109
109
|
health = 'healthy';
|
|
110
|
+
/**
|
|
111
|
+
* The most recent error observed on this connection (connect or call
|
|
112
|
+
* failure). Surfaced via `__rea__health` so callers can diagnose an empty
|
|
113
|
+
* tool catalog without digging through stderr logs. Set to `null` after a
|
|
114
|
+
* successful connect/reconnect.
|
|
115
|
+
*/
|
|
116
|
+
lastErrorMessage = null;
|
|
110
117
|
constructor(config,
|
|
111
118
|
/**
|
|
112
119
|
* Optional structured logger (G5). When omitted, connection lifecycle
|
|
@@ -123,6 +130,14 @@ export class DownstreamConnection {
|
|
|
123
130
|
get isHealthy() {
|
|
124
131
|
return this.health !== 'unhealthy';
|
|
125
132
|
}
|
|
133
|
+
/** True iff the underlying MCP client is currently connected. */
|
|
134
|
+
get isConnected() {
|
|
135
|
+
return this.client !== null;
|
|
136
|
+
}
|
|
137
|
+
/** Last error observed, or null if the connection has never failed (or fully recovered). */
|
|
138
|
+
get lastError() {
|
|
139
|
+
return this.lastErrorMessage;
|
|
140
|
+
}
|
|
126
141
|
async connect() {
|
|
127
142
|
if (this.client !== null)
|
|
128
143
|
return;
|
|
@@ -143,10 +158,13 @@ export class DownstreamConnection {
|
|
|
143
158
|
}
|
|
144
159
|
catch (err) {
|
|
145
160
|
this.health = 'unhealthy';
|
|
146
|
-
|
|
161
|
+
const msg = `failed to resolve env for downstream "${this.config.name}": ${err instanceof Error ? err.message : err}`;
|
|
162
|
+
this.lastErrorMessage = msg;
|
|
163
|
+
throw new Error(msg);
|
|
147
164
|
}
|
|
148
165
|
if (built.missing.length > 0) {
|
|
149
166
|
this.health = 'unhealthy';
|
|
167
|
+
this.lastErrorMessage = `missing env: ${built.missing.join(', ')}`;
|
|
150
168
|
// One line per missing var so grep/jq users can find the exact gap.
|
|
151
169
|
// We intentionally do NOT log the env key name's VALUE (there is none —
|
|
152
170
|
// it's unresolved) nor any other env values.
|
|
@@ -166,10 +184,13 @@ export class DownstreamConnection {
|
|
|
166
184
|
await client.connect(transport);
|
|
167
185
|
this.client = client;
|
|
168
186
|
this.health = 'healthy';
|
|
187
|
+
this.lastErrorMessage = null;
|
|
169
188
|
}
|
|
170
189
|
catch (err) {
|
|
171
190
|
this.health = 'unhealthy';
|
|
172
|
-
|
|
191
|
+
const msg = `failed to connect to downstream "${this.config.name}" (${this.config.command}): ${err instanceof Error ? err.message : err}`;
|
|
192
|
+
this.lastErrorMessage = msg;
|
|
193
|
+
throw new Error(msg);
|
|
173
194
|
}
|
|
174
195
|
}
|
|
175
196
|
async listTools() {
|
|
@@ -190,7 +211,13 @@ export class DownstreamConnection {
|
|
|
190
211
|
await this.connect();
|
|
191
212
|
}
|
|
192
213
|
try {
|
|
193
|
-
|
|
214
|
+
const result = await this.client.callTool({ name: toolName, arguments: args });
|
|
215
|
+
// Clear any lingering error from a previous transient failure. Without
|
|
216
|
+
// this, a connection that failed once and then recovered on the very
|
|
217
|
+
// next call (same client, no reconnect) would forever report the old
|
|
218
|
+
// error via `__rea__health`, misleading operators about live state.
|
|
219
|
+
this.lastErrorMessage = null;
|
|
220
|
+
return result;
|
|
194
221
|
}
|
|
195
222
|
catch (err) {
|
|
196
223
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -212,6 +239,7 @@ export class DownstreamConnection {
|
|
|
212
239
|
// stamp the reconnect time so flap-guard can refuse rapid repeats.
|
|
213
240
|
this.reconnectAttempted = false;
|
|
214
241
|
this.lastReconnectAt = Date.now();
|
|
242
|
+
this.lastErrorMessage = null;
|
|
215
243
|
this.logger?.info({
|
|
216
244
|
event: 'downstream.reconnected',
|
|
217
245
|
server_name: this.config.name,
|
|
@@ -221,16 +249,19 @@ export class DownstreamConnection {
|
|
|
221
249
|
}
|
|
222
250
|
catch (reconnectErr) {
|
|
223
251
|
this.health = 'unhealthy';
|
|
252
|
+
const errMsg = reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr);
|
|
253
|
+
this.lastErrorMessage = errMsg;
|
|
224
254
|
this.logger?.error({
|
|
225
255
|
event: 'downstream.reconnect_failed',
|
|
226
256
|
server_name: this.config.name,
|
|
227
257
|
message: `downstream "${this.config.name}" unhealthy after one reconnect`,
|
|
228
|
-
error:
|
|
258
|
+
error: errMsg,
|
|
229
259
|
});
|
|
230
|
-
throw new Error(`downstream "${this.config.name}" unhealthy after one reconnect: ${
|
|
260
|
+
throw new Error(`downstream "${this.config.name}" unhealthy after one reconnect: ${errMsg}`);
|
|
231
261
|
}
|
|
232
262
|
}
|
|
233
263
|
this.health = 'unhealthy';
|
|
264
|
+
this.lastErrorMessage = message;
|
|
234
265
|
this.logger?.error({
|
|
235
266
|
event: 'downstream.call_failed',
|
|
236
267
|
server_name: this.config.name,
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gateway-internal `__rea__health` meta-tool.
|
|
3
|
+
*
|
|
4
|
+
* WHY THIS EXISTS
|
|
5
|
+
* ===============
|
|
6
|
+
*
|
|
7
|
+
* The MCP `listTools` catalog the gateway advertises is the UNION of every
|
|
8
|
+
* healthy downstream's own catalog. When all downstreams are unhealthy — or
|
|
9
|
+
* the registry is empty, or fingerprints fail, or an env var is missing — the
|
|
10
|
+
* catalog is empty. From the LLM's perspective this is indistinguishable from
|
|
11
|
+
* a gateway that came up fine but happens to have nothing to proxy, and there
|
|
12
|
+
* is no tool it can call to ask "why is this empty?" because, well, the
|
|
13
|
+
* catalog is empty.
|
|
14
|
+
*
|
|
15
|
+
* This meta-tool closes that diagnostic gap: the gateway ALWAYS exposes
|
|
16
|
+
* `__rea__health` regardless of downstream state, the kill-switch, or the
|
|
17
|
+
* middleware chain. A caller can invoke it to get a snapshot of every
|
|
18
|
+
* registered server's connection state, last error, and tool count.
|
|
19
|
+
*
|
|
20
|
+
* DESIGN CHOICES
|
|
21
|
+
* --------------
|
|
22
|
+
*
|
|
23
|
+
* 1. Name shape: `__rea__health`. The leading `__` (instead of a normal
|
|
24
|
+
* `<server>__<tool>` prefix) reserves the namespace for gateway-internal
|
|
25
|
+
* tools. It never collides with a registered server because
|
|
26
|
+
* `src/registry/loader.ts` restricts `name` to `^[a-z0-9][a-z0-9-]*$` —
|
|
27
|
+
* no underscores allowed.
|
|
28
|
+
*
|
|
29
|
+
* 2. Short-circuit in `server.ts`: the CallTool handler matches on the
|
|
30
|
+
* constant below BEFORE calling `splitPrefixed`, and responds directly
|
|
31
|
+
* without running the middleware chain. Reasons, ordered:
|
|
32
|
+
* (a) This tool must be callable while HALT is present — otherwise the
|
|
33
|
+
* operator can't introspect a frozen gateway.
|
|
34
|
+
* (b) Tier middleware would classify `health` as Write (default for
|
|
35
|
+
* unlisted names) and deny L0 callers — wrong for read-only
|
|
36
|
+
* introspection.
|
|
37
|
+
* (c) There is no downstream to dispatch to — the entire middleware
|
|
38
|
+
* chain is about getting to one safely.
|
|
39
|
+
* The short-circuit still writes an audit record via `appendAuditRecord`
|
|
40
|
+
* so invocations remain accountable.
|
|
41
|
+
*
|
|
42
|
+
* 3. Never throws. Health is the one tool the caller uses when things are
|
|
43
|
+
* broken. Every field is best-effort; a missing value is surfaced as
|
|
44
|
+
* `null`, not as an exception.
|
|
45
|
+
*/
|
|
46
|
+
import type { Policy } from '../../policy/types.js';
|
|
47
|
+
import type { DownstreamHealth } from '../downstream-pool.js';
|
|
48
|
+
/** Canonical MCP tool name exposed by the gateway. */
|
|
49
|
+
export declare const META_HEALTH_TOOL_NAME = "__rea__health";
|
|
50
|
+
/** `server_name` recorded in audit entries for this meta-tool. */
|
|
51
|
+
export declare const META_SERVER_NAME = "__rea__";
|
|
52
|
+
/** `tool_name` recorded in audit entries for this meta-tool. */
|
|
53
|
+
export declare const META_TOOL_NAME = "health";
|
|
54
|
+
export interface MetaHealthSnapshot {
|
|
55
|
+
/** rea gateway version (from package.json, pinned to the shipped version). */
|
|
56
|
+
gateway: {
|
|
57
|
+
version: string;
|
|
58
|
+
/** Seconds since gateway process started. */
|
|
59
|
+
uptime_s: number;
|
|
60
|
+
/** Whether `.rea/HALT` is present. */
|
|
61
|
+
halt: boolean;
|
|
62
|
+
/** Trimmed contents of `.rea/HALT` when the file is present and non-empty; otherwise `null`. */
|
|
63
|
+
halt_reason: string | null;
|
|
64
|
+
};
|
|
65
|
+
policy: {
|
|
66
|
+
profile: string;
|
|
67
|
+
autonomy_level: string;
|
|
68
|
+
max_autonomy_level: string;
|
|
69
|
+
block_ai_attribution: boolean;
|
|
70
|
+
blocked_paths_count: number;
|
|
71
|
+
};
|
|
72
|
+
/** Per-downstream state. Empty array iff the registry is empty. */
|
|
73
|
+
downstreams: DownstreamHealth[];
|
|
74
|
+
/** Rolled-up counts the LLM can act on without walking the array. */
|
|
75
|
+
summary: {
|
|
76
|
+
registered: number;
|
|
77
|
+
connected: number;
|
|
78
|
+
healthy: number;
|
|
79
|
+
total_tools: number;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
export interface BuildHealthSnapshotDeps {
|
|
83
|
+
/** Gateway version (so we can test deterministically without reading package.json). */
|
|
84
|
+
gatewayVersion: string;
|
|
85
|
+
/** Gateway boot time in epoch ms. `uptime_s` is computed from this. */
|
|
86
|
+
startedAtMs: number;
|
|
87
|
+
/** Frozen policy snapshot — we do not re-read `.rea/policy.yaml` here. */
|
|
88
|
+
policy: Policy;
|
|
89
|
+
/** Per-downstream state from the pool. */
|
|
90
|
+
downstreams: DownstreamHealth[];
|
|
91
|
+
/** Whether `.rea/HALT` is present at snapshot time. */
|
|
92
|
+
halt: boolean;
|
|
93
|
+
/**
|
|
94
|
+
* HALT reason, if any. `null` when HALT is absent OR when the file exists
|
|
95
|
+
* but the caller couldn't read its contents — we never surface an I/O
|
|
96
|
+
* exception through this tool.
|
|
97
|
+
*/
|
|
98
|
+
haltReason: string | null;
|
|
99
|
+
/** Current epoch ms. Injected for determinism in tests. */
|
|
100
|
+
nowMs?: number;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Pure function that builds the snapshot from injected state. All I/O happens
|
|
104
|
+
* in the caller (`server.ts`) — keeps this testable and keeps "health never
|
|
105
|
+
* throws" a local invariant rather than a chain-wide claim.
|
|
106
|
+
*/
|
|
107
|
+
export declare function buildHealthSnapshot(deps: BuildHealthSnapshotDeps): MetaHealthSnapshot;
|
|
108
|
+
/**
|
|
109
|
+
* The descriptor the gateway advertises via `tools/list`. No arguments —
|
|
110
|
+
* callers request a snapshot by calling with `{}`. Keeping the surface
|
|
111
|
+
* argument-free makes the tool trivially safe for any autonomy level.
|
|
112
|
+
*/
|
|
113
|
+
export declare function metaHealthToolDescriptor(): {
|
|
114
|
+
name: string;
|
|
115
|
+
description: string;
|
|
116
|
+
inputSchema: Record<string, unknown>;
|
|
117
|
+
};
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gateway-internal `__rea__health` meta-tool.
|
|
3
|
+
*
|
|
4
|
+
* WHY THIS EXISTS
|
|
5
|
+
* ===============
|
|
6
|
+
*
|
|
7
|
+
* The MCP `listTools` catalog the gateway advertises is the UNION of every
|
|
8
|
+
* healthy downstream's own catalog. When all downstreams are unhealthy — or
|
|
9
|
+
* the registry is empty, or fingerprints fail, or an env var is missing — the
|
|
10
|
+
* catalog is empty. From the LLM's perspective this is indistinguishable from
|
|
11
|
+
* a gateway that came up fine but happens to have nothing to proxy, and there
|
|
12
|
+
* is no tool it can call to ask "why is this empty?" because, well, the
|
|
13
|
+
* catalog is empty.
|
|
14
|
+
*
|
|
15
|
+
* This meta-tool closes that diagnostic gap: the gateway ALWAYS exposes
|
|
16
|
+
* `__rea__health` regardless of downstream state, the kill-switch, or the
|
|
17
|
+
* middleware chain. A caller can invoke it to get a snapshot of every
|
|
18
|
+
* registered server's connection state, last error, and tool count.
|
|
19
|
+
*
|
|
20
|
+
* DESIGN CHOICES
|
|
21
|
+
* --------------
|
|
22
|
+
*
|
|
23
|
+
* 1. Name shape: `__rea__health`. The leading `__` (instead of a normal
|
|
24
|
+
* `<server>__<tool>` prefix) reserves the namespace for gateway-internal
|
|
25
|
+
* tools. It never collides with a registered server because
|
|
26
|
+
* `src/registry/loader.ts` restricts `name` to `^[a-z0-9][a-z0-9-]*$` —
|
|
27
|
+
* no underscores allowed.
|
|
28
|
+
*
|
|
29
|
+
* 2. Short-circuit in `server.ts`: the CallTool handler matches on the
|
|
30
|
+
* constant below BEFORE calling `splitPrefixed`, and responds directly
|
|
31
|
+
* without running the middleware chain. Reasons, ordered:
|
|
32
|
+
* (a) This tool must be callable while HALT is present — otherwise the
|
|
33
|
+
* operator can't introspect a frozen gateway.
|
|
34
|
+
* (b) Tier middleware would classify `health` as Write (default for
|
|
35
|
+
* unlisted names) and deny L0 callers — wrong for read-only
|
|
36
|
+
* introspection.
|
|
37
|
+
* (c) There is no downstream to dispatch to — the entire middleware
|
|
38
|
+
* chain is about getting to one safely.
|
|
39
|
+
* The short-circuit still writes an audit record via `appendAuditRecord`
|
|
40
|
+
* so invocations remain accountable.
|
|
41
|
+
*
|
|
42
|
+
* 3. Never throws. Health is the one tool the caller uses when things are
|
|
43
|
+
* broken. Every field is best-effort; a missing value is surfaced as
|
|
44
|
+
* `null`, not as an exception.
|
|
45
|
+
*/
|
|
46
|
+
/** Canonical MCP tool name exposed by the gateway. */
|
|
47
|
+
export const META_HEALTH_TOOL_NAME = '__rea__health';
|
|
48
|
+
/** `server_name` recorded in audit entries for this meta-tool. */
|
|
49
|
+
export const META_SERVER_NAME = '__rea__';
|
|
50
|
+
/** `tool_name` recorded in audit entries for this meta-tool. */
|
|
51
|
+
export const META_TOOL_NAME = 'health';
|
|
52
|
+
/**
 * Assemble the `__rea__health` payload from state handed in by the caller.
 *
 * This function performs no I/O of its own: the version, boot time, policy,
 * per-downstream state and HALT state all arrive through `deps`, which keeps
 * it deterministic and easy to test.
 *
 * @param deps Injected state: `gatewayVersion`, `startedAtMs`, `policy`,
 *   `downstreams`, `halt`, `haltReason`, and an optional `nowMs` (defaults to
 *   `Date.now()`).
 * @returns An object with `gateway`, `policy`, `downstreams` and `summary`
 *   sections. `downstreams` is the same array instance that was passed in.
 */
export function buildHealthSnapshot(deps) {
    const { policy, downstreams } = deps;
    const nowMs = deps.nowMs ?? Date.now();
    const elapsedSeconds = Math.floor((nowMs - deps.startedAtMs) / 1000);
    // Roll the per-downstream flags up into counters. Entries whose
    // `tools_count` is not a number (e.g. null) add nothing to the total.
    const summary = {
        registered: downstreams.length,
        connected: 0,
        healthy: 0,
        total_tools: 0,
    };
    for (const entry of downstreams) {
        summary.connected += entry.connected ? 1 : 0;
        summary.healthy += entry.healthy ? 1 : 0;
        summary.total_tools += typeof entry.tools_count === 'number' ? entry.tools_count : 0;
    }
    return {
        gateway: {
            version: deps.gatewayVersion,
            // Clamped so that a `nowMs` earlier than the boot time never
            // produces a negative uptime.
            uptime_s: Math.max(0, elapsedSeconds),
            halt: deps.halt,
            halt_reason: deps.haltReason,
        },
        policy: {
            profile: policy.profile,
            autonomy_level: String(policy.autonomy_level),
            max_autonomy_level: String(policy.max_autonomy_level),
            block_ai_attribution: policy.block_ai_attribution,
            blocked_paths_count: policy.blocked_paths.length,
        },
        downstreams,
        summary,
    };
}
|
|
94
|
+
/**
 * Descriptor for the `__rea__health` entry the gateway lists in `tools/list`.
 *
 * The input schema is an object with no properties and
 * `additionalProperties: false`, i.e. the tool is invoked with `{}`. A fresh
 * descriptor object is built on every call.
 *
 * @returns `{ name, description, inputSchema }` for the meta-tool.
 */
export function metaHealthToolDescriptor() {
    const description = [
        'rea gateway self-diagnostic. Returns the gateway version, HALT state, policy summary, ',
        'and per-downstream connection/health/tool-count. Always available, even when every ',
        'downstream is unhealthy or HALT is active — this is the tool you call when listTools ',
        'comes back empty or suspicious.',
    ].join('');
    const inputSchema = {
        type: 'object',
        properties: {},
        additionalProperties: false,
    };
    return { name: META_HEALTH_TOOL_NAME, description, inputSchema };
}
|
package/dist/gateway/server.js
CHANGED
|
@@ -32,7 +32,12 @@
|
|
|
32
32
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
33
33
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
34
34
|
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
35
|
+
import fs from 'node:fs/promises';
|
|
36
|
+
import path from 'node:path';
|
|
35
37
|
import { DownstreamPool, splitPrefixed } from './downstream-pool.js';
|
|
38
|
+
import { META_HEALTH_TOOL_NAME, META_SERVER_NAME, META_TOOL_NAME, buildHealthSnapshot, metaHealthToolDescriptor, } from './meta/health.js';
|
|
39
|
+
import { appendAuditRecord } from '../audit/append.js';
|
|
40
|
+
import { getPkgVersion } from '../cli/utils.js';
|
|
36
41
|
import { createAuditMiddleware } from './middleware/audit.js';
|
|
37
42
|
import { createKillSwitchMiddleware } from './middleware/kill-switch.js';
|
|
38
43
|
import { createTierMiddleware } from './middleware/tier.js';
|
|
@@ -116,11 +121,13 @@ function buildMiddlewareChain(opts, deps) {
|
|
|
116
121
|
];
|
|
117
122
|
}
|
|
118
123
|
export function createGateway(opts) {
|
|
119
|
-
const { registry } = opts;
|
|
124
|
+
const { registry, policy, baseDir } = opts;
|
|
120
125
|
const logger = opts.logger ?? createLogger({ base: { session_id: currentSessionId() } });
|
|
121
126
|
const metrics = opts.metrics;
|
|
122
127
|
const pool = new DownstreamPool(registry, logger);
|
|
123
|
-
const
|
|
128
|
+
const gatewayVersion = getPkgVersion();
|
|
129
|
+
const startedAtMs = Date.now();
|
|
130
|
+
const server = new Server({ name: 'rea', version: gatewayVersion }, { capabilities: { tools: {} } });
|
|
124
131
|
// Build the circuit breaker with observability hooks wired in — state
|
|
125
132
|
// transitions log a structured record AND update the Prometheus gauge.
|
|
126
133
|
const breaker = new CircuitBreaker({
|
|
@@ -146,22 +153,116 @@ export function createGateway(opts) {
|
|
|
146
153
|
},
|
|
147
154
|
});
|
|
148
155
|
const staticChain = buildMiddlewareChain(opts, { breaker });
|
|
156
|
+
// Read `.rea/HALT` under `baseDir` without ever throwing. Resolves to
// `{ halt, reason }`:
//   - file read succeeded        → halt: true,  reason: trimmed contents,
//                                   or null when the file is blank
//   - path is absent             → halt: false, reason: null
//     (ENOENT, or ENOTDIR when a parent component is not a directory)
//   - any other read failure     → halt: true,  reason: null
//     (e.g. EACCES, EISDIR: something is there but cannot be read)
// The meta-tool never surfaces I/O errors — health is the one thing that has
// to keep working when everything else is broken. A failure other than
// "absent" is reported as halted so that an unreadable HALT marker is not
// presented as a running gateway.
// NOTE(review): assumes the kill-switch treats any existing `.rea/HALT` path
// as a halt, readable or not — confirm against the kill-switch middleware.
async function readHalt() {
    try {
        const contents = await fs.readFile(path.join(baseDir, '.rea', 'HALT'), 'utf8');
        const trimmed = contents.trim();
        return { halt: true, reason: trimmed.length > 0 ? trimmed : null };
    }
    catch (err) {
        // Node attaches the system error name as `code`; anything without
        // one is handled like an unreadable file.
        const code = typeof err === 'object' && err !== null && 'code' in err ? err.code : undefined;
        const absent = code === 'ENOENT' || code === 'ENOTDIR';
        return { halt: !absent, reason: null };
    }
}
|
|
149
170
|
// ── Handlers ─────────────────────────────────────────────────────────────
|
|
150
171
|
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
172
|
+
// The `__rea__health` meta-tool is ALWAYS advertised, regardless of
|
|
173
|
+
// downstream state. This is the systemic answer to the "listTools came
|
|
174
|
+
// back empty, now what?" diagnostic gap — the LLM can always call
|
|
175
|
+
// health to find out why.
|
|
176
|
+
const metaTool = metaHealthToolDescriptor();
|
|
151
177
|
if (pool.size === 0)
|
|
152
|
-
return { tools: [] };
|
|
178
|
+
return { tools: [metaTool] };
|
|
153
179
|
const prefixed = await pool.listAllTools();
|
|
154
180
|
return {
|
|
155
|
-
tools:
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
181
|
+
tools: [
|
|
182
|
+
metaTool,
|
|
183
|
+
...prefixed.map((t) => ({
|
|
184
|
+
name: t.name,
|
|
185
|
+
description: t.description ?? `${t.server} → ${t.name.slice(t.server.length + 2)}`,
|
|
186
|
+
inputSchema: t.inputSchema ?? { type: 'object' },
|
|
187
|
+
})),
|
|
188
|
+
],
|
|
160
189
|
};
|
|
161
190
|
});
|
|
162
191
|
server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
163
192
|
const prefixed = req.params.name;
|
|
164
193
|
const args = (req.params.arguments ?? {});
|
|
194
|
+
// Short-circuit the `__rea__health` meta-tool BEFORE the middleware chain
|
|
195
|
+
// and BEFORE splitPrefixed. Reasons:
|
|
196
|
+
// - Must be callable while HALT is active (so the operator can
|
|
197
|
+
// introspect a frozen gateway). The kill-switch middleware would
|
|
198
|
+
// otherwise deny.
|
|
199
|
+
// - `deriveBaseTier('health')` defaults to Write, which would deny L0
|
|
200
|
+
// callers. Health is pure introspection — tier doesn't apply.
|
|
201
|
+
// - There's no downstream to dispatch to. The middleware chain exists
|
|
202
|
+
// to reach one safely.
|
|
203
|
+
// We still write an audit record so invocations remain accountable.
|
|
204
|
+
// The `__rea__` prefix is reserved for gateway-internal meta-tools.
|
|
205
|
+
// Reject any unknown name in that namespace with a clear error rather
|
|
206
|
+
// than letting `splitPrefixed` produce the confusing `unknown downstream
|
|
207
|
+
// server ""` message for e.g. `__rea__health ` (trailing space) or a
|
|
208
|
+
// future meta-tool name the client was guessing at.
|
|
209
|
+
if (prefixed.startsWith('__rea__') && prefixed !== META_HEALTH_TOOL_NAME) {
|
|
210
|
+
return {
|
|
211
|
+
isError: true,
|
|
212
|
+
content: [
|
|
213
|
+
{
|
|
214
|
+
type: 'text',
|
|
215
|
+
text: `reserved meta-namespace: only "${META_HEALTH_TOOL_NAME}" is defined under __rea__`,
|
|
216
|
+
},
|
|
217
|
+
],
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
if (prefixed === META_HEALTH_TOOL_NAME) {
|
|
221
|
+
const startMs = Date.now();
|
|
222
|
+
const haltState = await readHalt();
|
|
223
|
+
const snapshot = buildHealthSnapshot({
|
|
224
|
+
gatewayVersion,
|
|
225
|
+
startedAtMs,
|
|
226
|
+
policy,
|
|
227
|
+
downstreams: pool.healthSnapshot(),
|
|
228
|
+
halt: haltState.halt,
|
|
229
|
+
haltReason: haltState.reason,
|
|
230
|
+
});
|
|
231
|
+
// Best-effort audit append. Failures here must never prevent the
|
|
232
|
+
// caller from getting the health response — that would defeat the
|
|
233
|
+
// whole point of a "works when everything else is broken" tool.
|
|
234
|
+
try {
|
|
235
|
+
await appendAuditRecord(baseDir, {
|
|
236
|
+
tool_name: META_TOOL_NAME,
|
|
237
|
+
server_name: META_SERVER_NAME,
|
|
238
|
+
status: InvocationStatus.Allowed,
|
|
239
|
+
tier: Tier.Read,
|
|
240
|
+
autonomy_level: String(policy.autonomy_level),
|
|
241
|
+
session_id: currentSessionId(),
|
|
242
|
+
duration_ms: Date.now() - startMs,
|
|
243
|
+
metadata: {
|
|
244
|
+
halt: snapshot.gateway.halt,
|
|
245
|
+
downstreams_registered: snapshot.summary.registered,
|
|
246
|
+
downstreams_healthy: snapshot.summary.healthy,
|
|
247
|
+
},
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
catch (err) {
|
|
251
|
+
logger.warn({
|
|
252
|
+
event: 'meta.health.audit_failed',
|
|
253
|
+
message: 'failed to append audit record for __rea__health; serving response anyway',
|
|
254
|
+
error: err instanceof Error ? err.message : String(err),
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
return {
|
|
258
|
+
content: [
|
|
259
|
+
{
|
|
260
|
+
type: 'text',
|
|
261
|
+
text: JSON.stringify(snapshot, null, 2),
|
|
262
|
+
},
|
|
263
|
+
],
|
|
264
|
+
};
|
|
265
|
+
}
|
|
165
266
|
// Split prefix for downstream dispatch; the terminal middleware uses the
|
|
166
267
|
// full prefixed name to call the pool (which re-splits internally).
|
|
167
268
|
let serverName;
|
|
@@ -336,7 +437,3 @@ export function createGateway(opts) {
|
|
|
336
437
|
}
|
|
337
438
|
return { server, start, stop, pool, logger, metrics };
|
|
338
439
|
}
|
|
339
|
-
// Prevent TS from complaining about the unused `Tier` import when the file is
|
|
340
|
-
// compiled in isolation; keeping the import pins the semantic dependency edge
|
|
341
|
-
// for future middleware that may want to inspect the tier in terminal.
|
|
342
|
-
void Tier;
|
package/dist/policy/loader.d.ts
CHANGED
|
@@ -32,10 +32,16 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
32
32
|
}>>;
|
|
33
33
|
review: z.ZodOptional<z.ZodObject<{
|
|
34
34
|
codex_required: z.ZodOptional<z.ZodBoolean>;
|
|
35
|
+
cache_max_age_seconds: z.ZodOptional<z.ZodNumber>;
|
|
36
|
+
allow_skip_in_ci: z.ZodOptional<z.ZodBoolean>;
|
|
35
37
|
}, "strict", z.ZodTypeAny, {
|
|
36
38
|
codex_required?: boolean | undefined;
|
|
39
|
+
cache_max_age_seconds?: number | undefined;
|
|
40
|
+
allow_skip_in_ci?: boolean | undefined;
|
|
37
41
|
}, {
|
|
38
42
|
codex_required?: boolean | undefined;
|
|
43
|
+
cache_max_age_seconds?: number | undefined;
|
|
44
|
+
allow_skip_in_ci?: boolean | undefined;
|
|
39
45
|
}>>;
|
|
40
46
|
redact: z.ZodOptional<z.ZodObject<{
|
|
41
47
|
match_timeout_ms: z.ZodOptional<z.ZodNumber>;
|
|
@@ -110,6 +116,8 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
110
116
|
} | undefined;
|
|
111
117
|
review?: {
|
|
112
118
|
codex_required?: boolean | undefined;
|
|
119
|
+
cache_max_age_seconds?: number | undefined;
|
|
120
|
+
allow_skip_in_ci?: boolean | undefined;
|
|
113
121
|
} | undefined;
|
|
114
122
|
redact?: {
|
|
115
123
|
match_timeout_ms?: number | undefined;
|
|
@@ -146,6 +154,8 @@ declare const PolicySchema: z.ZodObject<{
|
|
|
146
154
|
} | undefined;
|
|
147
155
|
review?: {
|
|
148
156
|
codex_required?: boolean | undefined;
|
|
157
|
+
cache_max_age_seconds?: number | undefined;
|
|
158
|
+
allow_skip_in_ci?: boolean | undefined;
|
|
149
159
|
} | undefined;
|
|
150
160
|
redact?: {
|
|
151
161
|
match_timeout_ms?: number | undefined;
|
package/dist/policy/loader.js
CHANGED
|
@@ -24,6 +24,8 @@ const ContextProtectionSchema = z.object({
|
|
|
24
24
|
const ReviewPolicySchema = z
|
|
25
25
|
.object({
|
|
26
26
|
codex_required: z.boolean().optional(),
|
|
27
|
+
cache_max_age_seconds: z.number().int().positive().optional(),
|
|
28
|
+
allow_skip_in_ci: z.boolean().optional(),
|
|
27
29
|
})
|
|
28
30
|
.strict();
|
|
29
31
|
/**
|
package/dist/policy/types.d.ts
CHANGED
|
@@ -31,6 +31,26 @@ export interface ReviewPolicy {
|
|
|
31
31
|
* log. Default when unset is `true` (Codex required).
|
|
32
32
|
*/
|
|
33
33
|
codex_required?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* Review-cache TTL used by `rea cache check` (BUG-009). Entries older
|
|
36
|
+
* than this window are treated as a miss, forcing re-review. Default
|
|
37
|
+
* when unset is 3600 seconds (1 hour) — matches the windows the
|
|
38
|
+
* push-review-gate hook already assumes. Expressed in seconds as a positive
|
|
39
|
+
* integer.
|
|
40
|
+
*/
|
|
41
|
+
cache_max_age_seconds?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Authorization for `REA_SKIP_PUSH_REVIEW` / `REA_SKIP_CODEX_REVIEW` when
|
|
44
|
+
* the `CI` environment variable is set. The skip hatches are ambient and
|
|
45
|
+
* unauthenticated — a leaked env file or a malicious parent process can
|
|
46
|
+
* bypass the gate and record a forged actor (git config is mutable repo
|
|
47
|
+
* config). Refusing these hatches in CI contexts by default removes that
|
|
48
|
+
* bypass surface. Set `true` ONLY on build agents where the operator has
|
|
49
|
+
* an independent reason to trust the environment. Default `false`.
|
|
50
|
+
*
|
|
51
|
+
* Added in 0.5.0 as Codex F2 on the PR1 adversarial review.
|
|
52
|
+
*/
|
|
53
|
+
allow_skip_in_ci?: boolean;
|
|
34
54
|
}
|
|
35
55
|
/**
|
|
36
56
|
* User-supplied redaction pattern entry. Each pattern has a stable `name` used
|