@bookedsolid/rea 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,7 +35,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprot
35
35
  import fs from 'node:fs/promises';
36
36
  import path from 'node:path';
37
37
  import { DownstreamPool, splitPrefixed } from './downstream-pool.js';
38
- import { META_HEALTH_TOOL_NAME, META_SERVER_NAME, META_TOOL_NAME, buildHealthSnapshot, metaHealthToolDescriptor, } from './meta/health.js';
38
+ import { boundedDiagnosticString, META_HEALTH_TOOL_NAME, META_SERVER_NAME, META_TOOL_NAME, buildHealthSnapshot, metaHealthToolDescriptor, sanitizeHealthSnapshot, } from './meta/health.js';
39
39
  import { appendAuditRecord } from '../audit/append.js';
40
40
  import { getPkgVersion } from '../cli/utils.js';
41
41
  import { createAuditMiddleware } from './middleware/audit.js';
@@ -127,6 +127,11 @@ export function createGateway(opts) {
127
127
  const pool = new DownstreamPool(registry, logger);
128
128
  const gatewayVersion = getPkgVersion();
129
129
  const startedAtMs = Date.now();
130
+ // BUG-011 (0.6.2) — process-lifetime counter of failed audit appends from
131
+ // the `__rea__health` short-circuit. Exposed on the health snapshot as
132
+ // `summary.audit_fail_count` so operators can detect the silent-audit-gap
133
+ // condition without parsing stderr.
134
+ let healthAuditFailCount = 0;
130
135
  const server = new Server({ name: 'rea', version: gatewayVersion }, { capabilities: { tools: {} } });
131
136
  // Build the circuit breaker with observability hooks wired in — state
132
137
  // transitions log a structured record AND update the Prometheus gauge.
@@ -161,7 +166,13 @@ export function createGateway(opts) {
161
166
  try {
162
167
  const contents = await fs.readFile(path.join(baseDir, '.rea', 'HALT'), 'utf8');
163
168
  const trimmed = contents.trim();
164
- return { halt: true, reason: trimmed.length > 0 ? trimmed : null };
169
+ // Hard-cap the raw read at the diagnostic string budget before it
170
+ // enters the snapshot. An oversize HALT file (operator accident or
171
+ // local attacker) must not cause an O(size) allocation on every
172
+ // `__rea__health` call. `sanitizeHealthSnapshot` also truncates,
173
+ // but capping at ingestion keeps the snapshot itself bounded.
174
+ const bounded = boundedDiagnosticString(trimmed);
175
+ return { halt: true, reason: bounded.length > 0 ? bounded : null };
165
176
  }
166
177
  catch {
167
178
  return { halt: false, reason: null };
@@ -220,14 +231,23 @@ export function createGateway(opts) {
220
231
  if (prefixed === META_HEALTH_TOOL_NAME) {
221
232
  const startMs = Date.now();
222
233
  const haltState = await readHalt();
223
- const snapshot = buildHealthSnapshot({
234
+ // Internal snapshot carries the raw diagnostic strings — used by the
235
+ // audit record below so operators have the full text in the log even
236
+ // when the MCP response has them stripped/redacted.
237
+ const internalSnapshot = buildHealthSnapshot({
224
238
  gatewayVersion,
225
239
  startedAtMs,
226
240
  policy,
227
241
  downstreams: pool.healthSnapshot(),
228
242
  halt: haltState.halt,
229
243
  haltReason: haltState.reason,
244
+ auditFailCount: healthAuditFailCount,
230
245
  });
246
+ // BUG-011 (0.6.2) — sanitize BEFORE serializing to the wire. Strips
247
+ // `halt_reason` + per-downstream `last_error` by default; when
248
+ // `gateway.health.expose_diagnostics: true` applies redactSecrets +
249
+ // injection-scan and replaces any non-clean string with the sentinel.
250
+ const wireSnapshot = sanitizeHealthSnapshot(internalSnapshot, policy);
231
251
  // Best-effort audit append. Failures here must never prevent the
232
252
  // caller from getting the health response — that would defeat the
233
253
  // whole point of a "works when everything else is broken" tool.
@@ -241,24 +261,45 @@ export function createGateway(opts) {
241
261
  session_id: currentSessionId(),
242
262
  duration_ms: Date.now() - startMs,
243
263
  metadata: {
244
- halt: snapshot.gateway.halt,
245
- downstreams_registered: snapshot.summary.registered,
246
- downstreams_healthy: snapshot.summary.healthy,
264
+ halt: internalSnapshot.gateway.halt,
265
+ // BUG-011 (0.6.2) — N-3: the audit log is the authoritative
266
+ // trusted-operator sink for full diagnostic text. Strings are
267
+ // already bounded at ingestion (halt-file read + downstream
268
+ // lastError getter) via `boundedDiagnosticString`, and the
269
+ // audit file is on local disk with hash-chained append-only
270
+ // semantics — not LLM-reachable. Log the pre-sanitize strings
271
+ // here so the `rea doctor` / audit-tail path preserves the
272
+ // text the MCP wire strips under the default policy.
273
+ halt_reason: internalSnapshot.gateway.halt_reason,
274
+ downstreams_registered: internalSnapshot.summary.registered,
275
+ downstreams_healthy: internalSnapshot.summary.healthy,
276
+ downstream_errors: internalSnapshot.downstreams
277
+ .filter((d) => d.last_error !== null)
278
+ .map((d) => ({ name: d.name, last_error: d.last_error })),
247
279
  },
248
280
  });
249
281
  }
250
282
  catch (err) {
251
- logger.warn({
283
+ // BUG-011 (0.6.2) — elevated from `warn` to `error`. A dropped
284
+ // meta.health audit entry is an observability gap: the response
285
+ // still goes out but the record of it is missing, which defeats
286
+ // the forensic value of the hash chain for that call. Also bump a
287
+ // process-lifetime counter surfaced on the next snapshot's
288
+ // `summary.audit_fail_count` so operators can detect the condition
289
+ // without parsing stderr.
290
+ healthAuditFailCount += 1;
291
+ logger.error({
252
292
  event: 'meta.health.audit_failed',
253
293
  message: 'failed to append audit record for __rea__health; serving response anyway',
254
294
  error: err instanceof Error ? err.message : String(err),
295
+ audit_fail_count: healthAuditFailCount,
255
296
  });
256
297
  }
257
298
  return {
258
299
  content: [
259
300
  {
260
301
  type: 'text',
261
- text: JSON.stringify(snapshot, null, 2),
302
+ text: JSON.stringify(wireSnapshot, null, 2),
262
303
  },
263
304
  ],
264
305
  };
@@ -95,6 +95,23 @@ declare const PolicySchema: z.ZodObject<{
95
95
  max_age_days?: number | undefined;
96
96
  } | undefined;
97
97
  }>>;
98
+ gateway: z.ZodOptional<z.ZodObject<{
99
+ health: z.ZodOptional<z.ZodObject<{
100
+ expose_diagnostics: z.ZodOptional<z.ZodBoolean>;
101
+ }, "strict", z.ZodTypeAny, {
102
+ expose_diagnostics?: boolean | undefined;
103
+ }, {
104
+ expose_diagnostics?: boolean | undefined;
105
+ }>>;
106
+ }, "strict", z.ZodTypeAny, {
107
+ health?: {
108
+ expose_diagnostics?: boolean | undefined;
109
+ } | undefined;
110
+ }, {
111
+ health?: {
112
+ expose_diagnostics?: boolean | undefined;
113
+ } | undefined;
114
+ }>>;
98
115
  }, "strict", z.ZodTypeAny, {
99
116
  version: string;
100
117
  profile: string;
@@ -133,6 +150,11 @@ declare const PolicySchema: z.ZodObject<{
133
150
  max_age_days?: number | undefined;
134
151
  } | undefined;
135
152
  } | undefined;
153
+ gateway?: {
154
+ health?: {
155
+ expose_diagnostics?: boolean | undefined;
156
+ } | undefined;
157
+ } | undefined;
136
158
  }, {
137
159
  version: string;
138
160
  profile: string;
@@ -171,6 +193,11 @@ declare const PolicySchema: z.ZodObject<{
171
193
  max_age_days?: number | undefined;
172
194
  } | undefined;
173
195
  } | undefined;
196
+ gateway?: {
197
+ health?: {
198
+ expose_diagnostics?: boolean | undefined;
199
+ } | undefined;
200
+ } | undefined;
174
201
  }>;
175
202
  /**
176
203
  * Async policy loader with TTL cache and mtime-based invalidation.
@@ -93,6 +93,20 @@ const InjectionPolicySchema = z
93
93
  suspicious_blocks_writes: z.boolean().optional(),
94
94
  })
95
95
  .strict();
96
+ /**
97
+ * BUG-011 (0.6.2) — gateway-level policy. Currently only the `health`
98
+ * sub-block is defined; kept strict so typos (`gateway.heath`) fail loudly.
99
+ */
100
+ const GatewayHealthPolicySchema = z
101
+ .object({
102
+ expose_diagnostics: z.boolean().optional(),
103
+ })
104
+ .strict();
105
+ const GatewayPolicySchema = z
106
+ .object({
107
+ health: GatewayHealthPolicySchema.optional(),
108
+ })
109
+ .strict();
96
110
  const PolicySchema = z
97
111
  .object({
98
112
  version: z.string(),
@@ -111,6 +125,7 @@ const PolicySchema = z
111
125
  review: ReviewPolicySchema.optional(),
112
126
  redact: RedactPolicySchema.optional(),
113
127
  audit: AuditPolicySchema.optional(),
128
+ gateway: GatewayPolicySchema.optional(),
114
129
  })
115
130
  .strict();
116
131
  const DEFAULT_CACHE_TTL_MS = 30_000;
@@ -124,6 +124,33 @@ export interface AuditPolicy {
124
124
  export interface InjectionPolicy {
125
125
  suspicious_blocks_writes?: boolean;
126
126
  }
127
+ /**
128
+ * BUG-011 (0.6.2) — gateway-level policy knobs.
129
+ *
130
+ * `health.expose_diagnostics` governs whether `__rea__health` emits
131
+ * `halt_reason` and per-downstream `last_error` strings in its MCP response
132
+ * (vs. dropping them to `null`). The short-circuit responds BEFORE the
133
+ * middleware chain — so it bypasses `redact` and `injection` middleware by
134
+ * design (the tool must stay callable under HALT). That means downstream
135
+ * error strings, which are populated verbatim from `err.message`, can carry
136
+ * secrets or injection payloads all the way to the caller unless we
137
+ * sanitize in the short-circuit path itself.
138
+ *
139
+ * Default `false` (fields emitted as `null`). The Helix team's explicit
140
+ * preference was "strip, don't redact" — a smaller trust ask than trusting
141
+ * our secret/injection pattern coverage. Operators who accept that trade-off
142
+ * (e.g. single-tenant dev boxes) can flip `expose_diagnostics: true`, at
143
+ * which point the short-circuit applies the same `redactSecrets` +
144
+ * `classifyInjection` pass the middleware chain would. The full untouched
145
+ * values always flow into the audit log regardless — diagnostics remain
146
+ * available via `rea doctor`, just not over the MCP wire.
147
+ */
148
+ export interface GatewayHealthPolicy {
149
+ expose_diagnostics?: boolean;
150
+ }
151
+ export interface GatewayPolicy {
152
+ health?: GatewayHealthPolicy;
153
+ }
127
154
  export interface Policy {
128
155
  version: string;
129
156
  profile: string;
@@ -141,4 +168,5 @@ export interface Policy {
141
168
  review?: ReviewPolicy;
142
169
  redact?: RedactPolicy;
143
170
  audit?: AuditPolicy;
171
+ gateway?: GatewayPolicy;
144
172
  }