@bookedsolid/rea 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/commit-msg +130 -0
- package/.husky/pre-push +128 -0
- package/README.md +5 -5
- package/agents/codex-adversarial.md +23 -8
- package/commands/codex-review.md +2 -2
- package/dist/audit/append.d.ts +62 -0
- package/dist/audit/append.js +189 -0
- package/dist/audit/codex-event.d.ts +28 -0
- package/dist/audit/codex-event.js +15 -0
- package/dist/cli/doctor.d.ts +60 -1
- package/dist/cli/doctor.js +459 -20
- package/dist/cli/index.js +35 -5
- package/dist/cli/init.d.ts +13 -0
- package/dist/cli/init.js +278 -67
- package/dist/cli/install/canonical.d.ts +43 -0
- package/dist/cli/install/canonical.js +101 -0
- package/dist/cli/install/claude-md.d.ts +48 -0
- package/dist/cli/install/claude-md.js +93 -0
- package/dist/cli/install/commit-msg.d.ts +30 -0
- package/dist/cli/install/commit-msg.js +102 -0
- package/dist/cli/install/copy.d.ts +169 -0
- package/dist/cli/install/copy.js +455 -0
- package/dist/cli/install/fs-safe.d.ts +91 -0
- package/dist/cli/install/fs-safe.js +347 -0
- package/dist/cli/install/manifest-io.d.ts +12 -0
- package/dist/cli/install/manifest-io.js +44 -0
- package/dist/cli/install/manifest-schema.d.ts +83 -0
- package/dist/cli/install/manifest-schema.js +80 -0
- package/dist/cli/install/reagent.d.ts +59 -0
- package/dist/cli/install/reagent.js +160 -0
- package/dist/cli/install/settings-merge.d.ts +91 -0
- package/dist/cli/install/settings-merge.js +239 -0
- package/dist/cli/install/sha.d.ts +9 -0
- package/dist/cli/install/sha.js +21 -0
- package/dist/cli/serve.d.ts +11 -0
- package/dist/cli/serve.js +72 -6
- package/dist/cli/upgrade.d.ts +67 -0
- package/dist/cli/upgrade.js +509 -0
- package/dist/gateway/downstream-pool.d.ts +39 -0
- package/dist/gateway/downstream-pool.js +93 -0
- package/dist/gateway/downstream.d.ts +80 -0
- package/dist/gateway/downstream.js +196 -0
- package/dist/gateway/middleware/audit-types.d.ts +10 -0
- package/dist/gateway/middleware/audit.js +14 -0
- package/dist/gateway/middleware/injection.d.ts +59 -2
- package/dist/gateway/middleware/injection.js +91 -14
- package/dist/gateway/middleware/kill-switch.d.ts +20 -5
- package/dist/gateway/middleware/kill-switch.js +57 -35
- package/dist/gateway/middleware/redact.d.ts +83 -6
- package/dist/gateway/middleware/redact.js +133 -46
- package/dist/gateway/observability/codex-probe.d.ts +110 -0
- package/dist/gateway/observability/codex-probe.js +234 -0
- package/dist/gateway/observability/codex-telemetry.d.ts +93 -0
- package/dist/gateway/observability/codex-telemetry.js +221 -0
- package/dist/gateway/redact-safe/match-timeout.d.ts +83 -0
- package/dist/gateway/redact-safe/match-timeout.js +179 -0
- package/dist/gateway/reviewers/claude-self.d.ts +99 -0
- package/dist/gateway/reviewers/claude-self.js +316 -0
- package/dist/gateway/reviewers/codex.d.ts +64 -0
- package/dist/gateway/reviewers/codex.js +80 -0
- package/dist/gateway/reviewers/select.d.ts +64 -0
- package/dist/gateway/reviewers/select.js +102 -0
- package/dist/gateway/reviewers/types.d.ts +85 -0
- package/dist/gateway/reviewers/types.js +14 -0
- package/dist/gateway/server.d.ts +51 -0
- package/dist/gateway/server.js +258 -0
- package/dist/gateway/session.d.ts +9 -0
- package/dist/gateway/session.js +17 -0
- package/dist/policy/loader.d.ts +59 -0
- package/dist/policy/loader.js +65 -0
- package/dist/policy/profiles.d.ts +80 -0
- package/dist/policy/profiles.js +94 -0
- package/dist/policy/types.d.ts +38 -0
- package/dist/registry/loader.d.ts +98 -0
- package/dist/registry/loader.js +153 -0
- package/dist/registry/types.d.ts +44 -0
- package/dist/registry/types.js +6 -0
- package/dist/scripts/read-policy-field.d.ts +36 -0
- package/dist/scripts/read-policy-field.js +96 -0
- package/hooks/push-review-gate.sh +627 -17
- package/package.json +13 -2
- package/profiles/bst-internal-no-codex.yaml +40 -0
- package/profiles/bst-internal.yaml +23 -0
- package/profiles/client-engagement.yaml +23 -0
- package/profiles/lit-wc.yaml +17 -0
- package/profiles/minimal.yaml +11 -0
- package/profiles/open-source-no-codex.yaml +33 -0
- package/profiles/open-source.yaml +18 -0
- package/scripts/lint-safe-regex.mjs +78 -0
- package/scripts/postinstall.mjs +131 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex availability probe (G11.3).
|
|
3
|
+
*
|
|
4
|
+
* Passive, periodic reachability check for the Codex CLI, used by `rea serve`
|
|
5
|
+
* at startup and by `rea doctor` to surface a one-line status about whether
|
|
6
|
+
* Codex is actually usable right now. This is INTENTIONALLY separate from
|
|
7
|
+
* the reviewer-selection path in `src/gateway/reviewers/select.ts`:
|
|
8
|
+
*
|
|
9
|
+
* - The selector decides which reviewer to run for a specific push (it
|
|
10
|
+
* respects `REA_REVIEWER`, registry pin, policy, etc.).
|
|
11
|
+
* - The probe just reports "is the Codex CLI responding at all?" as a
|
|
12
|
+
* observability signal — never gates a review.
|
|
13
|
+
*
|
|
14
|
+
* Startup must NEVER fail-closed on a probe failure. Codex going away is a
|
|
15
|
+
* degraded state, not a fatal one; the push gate has its own audited escape
|
|
16
|
+
* hatch (`REA_SKIP_CODEX_REVIEW`, G11.1).
|
|
17
|
+
*
|
|
18
|
+
* ## Probe shape
|
|
19
|
+
*
|
|
20
|
+
* 1. `codex --version` — must exit 0 within the version timeout (default {@link VERSION_TIMEOUT_MS_DEFAULT}).
|
|
21
|
+
* Success → `cli_installed: true` and `version` populated from stdout.
|
|
22
|
+
* 2. Catalog check — see the `tryCatalogProbe` comment below. We try
|
|
23
|
+
* a best-effort authenticated subcommand with a short timeout. If the
|
|
24
|
+
* subcommand is unrecognized by this Codex build, we degrade to "assume
|
|
25
|
+
* authenticated iff cli_installed is true" rather than flagging a false
|
|
26
|
+
* negative.
|
|
27
|
+
*
|
|
28
|
+
* `cli_responsive` is the AND of both. Consumers should treat
|
|
29
|
+
* `cli_responsive: false` as "Codex may be unavailable — plan accordingly",
|
|
30
|
+
* not as authoritative proof that a specific review will fail.
|
|
31
|
+
*
|
|
32
|
+
* ## Concurrency
|
|
33
|
+
*
|
|
34
|
+
* `probe()` is safe to call concurrently. We serialize via a per-instance
|
|
35
|
+
* promise; callers queue up behind the in-flight probe instead of kicking off
|
|
36
|
+
* duplicate exec calls. `start()` / `stop()` manage a single `setInterval`
|
|
37
|
+
* with `.unref()` so the probe never pins the event loop.
|
|
38
|
+
*/
|
|
39
|
+
import { execFile } from 'node:child_process';
|
|
40
|
+
import { promisify } from 'node:util';
|
|
41
|
+
/** Promise-returning wrapper around `child_process.execFile`. */
const execFileAsync = promisify(execFile);
/** Time budget for `codex --version`, so a hung CLI cannot stall the gateway. */
const VERSION_TIMEOUT_MS_DEFAULT = 2 * 1_000;
/** Time budget for the catalog probe; larger because that call may hit the network. */
const CATALOG_TIMEOUT_MS_DEFAULT = 5 * 1_000;
/** Default polling cadence: ten minutes between probes. */
const DEFAULT_INTERVAL_MS = 600_000;
/**
 * Exec implementation used when the caller does not inject one. The argument
 * list is copied before it is handed to `execFile`.
 */
const defaultExec = (file, args, options) => {
    const argv = [...args];
    return execFileAsync(file, argv, options);
};
|
|
49
|
+
/**
 * Placeholder state used until the first probe completes: every flag is
 * `false` and `last_probe_at` is the Unix epoch.
 */
function unknownState() {
    const epochIso = new Date(0).toISOString();
    return {
        cli_installed: false,
        cli_authenticated: false,
        cli_responsive: false,
        last_probe_at: epochIso,
    };
}
|
|
58
|
+
/**
 * Compare two probe states field by field. `last_probe_at` is deliberately
 * left out so that a timer tick which observes the same state does not count
 * as a transition and listeners are not re-notified.
 */
function statesEqual(a, b) {
    const comparedFields = [
        'cli_installed',
        'cli_authenticated',
        'cli_responsive',
        'last_error',
        'version',
    ];
    return comparedFields.every((field) => a[field] === b[field]);
}
|
|
70
|
+
/**
 * Reachability probe for the Codex CLI.
 *
 * Runs `codex --version` followed by a best-effort `codex catalog --json`,
 * keeps the latest result in memory, and notifies subscribers when an
 * observable field changes. Constructor options:
 *   - `execFileFn`: exec implementation (defaults to the promisified `execFile`).
 *   - `timeoutInstallMs`: time budget for the version check.
 *   - `timeoutCatalogMs`: time budget for the catalog check.
 */
export class CodexProbe {
    exec;
    versionTimeoutMs;
    catalogTimeoutMs;
    state = unknownState();
    inFlight;
    timer;
    listeners = new Set();
    constructor(opts = {}) {
        const { execFileFn, timeoutInstallMs, timeoutCatalogMs } = opts;
        this.exec = execFileFn ?? defaultExec;
        this.versionTimeoutMs = timeoutInstallMs ?? VERSION_TIMEOUT_MS_DEFAULT;
        this.catalogTimeoutMs = timeoutCatalogMs ?? CATALOG_TIMEOUT_MS_DEFAULT;
    }
    /**
     * Run one probe. Callers that arrive while a probe is already running
     * receive the same promise instead of starting a second exec.
     */
    probe() {
        if (this.inFlight === undefined) {
            this.inFlight = this.runProbe().finally(() => {
                this.inFlight = undefined;
            });
        }
        return this.inFlight;
    }
    /**
     * Begin periodic polling: one probe immediately, then one every
     * `intervalMs`. Does nothing when polling is already active.
     */
    start(intervalMs = DEFAULT_INTERVAL_MS) {
        if (this.timer !== undefined)
            return;
        // The first probe is not awaited here; callers that need the result
        // right away can await `probe()` themselves.
        void this.probe();
        const handle = setInterval(() => void this.probe(), intervalMs);
        this.timer = handle;
        // Unref'd so the poller alone never keeps the process alive.
        handle.unref?.();
    }
    /** End periodic polling. A no-op when polling was never started. */
    stop() {
        if (this.timer === undefined)
            return;
        clearInterval(this.timer);
        this.timer = undefined;
    }
    /** Return a shallow copy of the most recently committed state. */
    getState() {
        return { ...this.state };
    }
    /**
     * Register a listener for state transitions.
     *
     * @returns a function that removes the listener again.
     */
    onStateChange(listener) {
        this.listeners.add(listener);
        return () => {
            this.listeners.delete(listener);
        };
    }
    /** Probe implementation; external callers go through `probe()`. */
    async runProbe() {
        const next = {
            cli_installed: false,
            cli_authenticated: false,
            cli_responsive: false,
            last_probe_at: new Date().toISOString(),
        };
        // Step 1: `codex --version` as the cheap reachability signal.
        try {
            const { stdout } = await this.exec('codex', ['--version'], {
                timeout: this.versionTimeoutMs,
            });
            next.cli_installed = true;
            const versionText = stdout.trim();
            if (versionText.length > 0)
                next.version = versionText;
        }
        catch (err) {
            // Without a working version check there is nothing further to ask.
            next.last_error = formatExecError(err, 'codex --version');
            this.commit(next);
            return this.getState();
        }
        // Step 2: best-effort authenticated check. A Codex build that does not
        // know the `catalog` subcommand is treated as "skipped" rather than
        // "unauthenticated"; any other failure is surfaced verbatim.
        const catalog = await this.tryCatalogProbe();
        if (!catalog.ok) {
            if (catalog.skipped) {
                // Degraded path: assume auth is healthy as long as nothing has
                // recorded an error so far.
                next.cli_authenticated = next.last_error === undefined;
            }
            else {
                next.last_error = catalog.error;
            }
        }
        else {
            next.cli_authenticated = true;
        }
        next.cli_responsive = next.cli_installed && next.cli_authenticated;
        this.commit(next);
        return this.getState();
    }
    /**
     * Run `codex catalog --json` and classify the outcome:
     *   - `{ ok: true }` when the command succeeds.
     *   - `{ ok: false, skipped: true }` when the error text suggests the
     *     subcommand is not recognized by this build.
     *   - `{ ok: false, skipped: false, error }` for every other failure.
     */
    async tryCatalogProbe() {
        try {
            await this.exec('codex', ['catalog', '--json'], {
                timeout: this.catalogTimeoutMs,
            });
        }
        catch (err) {
            const message = formatExecError(err, 'codex catalog --json');
            // Heuristic on the error text: CLIs usually answer an unknown
            // subcommand with one of these phrases and a non-zero exit.
            const unrecognized = /unknown command|unrecognized|usage:|invalid subcommand/i.test(message);
            return unrecognized
                ? { ok: false, skipped: true }
                : { ok: false, skipped: false, error: message };
        }
        return { ok: true };
    }
    /** Store `next` and notify listeners when an observable field changed. */
    commit(next) {
        const unchanged = statesEqual(this.state, next);
        this.state = next;
        if (unchanged)
            return;
        // Iterate over a copy so a handler may subscribe/unsubscribe safely.
        Array.from(this.listeners).forEach((listener) => {
            try {
                listener({ ...next });
            }
            catch {
                // A failing listener must not break the probe.
            }
        });
    }
}
|
|
220
|
+
/**
 * Format a child_process error into a single human-readable line.
 *
 * `execFile` rejections carry the child's stderr inside `err.message`, which
 * is frequently multi-line; whitespace runs (including newlines) are
 * collapsed so the result really is one line.
 *
 * @param err - Whatever the exec call rejected with.
 * @param context - Label for the command, used as the line's prefix.
 * @returns `"<context>: <reason>"`. Never throws.
 */
function formatExecError(err, context) {
    const oneLine = (text) => text.replace(/\s+/g, ' ').trim();
    if (err instanceof Error) {
        const maybeCode = err.code;
        const maybeSignal = err.signal;
        // execFile surfaces SIGTERM when `timeout` fires.
        if (maybeSignal === 'SIGTERM' || /ETIMEDOUT|ESRCH/.test(String(maybeCode))) {
            return `${context}: timeout`;
        }
        if (maybeCode === 'ENOENT')
            return `${context}: not installed (ENOENT)`;
        return `${context}: ${oneLine(String(err.message))}`;
    }
    // `String()` throws for values without a usable toString (for example
    // null-prototype objects); fall back so the probe keeps its never-throws
    // contract.
    let detail;
    try {
        detail = oneLine(String(err));
    }
    catch {
        detail = 'unknown error';
    }
    return `${context}: ${detail}`;
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex reviewer telemetry (G11.5).
|
|
3
|
+
*
|
|
4
|
+
* Append-only observational metrics for adversarial-review invocations. Each
|
|
5
|
+
* record captures the invocation type, estimated token counts, duration,
|
|
6
|
+
* exit code, and whether the downstream reviewer appears to have been
|
|
7
|
+
* rate-limited (detected from stderr).
|
|
8
|
+
*
|
|
9
|
+
* ## Non-goals
|
|
10
|
+
*
|
|
11
|
+
* - This is NOT the audit log. The audit log (`.rea/audit.jsonl`) is a
|
|
12
|
+
* hash-chained record of every middleware invocation and is authoritative
|
|
13
|
+
* for compliance. Telemetry is free-form, per-reviewer-call numbers for
|
|
14
|
+
* operators to watch spend and rate-limit pressure.
|
|
15
|
+
* - No input/output payloads are stored. We estimate token counts from
|
|
16
|
+
* character counts on the fly; the raw strings are discarded after the
|
|
17
|
+
* record is constructed. This is non-negotiable — the brief explicitly
|
|
18
|
+
* prohibits storing the diff or the reviewer output. Any future extension
|
|
19
|
+
* that seems to need the text should reach for the audit log or a
|
|
20
|
+
* dedicated, policy-gated payload store instead.
|
|
21
|
+
*
|
|
22
|
+
* ## Write discipline
|
|
23
|
+
*
|
|
24
|
+
* - File: `<baseDir>/.rea/metrics.jsonl`. Created with the parent dir if
|
|
25
|
+
* absent. One JSON object per line, newline-terminated, fsync'd after
|
|
26
|
+
* each append.
|
|
27
|
+
* - Fail-soft: write errors log a single stderr warning but never throw.
|
|
28
|
+
* Telemetry must never interfere with the reviewed operation.
|
|
29
|
+
*
|
|
30
|
+
* ## Read discipline
|
|
31
|
+
*
|
|
32
|
+
* - `summarizeTelemetry` reads the file, buckets records by local-tz day, and
|
|
33
|
+
* returns a fixed-shape summary. Missing file → all-zero summary.
|
|
34
|
+
*
|
|
35
|
+
* ## Token estimation
|
|
36
|
+
*
|
|
37
|
+
* - `chars / 4` — a well-known rule of thumb. Close enough for spend
|
|
38
|
+
* forecasting; not suitable for billing reconciliation. If a future
|
|
39
|
+
* caller needs precise counts, plug in a tokenizer per reviewer and keep
|
|
40
|
+
* the estimation as a fallback.
|
|
41
|
+
*/
|
|
42
|
+
/**
 * Closed set of identifiers for the context a reviewer ran in. Treat the
 * strings as stable: downstream dashboards key on them.
 */
export type TelemetryInvocationType =
    | 'review'
    | 'adversarial-review'
    | 'rescue';
|
|
47
|
+
/** A single line of `metrics.jsonl`. */
export interface TelemetryRecord {
    /** ISO-8601 instant at which the record was built. */
    timestamp: string;
    /** Context in which the reviewer ran. */
    invocation_type: TelemetryInvocationType;
    /** Token estimate for the reviewer input, derived from its character count. */
    estimated_input_tokens: number;
    /** Token estimate for the reviewer output, derived from its character count. */
    estimated_output_tokens: number;
    /** Duration reported by the caller, in milliseconds. */
    duration_ms: number;
    /** Exit code reported by the caller. */
    exit_code: number;
    /** Whether the reviewer's stderr looked rate-limited. */
    rate_limited: boolean;
}
|
|
57
|
+
/**
 * What a call site hands to `recordTelemetry`. The two text fields exist
 * purely so token counts can be estimated; they are never persisted.
 */
export interface RecordTelemetryInput {
    /** Context in which the reviewer ran. */
    invocation_type: TelemetryInvocationType;
    /** Reviewer input; used for token estimation only. */
    input_text: string;
    /** Reviewer output; used for token estimation only. */
    output_text: string;
    /** How long the invocation took, in milliseconds. */
    duration_ms: number;
    /** Exit code of the reviewer process. */
    exit_code: number;
    /** Reviewer stderr, inspected for rate-limit markers when provided. */
    stderr?: string;
}
|
|
69
|
+
/** Fixed-shape result of {@link summarizeTelemetry}. */
export interface TelemetrySummary {
    /** How many days the summary spans. */
    window_days: number;
    /** Invocation count for each day, newest first; one entry per day in the window. */
    invocations_per_day: Array<number>;
    /** Input plus output token estimates, summed over the window. */
    total_estimated_tokens: number;
    /** Number of records in the window with `rate_limited: true`. */
    rate_limited_count: number;
    /** Mean `duration_ms` over the records in the window. */
    avg_latency_ms: number;
}
|
|
82
|
+
/**
 * Canonical location for the metrics file under `baseDir`.
 *
 * @param baseDir - Directory that contains (or will contain) the `.rea` folder.
 * @returns `baseDir` joined with `.rea/metrics.jsonl`.
 */
|
|
83
|
+
export declare function metricsFilePath(baseDir: string): string;
|
|
84
|
+
/**
 * Append a single telemetry row to the metrics file under `baseDir`.
 * Fail-soft: a write error is reported as one warning line and is not
 * rethrown, so the caller can treat this as a best-effort observation and
 * continue.
 *
 * @param baseDir - Directory that contains (or will contain) `.rea/metrics.jsonl`.
 * @param input - Invocation details; the text fields are used for token
 *   estimation only and are not written to disk.
 * @returns A promise that settles once the append attempt has finished.
 */
|
|
88
|
+
export declare function recordTelemetry(baseDir: string, input: RecordTelemetryInput): Promise<void>;
|
|
89
|
+
/**
 * Build the fixed-shape summary. When `metrics.jsonl` is missing the result
 * is all-zero — callers should NEVER see an exception for "no data yet".
 *
 * @param baseDir - Directory that contains `.rea/metrics.jsonl`.
 * @param windowDays - Number of local-time days to cover, counting back from
 *   today. Defaults to 7; coerced to an integer of at least 1.
 * @returns Per-day counts (newest first) plus totals for the window.
 */
|
|
93
|
+
export declare function summarizeTelemetry(baseDir: string, windowDays?: number): Promise<TelemetrySummary>;
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex reviewer telemetry (G11.5).
|
|
3
|
+
*
|
|
4
|
+
* Append-only observational metrics for adversarial-review invocations. Each
|
|
5
|
+
* record captures the invocation type, estimated token counts, duration,
|
|
6
|
+
* exit code, and whether the downstream reviewer appears to have been
|
|
7
|
+
* rate-limited (detected from stderr).
|
|
8
|
+
*
|
|
9
|
+
* ## Non-goals
|
|
10
|
+
*
|
|
11
|
+
* - This is NOT the audit log. The audit log (`.rea/audit.jsonl`) is a
|
|
12
|
+
* hash-chained record of every middleware invocation and is authoritative
|
|
13
|
+
* for compliance. Telemetry is free-form, per-reviewer-call numbers for
|
|
14
|
+
* operators to watch spend and rate-limit pressure.
|
|
15
|
+
* - No input/output payloads are stored. We estimate token counts from
|
|
16
|
+
* character counts on the fly; the raw strings are discarded after the
|
|
17
|
+
* record is constructed. This is non-negotiable — the brief explicitly
|
|
18
|
+
* prohibits storing the diff or the reviewer output. Any future extension
|
|
19
|
+
* that seems to need the text should reach for the audit log or a
|
|
20
|
+
* dedicated, policy-gated payload store instead.
|
|
21
|
+
*
|
|
22
|
+
* ## Write discipline
|
|
23
|
+
*
|
|
24
|
+
* - File: `<baseDir>/.rea/metrics.jsonl`. Created with the parent dir if
|
|
25
|
+
* absent. One JSON object per line, newline-terminated, fsync'd after
|
|
26
|
+
* each append.
|
|
27
|
+
* - Fail-soft: write errors log a single stderr warning but never throw.
|
|
28
|
+
* Telemetry must never interfere with the reviewed operation.
|
|
29
|
+
*
|
|
30
|
+
* ## Read discipline
|
|
31
|
+
*
|
|
32
|
+
* - `summarizeTelemetry` reads the file, buckets records by local-tz day, and
|
|
33
|
+
* returns a fixed-shape summary. Missing file → all-zero summary.
|
|
34
|
+
*
|
|
35
|
+
* ## Token estimation
|
|
36
|
+
*
|
|
37
|
+
* - `chars / 4` — a well-known rule of thumb. Close enough for spend
|
|
38
|
+
* forecasting; not suitable for billing reconciliation. If a future
|
|
39
|
+
* caller needs precise counts, plug in a tokenizer per reviewer and keep
|
|
40
|
+
* the estimation as a fallback.
|
|
41
|
+
*/
|
|
42
|
+
import fs from 'node:fs/promises';
|
|
43
|
+
import path from 'node:path';
|
|
44
|
+
/** Directory, relative to the base dir, in which the metrics file lives. */
const REA_DIR = '.rea';
/** File name of the telemetry log inside `REA_DIR`. */
const METRICS_FILE = 'metrics.jsonl';
/** Divisor of the characters-to-tokens rule of thumb. */
const CHARS_PER_TOKEN = 4;
/**
 * Markers that suggest the reviewer was rate-limited: "rate limit" /
 * "rate-limit", a stand-alone "429", "usage limit", or "exceeded quota".
 * Matching is case-insensitive, so "Rate Limit" and "rate limit" are both
 * caught.
 *
 * Deliberately permissive: a false positive only mislabels one invocation as
 * throttled, whereas a false negative silently under-reports the very thing
 * the operator is trying to measure.
 */
const RATE_LIMIT_REGEX = /rate[- ]limit|\b429\b|usage limit|exceeded quota/i;
|
|
58
|
+
/**
 * Estimate a token count from a string's length: the length divided by
 * `CHARS_PER_TOKEN`, rounded up. An empty string yields 0.
 */
function estimateTokens(text) {
    const chars = text.length;
    return chars === 0 ? 0 : Math.ceil(chars / CHARS_PER_TOKEN);
}
|
|
67
|
+
/**
 * Report whether the reviewer's stderr contains a rate-limit marker.
 * Missing (`undefined`) or empty stderr is never considered rate-limited.
 */
function detectRateLimited(stderr) {
    const hasOutput = stderr !== undefined && stderr.length !== 0;
    return hasOutput ? RATE_LIMIT_REGEX.test(stderr) : false;
}
|
|
73
|
+
/**
 * Resolve where the metrics file lives for a given base directory:
 * `<baseDir>/.rea/metrics.jsonl`.
 */
export function metricsFilePath(baseDir) {
    const segments = [baseDir, REA_DIR, METRICS_FILE];
    return path.join(...segments);
}
|
|
77
|
+
/**
 * Append a single telemetry row. Always fail-soft — the caller must be
 * able to treat this as a best-effort observation and continue.
 *
 * Everything, including construction of the record, happens inside the
 * guarded region so that malformed input (for example a missing
 * `input_text`) results in a warning instead of a rejected promise.
 *
 * @param baseDir - Directory that contains (or will contain) `.rea/metrics.jsonl`.
 * @param input - Invocation details. `input_text` / `output_text` are used
 *   for token estimation only and are not persisted.
 * @returns A promise that resolves once the append attempt has finished.
 */
export async function recordTelemetry(baseDir, input) {
    try {
        // `| 0` would wrap at 2^31 and then clamp to 0; truncate without the
        // int32 conversion instead, treating non-finite values as 0.
        const rawDuration = Number(input.duration_ms);
        const durationMs = Number.isFinite(rawDuration)
            ? Math.max(0, Math.trunc(rawDuration))
            : 0;
        const record = {
            timestamp: new Date().toISOString(),
            invocation_type: input.invocation_type,
            estimated_input_tokens: estimateTokens(input.input_text),
            estimated_output_tokens: estimateTokens(input.output_text),
            duration_ms: durationMs,
            exit_code: input.exit_code | 0,
            rate_limited: detectRateLimited(input.stderr),
        };
        const filePath = metricsFilePath(baseDir);
        const line = JSON.stringify(record) + '\n';
        await fs.mkdir(path.dirname(filePath), { recursive: true });
        // Write and flush through the same append-mode handle, rather than
        // flushing via a second read-only handle.
        const fh = await fs.open(filePath, 'a');
        try {
            await fh.appendFile(line, 'utf8');
            try {
                await fh.sync();
            }
            catch {
                /* Best-effort fsync; failure is non-fatal. */
            }
        }
        finally {
            await fh.close();
        }
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        // One line, to stderr, never throw. Consumers tailing logs will see it.
        console.warn(`[rea] WARN: codex telemetry write failed: ${message}`);
    }
}
|
|
117
|
+
/**
 * Map an ISO timestamp string to its local-time `YYYY-MM-DD` day key.
 * Strings that do not parse as a date map to the literal `'invalid'`.
 */
function dayKey(iso) {
    const parsed = new Date(iso);
    // Local-tz date — operators want "today" to mean their local today.
    return Number.isNaN(parsed.getTime()) ? 'invalid' : dayKeyForDate(parsed);
}
/** Local-time `YYYY-MM-DD` day key for a JS Date. */
function dayKeyForDate(d) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const year = d.getFullYear();
    const month = twoDigits(d.getMonth() + 1);
    const dayOfMonth = twoDigits(d.getDate());
    return `${year}-${month}-${dayOfMonth}`;
}
|
|
135
|
+
/**
 * Read every usable record from the metrics file at `filePath`.
 *
 * A missing file yields `[]`; any other read error is rethrown. Lines that
 * are empty, fail to parse as JSON, or lack the expected `timestamp` /
 * `duration_ms` / `exit_code` / `rate_limited` types are skipped, so one
 * corrupt row does not hide the rest of the file.
 */
async function readRecords(filePath) {
    let contents;
    try {
        contents = await fs.readFile(filePath, 'utf8');
    }
    catch (err) {
        if (err.code === 'ENOENT')
            return [];
        throw err;
    }
    return contents
        .split('\n')
        .filter((line) => line.length !== 0)
        .flatMap((line) => {
        try {
            const row = JSON.parse(line);
            const wellFormed = typeof row.timestamp === 'string' &&
                typeof row.duration_ms === 'number' &&
                typeof row.exit_code === 'number' &&
                typeof row.rate_limited === 'boolean';
            return wellFormed ? [row] : [];
        }
        catch {
            // Malformed line — skip it.
            return [];
        }
    });
}
|
|
169
|
+
/**
 * Build the fixed-shape summary. When `metrics.jsonl` is missing the result
 * is all-zero — callers should NEVER see an exception for "no data yet".
 *
 * @param baseDir - Directory that contains `.rea/metrics.jsonl`.
 * @param windowDays - Number of local-time days to cover, counting back from
 *   today. Coerced to an integer of at least 1.
 * @returns Per-day invocation counts (newest first), summed token estimates,
 *   the rate-limited count and the mean latency for the window.
 */
export async function summarizeTelemetry(baseDir, windowDays = 7) {
    const days = Math.max(1, windowDays | 0);
    const filePath = metricsFilePath(baseDir);
    let records;
    try {
        records = await readRecords(filePath);
    }
    catch {
        // Read error (permissions, etc.) — treat as empty. Telemetry must never
        // break a consumer that just wants to see "is this up?".
        records = [];
    }
    // The reader does not validate the token fields, so a corrupt row may
    // carry a string there; adding it would silently turn the running total
    // into string concatenation. Anything that is not a finite number counts
    // as 0.
    const numeric = (value) => typeof value === 'number' && Number.isFinite(value) ? value : 0;
    // Build day buckets most-recent-first.
    const now = new Date();
    const bucketKeys = [];
    for (let i = 0; i < days; i += 1) {
        const d = new Date(now.getTime());
        d.setDate(d.getDate() - i);
        bucketKeys.push(dayKeyForDate(d));
    }
    const countsByKey = new Map();
    for (const k of bucketKeys)
        countsByKey.set(k, 0);
    let totalTokens = 0;
    let rateLimitedCount = 0;
    let durationSum = 0;
    let inWindow = 0;
    for (const r of records) {
        const key = dayKey(r.timestamp);
        if (!countsByKey.has(key))
            continue; // outside window
        countsByKey.set(key, (countsByKey.get(key) ?? 0) + 1);
        totalTokens +=
            numeric(r.estimated_input_tokens) + numeric(r.estimated_output_tokens);
        if (r.rate_limited)
            rateLimitedCount += 1;
        durationSum += numeric(r.duration_ms);
        inWindow += 1;
    }
    const invocations_per_day = bucketKeys.map((k) => countsByKey.get(k) ?? 0);
    const avg_latency_ms = inWindow === 0 ? 0 : durationSum / inWindow;
    return {
        window_days: days,
        invocations_per_day,
        total_estimated_tokens: totalTokens,
        rate_limited_count: rateLimitedCount,
        avg_latency_ms,
    };
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* G3 — ReDoS safety wrapper.
|
|
3
|
+
*
|
|
4
|
+
* Every regex that the middleware chain runs on untrusted MCP payloads must be
|
|
5
|
+
* bounded. This module provides a `SafeRegex` wrapper that enforces a per-call
|
|
6
|
+
* timeout by executing the match inside a worker thread, then `Promise.race`-ing
|
|
7
|
+
* against a `setTimeout`. On timeout the worker is terminated, the caller is told
|
|
8
|
+
* the operation timed out, and an optional callback fires so the audit layer can
|
|
9
|
+
* record the event.
|
|
10
|
+
*
|
|
11
|
+
* Implementation decision — Option A (worker thread per exec):
|
|
12
|
+
* - No native dependency (compare: `re2` would add a native build step and a
|
|
13
|
+
* second regex dialect to keep consistent with JS defaults).
|
|
14
|
+
* - Timeout is authoritative: terminating the worker is a hard kill, unlike
|
|
15
|
+
* interpreter-level heuristics.
|
|
16
|
+
* - Overhead is ~1ms per call. That is acceptable for gateway payloads; if it
|
|
17
|
+
* ever becomes measurable we can pool workers in a later release without
|
|
18
|
+
* changing the public `SafeRegex` surface.
|
|
19
|
+
* - Rejected Option B (re2): native dep + a different regex dialect than the
|
|
20
|
+
* rest of the codebase assumes. Rejected Option C (length cap only): caps
|
|
21
|
+
* bound worst-case cost but don't eliminate catastrophic backtracking.
|
|
22
|
+
*
|
|
23
|
+
* SECURITY: The `onTimeout` callback is invoked with the pattern and input. The
|
|
24
|
+
* callback contract (enforced by callers, not here) is that the input is used
|
|
25
|
+
* only for size accounting — the middleware must NEVER log the input text, only
|
|
26
|
+
* its byte length and pattern id. See `redact.ts` / `injection.ts` for the
|
|
27
|
+
* audit-event shape.
|
|
28
|
+
*/
|
|
29
|
+
/** Tuning knobs accepted when a pattern is wrapped for timeout-bounded matching. */
export interface MatchTimeoutOptions {
    /** Time budget for each call, in milliseconds. Defaults to 100ms. */
    timeoutMs?: number;
    /**
     * Called exactly once when a match runs past the timeout, so the caller
     * can record an audit event. Implementations MUST NOT log the input
     * text — only its size.
     */
    onTimeout?: (pattern: RegExp, input: string) => void;
}
|
|
38
|
+
/** Outcome of `SafeRegex.test`. */
export interface SafeRegexTestResult {
    /** Whether the pattern matched; `false` when the call timed out. */
    matched: boolean;
    /** Whether the call exceeded its time budget. */
    timedOut: boolean;
}
|
|
42
|
+
/** Outcome of `SafeRegex.replace`. */
export interface SafeRegexReplaceResult {
    /** The text after replacement; the unchanged input when the call timed out. */
    output: string;
    /** Whether the call exceeded its time budget. */
    timedOut: boolean;
}
|
|
46
|
+
/** Outcome of `SafeRegex.matchAll`. */
export interface SafeRegexMatchAllResult {
    /** Every full match found in the input; empty when the call timed out. */
    matches: Array<string>;
    /** Whether the call exceeded its time budget. */
    timedOut: boolean;
}
|
|
50
|
+
/** A regular expression whose every operation is bounded by a timeout. */
export interface SafeRegex {
    /** The wrapped pattern. */
    readonly pattern: RegExp;
    /**
     * Check whether the pattern matches `input`. If the timeout fires the
     * result is `{ matched: false, timedOut: true }` and `onTimeout` is
     * invoked.
     */
    test(input: string): SafeRegexTestResult;
    /**
     * Substitute `replacer` for the pattern's matches in `input`. If the
     * timeout fires the result is `{ output: input, timedOut: true }` — the
     * input comes back unchanged — and `onTimeout` is invoked. Callers at a
     * higher layer must not forward that unredacted text: the middleware
     * substitutes a sentinel instead (see `redact.ts` for the sentinel
     * contract).
     */
    replace(input: string, replacer: string): SafeRegexReplaceResult;
    /**
     * Collect every full-string match of the pattern in `input`. Inside the
     * worker the pattern is compiled with the global flag forced on, so the
     * result is meaningful however the original pattern was written. If the
     * timeout fires the result is `{ matches: [], timedOut: true }` and
     * `onTimeout` is invoked.
     */
    matchAll(input: string): SafeRegexMatchAllResult;
}
|
|
73
|
+
/**
 * Wrap a RegExp in a timeout-enforced `SafeRegex`. Compilation happens both in
 * the parent (to catch syntax errors early) and inside the worker (so a
 * catastrophic compile or match spends only worker CPU).
 *
 * SECURITY: callers should pass regexes that have ALSO been cleared by
 * `safe-regex` at load time — the timeout is a defense-in-depth backstop, not
 * a replacement for static analysis. See `scripts/lint-safe-regex.mjs` and the
 * load-time check in `src/policy/loader.ts`.
 *
 * @param pattern - The regular expression to wrap.
 * @param opts - Optional per-call timeout budget and timeout callback; see
 *   `MatchTimeoutOptions`.
 * @returns A `SafeRegex` exposing `test`, `replace` and `matchAll` for `pattern`.
 */
|
|
83
|
+
export declare function wrapRegex(pattern: RegExp, opts?: MatchTimeoutOptions): SafeRegex;
|