@bookedsolid/rea 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/.husky/commit-msg +130 -0
  2. package/.husky/pre-push +128 -0
  3. package/README.md +5 -5
  4. package/agents/codex-adversarial.md +23 -8
  5. package/commands/codex-review.md +2 -2
  6. package/dist/audit/append.d.ts +62 -0
  7. package/dist/audit/append.js +189 -0
  8. package/dist/audit/codex-event.d.ts +28 -0
  9. package/dist/audit/codex-event.js +15 -0
  10. package/dist/cli/doctor.d.ts +60 -1
  11. package/dist/cli/doctor.js +459 -20
  12. package/dist/cli/index.js +35 -5
  13. package/dist/cli/init.d.ts +13 -0
  14. package/dist/cli/init.js +278 -67
  15. package/dist/cli/install/canonical.d.ts +43 -0
  16. package/dist/cli/install/canonical.js +101 -0
  17. package/dist/cli/install/claude-md.d.ts +48 -0
  18. package/dist/cli/install/claude-md.js +93 -0
  19. package/dist/cli/install/commit-msg.d.ts +30 -0
  20. package/dist/cli/install/commit-msg.js +102 -0
  21. package/dist/cli/install/copy.d.ts +169 -0
  22. package/dist/cli/install/copy.js +455 -0
  23. package/dist/cli/install/fs-safe.d.ts +91 -0
  24. package/dist/cli/install/fs-safe.js +347 -0
  25. package/dist/cli/install/manifest-io.d.ts +12 -0
  26. package/dist/cli/install/manifest-io.js +44 -0
  27. package/dist/cli/install/manifest-schema.d.ts +83 -0
  28. package/dist/cli/install/manifest-schema.js +80 -0
  29. package/dist/cli/install/reagent.d.ts +59 -0
  30. package/dist/cli/install/reagent.js +160 -0
  31. package/dist/cli/install/settings-merge.d.ts +91 -0
  32. package/dist/cli/install/settings-merge.js +239 -0
  33. package/dist/cli/install/sha.d.ts +9 -0
  34. package/dist/cli/install/sha.js +21 -0
  35. package/dist/cli/serve.d.ts +11 -0
  36. package/dist/cli/serve.js +72 -6
  37. package/dist/cli/upgrade.d.ts +67 -0
  38. package/dist/cli/upgrade.js +509 -0
  39. package/dist/gateway/downstream-pool.d.ts +39 -0
  40. package/dist/gateway/downstream-pool.js +93 -0
  41. package/dist/gateway/downstream.d.ts +80 -0
  42. package/dist/gateway/downstream.js +196 -0
  43. package/dist/gateway/middleware/audit-types.d.ts +10 -0
  44. package/dist/gateway/middleware/audit.js +14 -0
  45. package/dist/gateway/middleware/injection.d.ts +59 -2
  46. package/dist/gateway/middleware/injection.js +91 -14
  47. package/dist/gateway/middleware/kill-switch.d.ts +20 -5
  48. package/dist/gateway/middleware/kill-switch.js +57 -35
  49. package/dist/gateway/middleware/redact.d.ts +83 -6
  50. package/dist/gateway/middleware/redact.js +133 -46
  51. package/dist/gateway/observability/codex-probe.d.ts +110 -0
  52. package/dist/gateway/observability/codex-probe.js +234 -0
  53. package/dist/gateway/observability/codex-telemetry.d.ts +93 -0
  54. package/dist/gateway/observability/codex-telemetry.js +221 -0
  55. package/dist/gateway/redact-safe/match-timeout.d.ts +83 -0
  56. package/dist/gateway/redact-safe/match-timeout.js +179 -0
  57. package/dist/gateway/reviewers/claude-self.d.ts +99 -0
  58. package/dist/gateway/reviewers/claude-self.js +316 -0
  59. package/dist/gateway/reviewers/codex.d.ts +64 -0
  60. package/dist/gateway/reviewers/codex.js +80 -0
  61. package/dist/gateway/reviewers/select.d.ts +64 -0
  62. package/dist/gateway/reviewers/select.js +102 -0
  63. package/dist/gateway/reviewers/types.d.ts +85 -0
  64. package/dist/gateway/reviewers/types.js +14 -0
  65. package/dist/gateway/server.d.ts +51 -0
  66. package/dist/gateway/server.js +258 -0
  67. package/dist/gateway/session.d.ts +9 -0
  68. package/dist/gateway/session.js +17 -0
  69. package/dist/policy/loader.d.ts +59 -0
  70. package/dist/policy/loader.js +65 -0
  71. package/dist/policy/profiles.d.ts +80 -0
  72. package/dist/policy/profiles.js +94 -0
  73. package/dist/policy/types.d.ts +38 -0
  74. package/dist/registry/loader.d.ts +98 -0
  75. package/dist/registry/loader.js +153 -0
  76. package/dist/registry/types.d.ts +44 -0
  77. package/dist/registry/types.js +6 -0
  78. package/dist/scripts/read-policy-field.d.ts +36 -0
  79. package/dist/scripts/read-policy-field.js +96 -0
  80. package/hooks/push-review-gate.sh +627 -17
  81. package/package.json +13 -2
  82. package/profiles/bst-internal-no-codex.yaml +40 -0
  83. package/profiles/bst-internal.yaml +23 -0
  84. package/profiles/client-engagement.yaml +23 -0
  85. package/profiles/lit-wc.yaml +17 -0
  86. package/profiles/minimal.yaml +11 -0
  87. package/profiles/open-source-no-codex.yaml +33 -0
  88. package/profiles/open-source.yaml +18 -0
  89. package/scripts/lint-safe-regex.mjs +78 -0
  90. package/scripts/postinstall.mjs +131 -0
@@ -0,0 +1,179 @@
1
+ import { MessageChannel, Worker, receiveMessageOnPort } from 'node:worker_threads';
2
+ const DEFAULT_TIMEOUT_MS = 100;
3
+ /**
4
+ * Worker source — one script handles both `test` and `replace` ops. The worker
5
+ * receives the request + a SharedArrayBuffer for synchronization via
6
+ * `workerData`, compiles the regex inside the worker (so a catastrophic
7
+ * pattern burns worker CPU only), posts the result payload as a `replyPort`
8
+ * message, and then signals completion by writing `1` into the SAB and calling
9
+ * `Atomics.notify`. The parent blocks on `Atomics.wait(sab, 0, 0, timeoutMs)`
10
+ * and wakes when the worker notifies — OR when the timeout expires, in which
11
+ * case the parent terminates the worker.
12
+ *
13
+ * SECURITY: The parent must NOT rely on the `message` event alone, because
14
+ * `Atomics.wait` blocks the main thread's event loop. The SAB signal is the
15
+ * authoritative wake source. The parent reads the reply AFTER wake by draining
16
+ * the worker's `receiveMessageOnPort` queue.
17
+ */
18
+ const WORKER_SOURCE = `
19
+ const { workerData } = require('node:worker_threads');
20
+ const { signalSab, replyPort, req } = workerData;
21
+ const view = new Int32Array(signalSab);
22
+ try {
23
+ const re = new RegExp(req.source, req.flags);
24
+ let reply;
25
+ if (req.op === 'test') {
26
+ re.lastIndex = 0;
27
+ reply = { ok: true, op: 'test', matched: re.test(req.input) };
28
+ } else if (req.op === 'replace') {
29
+ re.lastIndex = 0;
30
+ reply = { ok: true, op: 'replace', output: req.input.replace(re, req.replacer) };
31
+ } else if (req.op === 'matchAll') {
32
+ // Force the global flag on so matchAll is meaningful.
33
+ const flags = req.flags.includes('g') ? req.flags : req.flags + 'g';
34
+ const gre = new RegExp(req.source, flags);
35
+ const out = [];
36
+ for (const m of req.input.matchAll(gre)) {
37
+ out.push(m[0]);
38
+ }
39
+ reply = { ok: true, op: 'matchAll', matches: out };
40
+ } else {
41
+ reply = { ok: false, error: 'unknown op: ' + req.op };
42
+ }
43
+ replyPort.postMessage(reply);
44
+ } catch (err) {
45
+ replyPort.postMessage({ ok: false, error: err && err.message ? err.message : String(err) });
46
+ } finally {
47
+ // Signal completion via SAB so the parent's Atomics.wait unblocks. The parent
48
+ // then drains the replyPort synchronously via receiveMessageOnPort.
49
+ Atomics.store(view, 0, 1);
50
+ Atomics.notify(view, 0);
51
+ }
52
+ `;
53
/**
 * Run a single regex request on a throwaway worker thread and block the
 * calling thread until the worker finishes or `timeoutMs` elapses.
 *
 * Middleware hot paths call `.test()` / `.replace()` inside tight synchronous
 * loops, so this has to be synchronous to back a drop-in `SafeRegex`.
 *
 * How it works:
 *   1. Allocate a 4-byte SharedArrayBuffer used purely as a completion flag.
 *   2. Create a MessageChannel and spawn the worker with
 *      `workerData: { signalSab, replyPort, req }`, transferring the
 *      `replyPort` end to the worker.
 *   3. Block on `Atomics.wait(view, 0, 0, timeoutMs)`.
 *   4. The worker posts its reply on `replyPort`, then stores `1` in the flag
 *      and calls `Atomics.notify`. The flag is the authoritative wake source:
 *      no `message` event can fire while this thread is blocked.
 *   5. After waking, read the reply synchronously via `receiveMessageOnPort`.
 *
 * The worker is terminated and the parent-side port closed on every exit path
 * once the worker exists. If the `Worker` constructor itself throws, both
 * channel ends are closed before the error is rethrown.
 *
 * @param req - Request forwarded to the worker (`op`, `source`, `flags`,
 *   `input`, and `replacer` for the `replace` op).
 * @param timeoutMs - Maximum time to block, in milliseconds.
 * @returns `{ reply, timedOut }`. On timeout `reply` is `null`. If the worker
 *   signalled but queued no message, `reply` is
 *   `{ ok: false, error: 'worker produced no result' }`.
 * @throws Whatever `new Worker(...)` throws; the error is propagated unchanged.
 */
function runInWorkerSync(req, timeoutMs) {
    const signalSab = new SharedArrayBuffer(4);
    const view = new Int32Array(signalSab);
    // The worker gets `port1`; the parent keeps `port2` so it can drain the
    // reply synchronously.
    const { port1: workerSendPort, port2: parentRecvPort } = new MessageChannel();
    let worker;
    try {
        worker = new Worker(WORKER_SOURCE, {
            eval: true,
            workerData: { signalSab, replyPort: workerSendPort, req },
            transferList: [workerSendPort],
        });
    }
    catch (err) {
        // No worker will ever use the channel, so release both ends
        // instead of leaving them open.
        workerSendPort.close();
        parentRecvPort.close();
        throw err;
    }
    try {
        // Don't pin the process on the worker's existence.
        worker.unref();
        // Any result other than 'timed-out' means the flag was already set
        // ('not-equal') or the worker notified us ('ok').
        const waitResult = Atomics.wait(view, 0, 0, timeoutMs);
        if (waitResult === 'timed-out') {
            // Worker is still running; the `finally` below hard-kills it.
            return { reply: null, timedOut: true };
        }
        // The worker posts BEFORE it signals, so a message should be queued.
        // `receiveMessageOnPort` returns `undefined` when the queue is empty;
        // guard defensively.
        const msg = receiveMessageOnPort(parentRecvPort);
        const reply = msg !== undefined ? msg.message : null;
        if (reply !== null) {
            return { reply, timedOut: false };
        }
        return { reply: { ok: false, error: 'worker produced no result' }, timedOut: false };
    }
    finally {
        // Single cleanup point for both the timeout and completion paths.
        void worker.terminate();
        parentRecvPort.close();
    }
}
114
+ /**
115
+ * Wrap a RegExp in a timeout-enforced `SafeRegex`. Compilation happens both in
116
+ * the parent (to catch syntax errors early) and inside the worker (so a
117
+ * catastrophic compile or match spends only worker CPU).
118
+ *
119
+ * SECURITY: callers should pass regexes that have ALSO been cleared by
120
+ * `safe-regex` at load time — the timeout is a defense-in-depth backstop, not
121
+ * a replacement for static analysis. See `scripts/lint-safe-regex.mjs` and the
122
+ * load-time check in `src/policy/loader.ts`.
123
+ */
124
+ export function wrapRegex(pattern, opts) {
125
+ const timeoutMs = opts?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
126
+ const onTimeout = opts?.onTimeout;
127
+ const source = pattern.source;
128
+ const flags = pattern.flags;
129
+ const emitTimeout = (input) => {
130
+ if (onTimeout) {
131
+ try {
132
+ onTimeout(pattern, input);
133
+ }
134
+ catch {
135
+ // Callback errors MUST NOT break middleware. Swallow silently — the
136
+ // middleware has its own audit path if it cares.
137
+ }
138
+ }
139
+ };
140
+ return {
141
+ pattern,
142
+ test(input) {
143
+ const { reply, timedOut } = runInWorkerSync({ op: 'test', source, flags, input }, timeoutMs);
144
+ if (timedOut) {
145
+ emitTimeout(input);
146
+ return { matched: false, timedOut: true };
147
+ }
148
+ if (reply && reply.ok && reply.op === 'test') {
149
+ return { matched: reply.matched, timedOut: false };
150
+ }
151
+ // Worker errored (compile error, etc.) — treat as no match, no timeout.
152
+ return { matched: false, timedOut: false };
153
+ },
154
+ replace(input, replacer) {
155
+ const { reply, timedOut } = runInWorkerSync({ op: 'replace', source, flags, input, replacer }, timeoutMs);
156
+ if (timedOut) {
157
+ emitTimeout(input);
158
+ return { output: input, timedOut: true };
159
+ }
160
+ if (reply && reply.ok && reply.op === 'replace') {
161
+ return { output: reply.output, timedOut: false };
162
+ }
163
+ // Worker errored — preserve input unchanged (never corrupt payload).
164
+ return { output: input, timedOut: false };
165
+ },
166
+ matchAll(input) {
167
+ const { reply, timedOut } = runInWorkerSync({ op: 'matchAll', source, flags, input }, timeoutMs);
168
+ if (timedOut) {
169
+ emitTimeout(input);
170
+ return { matches: [], timedOut: true };
171
+ }
172
+ if (reply && reply.ok && reply.op === 'matchAll') {
173
+ return { matches: reply.matches, timedOut: false };
174
+ }
175
+ // Worker errored — return empty match set.
176
+ return { matches: [], timedOut: false };
177
+ },
178
+ };
179
+ }
@@ -0,0 +1,99 @@
1
+ /**
2
+ * ClaudeSelfReviewer — the runtime fallback for the adversarial reviewer
3
+ * slot (G11.2).
4
+ *
5
+ * When Codex is unreachable (rate-limited, unauthenticated, CLI missing)
6
+ * and the operator hasn't opted into a first-class no-Codex policy, we
7
+ * still want SOMETHING pushing back on the diff before it lands. A fresh-
8
+ * context Opus call with a review-only system prompt is not a cross-model
9
+ * check — it's the same family reviewing its own output — so we label every
10
+ * result `degraded: true` so the audit trail is honest about it.
11
+ *
12
+ * ## Design notes
13
+ *
14
+ * - One-shot, no conversation history. The SDK call is synchronous from
15
+ * our caller's perspective.
16
+ * - We pin the model id in `version` so older audit entries stay
17
+ * reproducible when we bump the default.
18
+ * - The model is prompted to return STRICT JSON matching `ReviewResult`.
19
+ * If parsing fails we return `verdict: 'error'` rather than guessing —
20
+ * the operator gets a clear signal that the fallback didn't work.
21
+ * - Rate-limit / 5xx errors bubble up as `verdict: 'error'` with the raw
22
+ * message; callers can decide to retry, prompt the human, or abort.
23
+ * - We cap the diff at 200KB and note the truncation in the summary. The
24
+ * inbound token budget for a big Opus call is much larger, but a 200KB
25
+ * diff is already a red flag on its own and we don't want to silently
26
+ * eat massive payloads.
27
+ */
28
+ import { recordTelemetry } from '../observability/codex-telemetry.js';
29
+ import type { AdversarialReviewer, ReviewRequest, ReviewResult } from './types.js';
30
+ /**
31
+ * Thin shape of the one SDK method we call. Lets the tests swap in a fake
32
+ * without pulling the full Anthropic client into the unit test. Shape
33
+ * mirrors `client.messages.create` closely enough for our purposes.
34
+ */
35
+ export interface MessagesCreateFn {
36
+ (params: {
37
+ model: string;
38
+ max_tokens: number;
39
+ system: string;
40
+ messages: Array<{
41
+ role: 'user';
42
+ content: string;
43
+ }>;
44
+ }): Promise<{
45
+ content: Array<{
46
+ type: string;
47
+ text?: string;
48
+ }>;
49
+ }>;
50
+ }
51
+ /**
52
+ * Constructor seams let tests inject a fake exec/SDK without stubbing the
53
+ * module registry. Production callers use the defaults.
54
+ */
55
+ export interface ClaudeSelfReviewerOptions {
56
+ apiKey?: string;
57
+ model?: string;
58
+ create?: MessagesCreateFn;
59
+ /**
60
+ * Repo root used for the telemetry write path (`<baseDir>/.rea/metrics.jsonl`).
61
+ * Defaults to `process.cwd()`. Tests inject a temp dir so they don't scribble
62
+ * into the repo's real metrics file.
63
+ */
64
+ baseDir?: string;
65
+ /**
66
+ * Test seam — override the telemetry-record function so unit tests can
67
+ * assert on the exact shape without hitting disk. Defaults to the real
68
+ * `recordTelemetry` helper.
69
+ */
70
+ recordTelemetryFn?: typeof recordTelemetry;
71
+ }
72
+ export declare class ClaudeSelfReviewer implements AdversarialReviewer {
73
+ readonly name = "claude-self";
74
+ readonly version: string;
75
+ private readonly apiKey;
76
+ private readonly createFn;
77
+ private readonly baseDir;
78
+ private readonly recordTelemetryFn;
79
+ constructor(opts?: ClaudeSelfReviewerOptions);
80
+ /**
81
+ * Cheap check. We don't actually ping the API — the selector only needs
82
+ * to know whether we CAN try. If the key is bogus we'll find out in
83
+ * `review()` and surface it as `verdict: 'error'`.
84
+ */
85
+ isAvailable(): Promise<boolean>;
86
+ review(req: ReviewRequest): Promise<ReviewResult>;
87
+ /**
88
+ * Fire-and-forget telemetry write. The helper itself is fail-soft, but a
89
+ * misbehaving injected `recordTelemetryFn` (synchronous throw) could
90
+ * still escape a bare `void fn(...)`. This wrapper catches both sync and
91
+ * async rejections so a telemetry bug never breaks a review.
92
+ */
93
+ private emitTelemetry;
94
+ /**
95
+ * Resolve the create() closure lazily so we only build the real client
96
+ * when there's an API key and nothing was injected.
97
+ */
98
+ private getCreateFn;
99
+ }
@@ -0,0 +1,316 @@
1
+ /**
2
+ * ClaudeSelfReviewer — the runtime fallback for the adversarial reviewer
3
+ * slot (G11.2).
4
+ *
5
+ * When Codex is unreachable (rate-limited, unauthenticated, CLI missing)
6
+ * and the operator hasn't opted into a first-class no-Codex policy, we
7
+ * still want SOMETHING pushing back on the diff before it lands. A fresh-
8
+ * context Opus call with a review-only system prompt is not a cross-model
9
+ * check — it's the same family reviewing its own output — so we label every
10
+ * result `degraded: true` so the audit trail is honest about it.
11
+ *
12
+ * ## Design notes
13
+ *
14
+ * - One-shot, no conversation history. The SDK call is synchronous from
15
+ * our caller's perspective.
16
+ * - We pin the model id in `version` so older audit entries stay
17
+ * reproducible when we bump the default.
18
+ * - The model is prompted to return STRICT JSON matching `ReviewResult`.
19
+ * If parsing fails we return `verdict: 'error'` rather than guessing —
20
+ * the operator gets a clear signal that the fallback didn't work.
21
+ * - Rate-limit / 5xx errors bubble up as `verdict: 'error'` with the raw
22
+ * message; callers can decide to retry, prompt the human, or abort.
23
+ * - We cap the diff at 200KB and note the truncation in the summary. The
24
+ * inbound token budget for a big Opus call is much larger, but a 200KB
25
+ * diff is already a red flag on its own and we don't want to silently
26
+ * eat massive payloads.
27
+ */
28
+ import Anthropic, { APIError } from '@anthropic-ai/sdk';
29
+ import { recordTelemetry } from '../observability/codex-telemetry.js';
30
+ /** Pin the model id — audit entries reference this verbatim. */
31
+ const CLAUDE_MODEL_ID = 'claude-opus-4-7';
32
+ /** 200KB cap on the diff before we truncate and flag degraded. */
33
+ const DIFF_TRUNCATE_BYTES = 200 * 1024;
34
+ /** Bounded output so a runaway model can't exhaust our token budget. */
35
+ const MAX_OUTPUT_TOKENS = 4096;
36
+ const SYSTEM_PROMPT = `You are an adversarial code reviewer. A diff will be provided along with
37
+ commit metadata. Identify high-impact security, correctness, edge-case,
38
+ test-gap, api-design, or performance issues. Do not restate what the diff
39
+ does; surface what is wrong or risky.
40
+
41
+ Respond with STRICT JSON matching exactly this schema. Do not include
42
+ markdown fences, commentary, or any text outside the JSON object:
43
+
44
+ {
45
+ "verdict": "pass" | "concerns" | "blocking" | "error",
46
+ "summary": "one sentence",
47
+ "findings": [
48
+ {
49
+ "category": "security" | "correctness" | "edge-case" | "test-gap" | "api-design" | "performance",
50
+ "severity": "high" | "medium" | "low",
51
+ "file": "relative/path",
52
+ "line": 123,
53
+ "issue": "short problem statement",
54
+ "evidence": "optional quote from the diff",
55
+ "suggested_fix": "optional one-line fix hint"
56
+ }
57
+ ]
58
+ }
59
+
60
+ Return an empty findings array for a clean pass. Use "blocking" only for
61
+ issues that must be fixed before merge.`;
62
/**
 * Render the single user-turn message sent to the model: branch metadata,
 * the commit log, the diff, and (when applicable) a truncation notice.
 *
 * @param req - Review request; reads `branch`, `head_sha`, `target`,
 *   `commit_log` and `diff`. Empty log/diff are rendered as `(empty)`.
 * @param diffWasTruncated - Whether `req.diff` was cut to the size cap.
 * @returns The newline-delimited message text.
 */
function buildUserMessage(req, diffWasTruncated) {
    const header = `Branch: ${req.branch}\nHead SHA: ${req.head_sha}\nDiffed against: ${req.target}\n`;
    const commitSection = `\n## Commit log\n${req.commit_log || '(empty)'}\n`;
    let diffSection = `\n## Diff\n${req.diff || '(empty)'}\n`;
    if (diffWasTruncated) {
        diffSection += '\n\nNOTE: The diff was truncated to 200KB. The review is necessarily partial.';
    }
    return header + commitSection + diffSection;
}
79
+ /**
80
+ * Safe parse — we don't want a malformed model response to crash the push
81
+ * gate. Any parse failure or shape mismatch folds into an error verdict.
82
+ */
83
+ function parseModelJson(raw) {
84
+ let parsed;
85
+ try {
86
+ parsed = JSON.parse(raw);
87
+ }
88
+ catch (err) {
89
+ return {
90
+ error: `unparseable JSON from model: ${err instanceof Error ? err.message : String(err)}`,
91
+ };
92
+ }
93
+ if (typeof parsed !== 'object' || parsed === null) {
94
+ return { error: 'model response was not a JSON object' };
95
+ }
96
+ const obj = parsed;
97
+ const verdict = obj['verdict'];
98
+ const summary = obj['summary'];
99
+ const findings = obj['findings'];
100
+ const validVerdicts = ['pass', 'concerns', 'blocking', 'error'];
101
+ if (typeof verdict !== 'string' || !validVerdicts.includes(verdict)) {
102
+ return { error: `invalid verdict: ${String(verdict)}` };
103
+ }
104
+ if (typeof summary !== 'string') {
105
+ return { error: 'missing or non-string summary' };
106
+ }
107
+ if (!Array.isArray(findings)) {
108
+ return { error: 'missing or non-array findings' };
109
+ }
110
+ // Findings get shallow validation — we pass each through a narrow guard
111
+ // and drop any entries that can't be coerced. A noisy model is better
112
+ // handled by discarding junk than by erroring the whole review.
113
+ const cleanFindings = [];
114
+ for (const f of findings) {
115
+ const finding = toReviewFinding(f);
116
+ if (finding !== undefined)
117
+ cleanFindings.push(finding);
118
+ }
119
+ return {
120
+ reviewer_name: 'claude-self',
121
+ reviewer_version: CLAUDE_MODEL_ID,
122
+ verdict: verdict,
123
+ findings: cleanFindings,
124
+ summary,
125
+ // Always true for this reviewer — same-model is structurally degraded.
126
+ // Callers that compose results should keep this value, not overwrite it.
127
+ degraded: true,
128
+ };
129
+ }
130
+ function toReviewFinding(input) {
131
+ if (typeof input !== 'object' || input === null)
132
+ return undefined;
133
+ const o = input;
134
+ const validCategories = [
135
+ 'security',
136
+ 'correctness',
137
+ 'edge-case',
138
+ 'test-gap',
139
+ 'api-design',
140
+ 'performance',
141
+ ];
142
+ const validSeverities = ['high', 'medium', 'low'];
143
+ if (typeof o['category'] !== 'string' || !validCategories.includes(o['category'])) {
144
+ return undefined;
145
+ }
146
+ if (typeof o['severity'] !== 'string' || !validSeverities.includes(o['severity'])) {
147
+ return undefined;
148
+ }
149
+ if (typeof o['file'] !== 'string')
150
+ return undefined;
151
+ if (typeof o['issue'] !== 'string')
152
+ return undefined;
153
+ const out = {
154
+ category: o['category'],
155
+ severity: o['severity'],
156
+ file: o['file'],
157
+ issue: o['issue'],
158
+ };
159
+ if (typeof o['line'] === 'number')
160
+ out.line = o['line'];
161
+ if (typeof o['start_line'] === 'number')
162
+ out.start_line = o['start_line'];
163
+ if (typeof o['evidence'] === 'string')
164
+ out.evidence = o['evidence'];
165
+ if (typeof o['suggested_fix'] === 'string')
166
+ out.suggested_fix = o['suggested_fix'];
167
+ return out;
168
+ }
169
+ function errorResult(message, summary, degradedNote) {
170
+ return {
171
+ reviewer_name: 'claude-self',
172
+ reviewer_version: CLAUDE_MODEL_ID,
173
+ verdict: 'error',
174
+ findings: [],
175
+ summary: `${summary}${degradedNote}`,
176
+ degraded: true,
177
+ error: message,
178
+ };
179
+ }
180
+ export class ClaudeSelfReviewer {
181
+ name = 'claude-self';
182
+ version;
183
+ apiKey;
184
+ createFn;
185
+ baseDir;
186
+ recordTelemetryFn;
187
+ constructor(opts = {}) {
188
+ this.version = opts.model ?? CLAUDE_MODEL_ID;
189
+ this.apiKey = opts.apiKey ?? process.env['ANTHROPIC_API_KEY'];
190
+ this.createFn = opts.create;
191
+ this.baseDir = opts.baseDir ?? process.cwd();
192
+ this.recordTelemetryFn = opts.recordTelemetryFn ?? recordTelemetry;
193
+ }
194
+ /**
195
+ * Cheap check. We don't actually ping the API — the selector only needs
196
+ * to know whether we CAN try. If the key is bogus we'll find out in
197
+ * `review()` and surface it as `verdict: 'error'`.
198
+ */
199
+ async isAvailable() {
200
+ // When a test injects `create`, treat the reviewer as available so we
201
+ // don't need to juggle fake env in every test.
202
+ if (this.createFn !== undefined)
203
+ return true;
204
+ return this.apiKey !== undefined && this.apiKey.length > 0;
205
+ }
206
+ async review(req) {
207
+ const create = this.getCreateFn();
208
+ if (create === undefined) {
209
+ return errorResult('ANTHROPIC_API_KEY not set', 'claude-self fallback unavailable: no API key', '');
210
+ }
211
+ const diffBytes = Buffer.byteLength(req.diff, 'utf8');
212
+ const truncated = diffBytes > DIFF_TRUNCATE_BYTES;
213
+ const effectiveDiff = truncated
214
+ ? req.diff.slice(0, DIFF_TRUNCATE_BYTES)
215
+ : req.diff;
216
+ const userMessage = buildUserMessage({ ...req, diff: effectiveDiff }, truncated);
217
+ // G11.5 — measure the SDK call. The telemetry write is best-effort and
218
+ // fire-and-forget; it MUST NOT block or fail the review. We call
219
+ // `recordTelemetryFn` in a fire-and-forget void expression; the helper
220
+ // itself is already fail-soft (single stderr warn on error).
221
+ const startedAt = Date.now();
222
+ let response;
223
+ try {
224
+ response = await create({
225
+ model: this.version,
226
+ max_tokens: MAX_OUTPUT_TOKENS,
227
+ system: SYSTEM_PROMPT,
228
+ messages: [
229
+ {
230
+ role: 'user',
231
+ content: userMessage,
232
+ },
233
+ ],
234
+ });
235
+ }
236
+ catch (err) {
237
+ // Rate-limits, 5xx, network errors all land here. Surface the raw
238
+ // message so operators can act on it; the caller decides whether
239
+ // to retry or abort.
240
+ const message = err instanceof APIError ? `API ${err.status ?? '?'}: ${err.message}` : err instanceof Error ? err.message : String(err);
241
+ this.emitTelemetry({
242
+ invocation_type: 'adversarial-review',
243
+ input_text: userMessage,
244
+ output_text: '',
245
+ duration_ms: Date.now() - startedAt,
246
+ exit_code: 1,
247
+ stderr: message,
248
+ });
249
+ return errorResult(message, 'claude-self review failed', '');
250
+ }
251
+ const text = response.content
252
+ .filter((c) => c.type === 'text')
253
+ .map((c) => c.text ?? '')
254
+ .join('');
255
+ const parsed = parseModelJson(text);
256
+ if ('error' in parsed) {
257
+ this.emitTelemetry({
258
+ invocation_type: 'adversarial-review',
259
+ input_text: userMessage,
260
+ output_text: text,
261
+ duration_ms: Date.now() - startedAt,
262
+ exit_code: 1,
263
+ stderr: parsed.error,
264
+ });
265
+ return errorResult(parsed.error, 'claude-self produced unparseable output', '');
266
+ }
267
+ if (truncated) {
268
+ parsed.summary = `[diff truncated to 200KB] ${parsed.summary}`;
269
+ }
270
+ // Defense in depth — parseModelJson should always set degraded=true
271
+ // for this reviewer, but this reviewer is the canonical authority on
272
+ // that flag so re-pin it.
273
+ parsed.degraded = true;
274
+ this.emitTelemetry({
275
+ invocation_type: 'adversarial-review',
276
+ input_text: userMessage,
277
+ output_text: text,
278
+ duration_ms: Date.now() - startedAt,
279
+ exit_code: 0,
280
+ });
281
+ return parsed;
282
+ }
283
+ /**
284
+ * Fire-and-forget telemetry write. The helper itself is fail-soft, but a
285
+ * misbehaving injected `recordTelemetryFn` (synchronous throw) could
286
+ * still escape a bare `void fn(...)`. This wrapper catches both sync and
287
+ * async rejections so a telemetry bug never breaks a review.
288
+ */
289
+ emitTelemetry(input) {
290
+ try {
291
+ void Promise.resolve(this.recordTelemetryFn(this.baseDir, input)).catch(() => {
292
+ /* swallowed — telemetry is observational, never fatal */
293
+ });
294
+ }
295
+ catch {
296
+ /* same rationale — a sync throw from the injected fn is contained */
297
+ }
298
+ }
299
+ /**
300
+ * Resolve the create() closure lazily so we only build the real client
301
+ * when there's an API key and nothing was injected.
302
+ */
303
+ getCreateFn() {
304
+ if (this.createFn !== undefined)
305
+ return this.createFn;
306
+ if (this.apiKey === undefined || this.apiKey.length === 0)
307
+ return undefined;
308
+ const client = new Anthropic({ apiKey: this.apiKey });
309
+ return async (params) => {
310
+ const res = await client.messages.create(params);
311
+ return {
312
+ content: res.content.map((block) => block.type === 'text' ? { type: 'text', text: block.text } : { type: block.type }),
313
+ };
314
+ };
315
+ }
316
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Codex adversarial reviewer adapter (G11.2).
3
+ *
4
+ * ## Why this class throws from `review()`
5
+ *
6
+ * The actual Codex review path is the `codex-adversarial` agent shipped under
7
+ * `.claude/agents/`, invoked from Claude Code via the `/codex-review` slash
8
+ * command (which eventually reaches the Codex plugin's
9
+ * `/codex:adversarial-review`). None of that is importable from TS — the
10
+ * agent runtime is the harness, not a library.
11
+ *
12
+ * `CodexReviewer` exists so:
13
+ *
14
+ * 1. `selectReviewer()` can return a typed reviewer handle with a stable
15
+ * `name`/`version` that the audit log and CLI can surface.
16
+ * 2. `isAvailable()` can cheaply probe the CLI without invoking a review.
17
+ * 3. G11.3 (startup probe) and G11.4 (no-Codex policy) have something to
18
+ * type-check against now, so the broader flow can land without waiting
19
+ * for an in-process Codex SDK that may never ship.
20
+ *
21
+ * If we ever get a native Codex TS client, `review()` becomes real and this
22
+ * comment block goes away. Until then: treat a Codex selection as
23
+ * "dispatch to the agent", not "await reviewer.review(...)".
24
+ */
25
+ import type { AdversarialReviewer, ReviewRequest, ReviewResult } from './types.js';
26
+ /**
27
+ * Narrow test seam: the unit tests swap the exec implementation via the
28
+ * constructor so we don't have to hit the real CLI. Kept internal to this
29
+ * file — production callers always use the default.
30
+ */
31
+ export type ExecFileFn = (file: string, args: readonly string[], options: {
32
+ timeout: number;
33
+ }) => Promise<{
34
+ stdout: string;
35
+ stderr: string;
36
+ }>;
37
+ export declare class CodexReviewer implements AdversarialReviewer {
38
+ readonly name = "codex";
39
+ private readonly exec;
40
+ private cachedVersion;
41
+ constructor(opts?: {
42
+ exec?: ExecFileFn;
43
+ });
44
+ /**
45
+ * Lazily populated via `codex --version`. We don't block construction on
46
+ * the probe because the selector calls `isAvailable()` before it commits
47
+ * to Codex, so we'll have a fresh value by the time anything reads it.
48
+ */
49
+ get version(): string;
50
+ isAvailable(): Promise<boolean>;
51
+ /**
52
+ * Not invokable from TS — see the file header. The selector contract is
53
+ * "CodexReviewer handles mean dispatch to the codex-adversarial agent";
54
+ * if a caller ignores that and awaits this, we throw loudly rather than
55
+ * silently produce a bad `ReviewResult`.
56
+ *
57
+ * TODO(0.3.0): when Codex ships a native TS client, this path will
58
+ * actually run the review. At that point, instrument with
59
+ * `recordTelemetry` the same way `ClaudeSelfReviewer.review()` does
60
+ * today (G11.5). The throwing placeholder below is deliberately NOT
61
+ * instrumented — there is nothing to measure.
62
+ */
63
+ review(_req: ReviewRequest): Promise<ReviewResult>;
64
+ }