@bookedsolid/rea 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.husky/commit-msg +130 -0
- package/.husky/pre-push +128 -0
- package/README.md +5 -5
- package/agents/codex-adversarial.md +23 -8
- package/commands/codex-review.md +2 -2
- package/dist/audit/append.d.ts +62 -0
- package/dist/audit/append.js +189 -0
- package/dist/audit/codex-event.d.ts +28 -0
- package/dist/audit/codex-event.js +15 -0
- package/dist/cli/doctor.d.ts +60 -1
- package/dist/cli/doctor.js +459 -20
- package/dist/cli/index.js +35 -5
- package/dist/cli/init.d.ts +13 -0
- package/dist/cli/init.js +278 -67
- package/dist/cli/install/canonical.d.ts +43 -0
- package/dist/cli/install/canonical.js +101 -0
- package/dist/cli/install/claude-md.d.ts +48 -0
- package/dist/cli/install/claude-md.js +93 -0
- package/dist/cli/install/commit-msg.d.ts +30 -0
- package/dist/cli/install/commit-msg.js +102 -0
- package/dist/cli/install/copy.d.ts +169 -0
- package/dist/cli/install/copy.js +455 -0
- package/dist/cli/install/fs-safe.d.ts +91 -0
- package/dist/cli/install/fs-safe.js +347 -0
- package/dist/cli/install/manifest-io.d.ts +12 -0
- package/dist/cli/install/manifest-io.js +44 -0
- package/dist/cli/install/manifest-schema.d.ts +83 -0
- package/dist/cli/install/manifest-schema.js +80 -0
- package/dist/cli/install/reagent.d.ts +59 -0
- package/dist/cli/install/reagent.js +160 -0
- package/dist/cli/install/settings-merge.d.ts +91 -0
- package/dist/cli/install/settings-merge.js +239 -0
- package/dist/cli/install/sha.d.ts +9 -0
- package/dist/cli/install/sha.js +21 -0
- package/dist/cli/serve.d.ts +11 -0
- package/dist/cli/serve.js +72 -6
- package/dist/cli/upgrade.d.ts +67 -0
- package/dist/cli/upgrade.js +509 -0
- package/dist/gateway/downstream-pool.d.ts +39 -0
- package/dist/gateway/downstream-pool.js +93 -0
- package/dist/gateway/downstream.d.ts +80 -0
- package/dist/gateway/downstream.js +196 -0
- package/dist/gateway/middleware/audit-types.d.ts +10 -0
- package/dist/gateway/middleware/audit.js +14 -0
- package/dist/gateway/middleware/injection.d.ts +59 -2
- package/dist/gateway/middleware/injection.js +91 -14
- package/dist/gateway/middleware/kill-switch.d.ts +20 -5
- package/dist/gateway/middleware/kill-switch.js +57 -35
- package/dist/gateway/middleware/redact.d.ts +83 -6
- package/dist/gateway/middleware/redact.js +133 -46
- package/dist/gateway/observability/codex-probe.d.ts +110 -0
- package/dist/gateway/observability/codex-probe.js +234 -0
- package/dist/gateway/observability/codex-telemetry.d.ts +93 -0
- package/dist/gateway/observability/codex-telemetry.js +221 -0
- package/dist/gateway/redact-safe/match-timeout.d.ts +83 -0
- package/dist/gateway/redact-safe/match-timeout.js +179 -0
- package/dist/gateway/reviewers/claude-self.d.ts +99 -0
- package/dist/gateway/reviewers/claude-self.js +316 -0
- package/dist/gateway/reviewers/codex.d.ts +64 -0
- package/dist/gateway/reviewers/codex.js +80 -0
- package/dist/gateway/reviewers/select.d.ts +64 -0
- package/dist/gateway/reviewers/select.js +102 -0
- package/dist/gateway/reviewers/types.d.ts +85 -0
- package/dist/gateway/reviewers/types.js +14 -0
- package/dist/gateway/server.d.ts +51 -0
- package/dist/gateway/server.js +258 -0
- package/dist/gateway/session.d.ts +9 -0
- package/dist/gateway/session.js +17 -0
- package/dist/policy/loader.d.ts +59 -0
- package/dist/policy/loader.js +65 -0
- package/dist/policy/profiles.d.ts +80 -0
- package/dist/policy/profiles.js +94 -0
- package/dist/policy/types.d.ts +38 -0
- package/dist/registry/loader.d.ts +98 -0
- package/dist/registry/loader.js +153 -0
- package/dist/registry/types.d.ts +44 -0
- package/dist/registry/types.js +6 -0
- package/dist/scripts/read-policy-field.d.ts +36 -0
- package/dist/scripts/read-policy-field.js +96 -0
- package/hooks/push-review-gate.sh +627 -17
- package/package.json +13 -2
- package/profiles/bst-internal-no-codex.yaml +40 -0
- package/profiles/bst-internal.yaml +23 -0
- package/profiles/client-engagement.yaml +23 -0
- package/profiles/lit-wc.yaml +17 -0
- package/profiles/minimal.yaml +11 -0
- package/profiles/open-source-no-codex.yaml +33 -0
- package/profiles/open-source.yaml +18 -0
- package/scripts/lint-safe-regex.mjs +78 -0
- package/scripts/postinstall.mjs +131 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex adversarial reviewer adapter (G11.2).
|
|
3
|
+
*
|
|
4
|
+
* ## Why this class throws from `review()`
|
|
5
|
+
*
|
|
6
|
+
* The actual Codex review path is the `codex-adversarial` agent shipped under
|
|
7
|
+
* `.claude/agents/`, invoked from Claude Code via the `/codex-review` slash
|
|
8
|
+
* command (which eventually reaches the Codex plugin's
|
|
9
|
+
* `/codex:adversarial-review`). None of that is importable from TS — the
|
|
10
|
+
* agent runtime is the harness, not a library.
|
|
11
|
+
*
|
|
12
|
+
* `CodexReviewer` exists so:
|
|
13
|
+
*
|
|
14
|
+
* 1. `selectReviewer()` can return a typed reviewer handle with a stable
|
|
15
|
+
* `name`/`version` that the audit log and CLI can surface.
|
|
16
|
+
* 2. `isAvailable()` can cheaply probe the CLI without invoking a review.
|
|
17
|
+
* 3. G11.3 (startup probe) and G11.4 (no-Codex policy) have something to
|
|
18
|
+
* type-check against now, so the broader flow can land without waiting
|
|
19
|
+
* for an in-process Codex SDK that may never ship.
|
|
20
|
+
*
|
|
21
|
+
* If we ever get a native Codex TS client, `review()` becomes real and this
|
|
22
|
+
* comment block goes away. Until then: treat a Codex selection as
|
|
23
|
+
* "dispatch to the agent", not "await reviewer.review(...)".
|
|
24
|
+
*/
|
|
25
|
+
import { execFile } from 'node:child_process';
|
|
26
|
+
import { promisify } from 'node:util';
|
|
27
|
+
// Promise-returning flavour of `execFile`, so the probe can simply be awaited.
const execFileAsync = promisify(execFile);

/** Ceiling (ms) on `codex --version`; keeps a hung CLI from stalling the push gate. */
const VERSION_PROBE_TIMEOUT_MS = 2000;

/** Value reported as `version` for as long as no probe has produced one. */
const UNKNOWN_VERSION = 'unknown';

/**
 * Default process runner used by `CodexReviewer` when no `exec` is injected.
 *
 * @param file    Executable to run.
 * @param args    Arguments; copied into a fresh array before being handed on.
 * @param options Options forwarded untouched to `execFile`.
 * @returns Whatever the promisified `execFile` resolves to.
 */
const defaultExec = (file, args, options) => {
    const argv = [...args];
    return execFileAsync(file, argv, options);
};
|
|
33
|
+
/**
 * Reviewer handle for the Codex adversarial reviewer.
 *
 * The handle only carries identity (`name` / `version`) and a cheap
 * availability probe. The review itself is not runnable from here — see
 * `review()`.
 */
export class CodexReviewer {
    /** Short reviewer id. */
    name = 'codex';
    /** Process runner used for the probe; injectable through the constructor. */
    exec;
    /** Trimmed output of the last successful probe; unset until one succeeds. */
    cachedVersion;

    /**
     * @param opts Optional settings. `opts.exec` replaces the default process
     *             runner (useful for stubbing the CLI).
     */
    constructor(opts = {}) {
        const { exec } = opts;
        this.exec = exec ?? defaultExec;
    }

    /**
     * Version string captured by the most recent successful `isAvailable()`
     * call. Construction never runs the probe, so until then this is the
     * "unknown" placeholder.
     */
    get version() {
        const probed = this.cachedVersion;
        if (probed === undefined || probed === null) {
            return UNKNOWN_VERSION;
        }
        return probed;
    }

    /**
     * Probe the CLI by running `codex --version` under a bounded timeout.
     *
     * @returns `true` when the probe completed (its output is cached as the
     *          version); `false` on any failure at all.
     */
    async isAvailable() {
        try {
            const probe = await this.exec('codex', ['--version'], {
                timeout: VERSION_PROBE_TIMEOUT_MS,
            });
            const reported = probe.stdout.trim();
            // Remember the answer right away so `version` is meaningful as
            // soon as the selector settles on this reviewer.
            this.cachedVersion = reported || UNKNOWN_VERSION;
            return true;
        }
        catch {
            // Timeout, missing binary, non-zero exit — all mean "do not route
            // through Codex". The reason is deliberately not propagated.
            return false;
        }
    }

    /**
     * Placeholder: the Codex review runs through the codex-adversarial agent,
     * not through this method. Awaiting it always rejects, so a caller that
     * ignores that contract fails loudly instead of receiving a bogus result.
     *
     * TODO(0.3.0): once a native Codex TS client exists, run the real review
     * here and instrument it with `recordTelemetry` (G11.5). The placeholder
     * is intentionally not instrumented.
     *
     * @throws {Error} always.
     */
    async review(_req) {
        const message = 'CodexReviewer.review() is invoked via the codex-adversarial agent, not directly from TS';
        throw new Error(message);
    }
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reviewer selector (G11.2).
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "which adversarial reviewer should run against
|
|
5
|
+
* this branch?" Downstream callers get back a reviewer handle plus two
|
|
6
|
+
* audit-friendly signals: `degraded` (is this a fallback?) and `reason`
|
|
7
|
+
* (why did we pick this one?).
|
|
8
|
+
*
|
|
9
|
+
* Precedence, high to low:
|
|
10
|
+
*
|
|
11
|
+
* 1. `REA_REVIEWER` env var — explicit operator choice wins over all policy
|
|
12
|
+
* 2. `registry.reviewer` — operator pin in `.rea/registry.yaml`; second in precedence, after the env var
|
|
13
|
+
* 3. `policy.review.codex_required === false` — first-class no-Codex mode
|
|
14
|
+
* (G11.4 semantics). ClaudeSelfReviewer is NOT degraded here because
|
|
15
|
+
* the operator explicitly chose this lane.
|
|
16
|
+
* 4. Default: prefer Codex, fall back to ClaudeSelfReviewer with
|
|
17
|
+
* `degraded: true` if Codex is unavailable.
|
|
18
|
+
* 5. Both unavailable → throw. The push gate has an audited escape hatch
|
|
19
|
+
* (`REA_SKIP_CODEX_REVIEW`, G11.1) for when that's legitimately the
|
|
20
|
+
* operator's intent.
|
|
21
|
+
*
|
|
22
|
+
* The caller decides what to do with the result. The audit record should
|
|
23
|
+
* always capture `reviewer.name`, `reviewer.version`, `degraded`, and
|
|
24
|
+
* `reason` verbatim.
|
|
25
|
+
*/
|
|
26
|
+
import type { Policy } from '../../policy/types.js';
|
|
27
|
+
import type { Registry } from '../../registry/types.js';
|
|
28
|
+
import type { AdversarialReviewer } from './types.js';
|
|
29
|
+
export interface SelectionResult {
|
|
30
|
+
reviewer: AdversarialReviewer;
|
|
31
|
+
/**
|
|
32
|
+
* `true` iff we fell back to a less-preferred reviewer than the operator
|
|
33
|
+
* would have gotten in the default Codex-available case.
|
|
34
|
+
*/
|
|
35
|
+
degraded: boolean;
|
|
36
|
+
/** Short machine-readable code — one of the literals below. */
|
|
37
|
+
reason: SelectionReason;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Closed enum of reasons so downstream code can switch on it without
|
|
41
|
+
* stringly-typed comparisons. Stable — adding a new case is fine; renaming
|
|
42
|
+
* is a breaking change to audit consumers.
|
|
43
|
+
*/
|
|
44
|
+
export type SelectionReason = 'env:REA_REVIEWER' | 'registry.reviewer' | 'policy.review.codex_required=false' | 'default:codex-available' | 'default:codex-unavailable-fallback';
|
|
45
|
+
/**
|
|
46
|
+
* Narrow seam so tests can stub reviewer construction without touching
|
|
47
|
+
* process env or the Anthropic SDK.
|
|
48
|
+
*/
|
|
49
|
+
export interface SelectorDeps {
|
|
50
|
+
makeCodex: () => AdversarialReviewer;
|
|
51
|
+
makeClaudeSelf: () => AdversarialReviewer;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Thrown when neither Codex nor ClaudeSelfReviewer can run. Keep the
|
|
55
|
+
* message actionable — the operator should know which knobs to flip.
|
|
56
|
+
*/
|
|
57
|
+
export declare class NoReviewerAvailableError extends Error {
|
|
58
|
+
constructor();
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Pick the reviewer for the current branch. Callers MUST await — the
|
|
62
|
+
* Codex availability probe is an exec, not a sync call.
|
|
63
|
+
*/
|
|
64
|
+
export declare function selectReviewer(policy: Policy, registry: Registry, env?: NodeJS.ProcessEnv, deps?: SelectorDeps): Promise<SelectionResult>;
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reviewer selector (G11.2).
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "which adversarial reviewer should run against
|
|
5
|
+
* this branch?" Downstream callers get back a reviewer handle plus two
|
|
6
|
+
* audit-friendly signals: `degraded` (is this a fallback?) and `reason`
|
|
7
|
+
* (why did we pick this one?).
|
|
8
|
+
*
|
|
9
|
+
* Precedence, high to low:
|
|
10
|
+
*
|
|
11
|
+
* 1. `REA_REVIEWER` env var — explicit operator choice wins over all policy
|
|
12
|
+
* 2. `registry.reviewer` — operator pin in `.rea/registry.yaml`; second in precedence, after the env var
|
|
13
|
+
* 3. `policy.review.codex_required === false` — first-class no-Codex mode
|
|
14
|
+
* (G11.4 semantics). ClaudeSelfReviewer is NOT degraded here because
|
|
15
|
+
* the operator explicitly chose this lane.
|
|
16
|
+
* 4. Default: prefer Codex, fall back to ClaudeSelfReviewer with
|
|
17
|
+
* `degraded: true` if Codex is unavailable.
|
|
18
|
+
* 5. Both unavailable → throw. The push gate has an audited escape hatch
|
|
19
|
+
* (`REA_SKIP_CODEX_REVIEW`, G11.1) for when that's legitimately the
|
|
20
|
+
* operator's intent.
|
|
21
|
+
*
|
|
22
|
+
* The caller decides what to do with the result. The audit record should
|
|
23
|
+
* always capture `reviewer.name`, `reviewer.version`, `degraded`, and
|
|
24
|
+
* `reason` verbatim.
|
|
25
|
+
*/
|
|
26
|
+
import { ClaudeSelfReviewer } from './claude-self.js';
|
|
27
|
+
import { CodexReviewer } from './codex.js';
|
|
28
|
+
/**
 * Production reviewer factories. `selectReviewer` uses these unless the
 * caller supplies its own `deps`.
 */
const defaultDeps = {
    makeCodex() {
        return new CodexReviewer();
    },
    makeClaudeSelf() {
        return new ClaudeSelfReviewer();
    },
};
|
|
32
|
+
/**
|
|
33
|
+
* Thrown when neither Codex nor ClaudeSelfReviewer can run. Keep the
|
|
34
|
+
* message actionable — the operator should know which knobs to flip.
|
|
35
|
+
*/
|
|
36
|
+
/**
 * Raised by the selector when it has no reviewer left to hand back. The
 * message spells out the remedies so the operator knows what to change.
 */
export class NoReviewerAvailableError extends Error {
    constructor() {
        const message = [
            'No adversarial reviewer is available: Codex CLI is unreachable AND',
            'ANTHROPIC_API_KEY is unset. Either install/authenticate the Codex',
            'CLI, export ANTHROPIC_API_KEY, or use the REA_SKIP_CODEX_REVIEW',
            'audited escape hatch (G11.1) for this push.',
        ].join(' ');
        super(message);
        this.name = 'NoReviewerAvailableError';
    }
}
|
|
45
|
+
/**
 * Whether `value` is one of the reviewer ids the selector can construct.
 *
 * @param value Candidate id (any type).
 * @returns `true` only for the strings `'codex'` and `'claude-self'`.
 */
function isKnownReviewer(value) {
    return value === 'codex' || value === 'claude-self';
}

/**
 * Pick the reviewer for the current branch. Callers MUST await — the
 * availability probes are asynchronous.
 *
 * Precedence, high to low:
 *   1. `REA_REVIEWER` env var (non-empty string).
 *   2. `registry.reviewer` pin.
 *   3. `policy.review.codex_required === false` → ClaudeSelfReviewer, not
 *      degraded.
 *   4. Codex when its probe succeeds.
 *   5. ClaudeSelfReviewer as a degraded fallback when its probe succeeds.
 *
 * @param policy   Loaded policy; only `policy.review?.codex_required` is read.
 * @param registry Loaded registry; only `registry.reviewer` is read. A missing
 *                 or null pin is treated as "not pinned".
 * @param env      Environment to read `REA_REVIEWER` from (defaults to
 *                 `process.env`).
 * @param deps     Reviewer factories (defaults to the production ones).
 * @returns `{ reviewer, degraded, reason }`.
 * @throws {Error} when `REA_REVIEWER` or `registry.reviewer` names an unknown
 *                 reviewer.
 * @throws {NoReviewerAvailableError} when neither reviewer is available.
 */
export async function selectReviewer(policy, registry, env = process.env, deps = defaultDeps) {
    const build = (id) => (id === 'codex' ? deps.makeCodex() : deps.makeClaudeSelf());

    // 1. Env override — the operator explicitly chose. Availability is NOT
    //    probed here; the reviewer's own error path surfaces config problems.
    const envChoice = env['REA_REVIEWER'];
    if (typeof envChoice === 'string' && envChoice.length > 0) {
        if (!isKnownReviewer(envChoice)) {
            throw new Error(`REA_REVIEWER=${envChoice} is not a known reviewer. Valid values: codex, claude-self.`);
        }
        return {
            reviewer: build(envChoice),
            degraded: false,
            reason: 'env:REA_REVIEWER',
        };
    }

    // 2. Registry pin — same trust level as env, so it gets the same
    //    validation. Without it, a typo would silently select the same-model
    //    reviewer while still reporting `degraded: false`.
    const pinned = registry.reviewer;
    if (pinned !== undefined && pinned !== null) {
        if (!isKnownReviewer(pinned)) {
            throw new Error(`registry.reviewer=${String(pinned)} is not a known reviewer. Valid values: codex, claude-self.`);
        }
        return {
            reviewer: build(pinned),
            degraded: false,
            reason: 'registry.reviewer',
        };
    }

    // 3. Policy opt-in to no-Codex mode (G11.4): a first-class choice, so the
    //    result is NOT marked degraded.
    if (policy.review?.codex_required === false) {
        return {
            reviewer: deps.makeClaudeSelf(),
            degraded: false,
            reason: 'policy.review.codex_required=false',
        };
    }

    // 4. Default path — try Codex first.
    const codex = deps.makeCodex();
    if (await codex.isAvailable()) {
        return { reviewer: codex, degraded: false, reason: 'default:codex-available' };
    }

    // 5. Codex unavailable — fall back to ClaudeSelfReviewer if we can.
    const claude = deps.makeClaudeSelf();
    if (await claude.isAvailable()) {
        return {
            reviewer: claude,
            // The operator wanted Codex and got a same-model fallback instead;
            // the audit record must flag that.
            degraded: true,
            reason: 'default:codex-unavailable-fallback',
        };
    }
    throw new NoReviewerAvailableError();
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adversarial reviewer interface — shared shape for every pluggable reviewer
|
|
3
|
+
* rea knows how to dispatch through `hooks/push-review-gate.sh`.
|
|
4
|
+
*
|
|
5
|
+
* The push gate today hard-codes Codex. G11.2 abstracts the reviewer so we
|
|
6
|
+
* have a real fallback (same-model Claude self-review) when Codex is
|
|
7
|
+
* rate-limited or otherwise unavailable, and so G11.3/G11.4/G11.5 have a
|
|
8
|
+
* stable contract to type-check against.
|
|
9
|
+
*
|
|
10
|
+
* The types live in their own file so callers (the selector, future
|
|
11
|
+
* middleware, docs tooling) can import them without dragging in the runtime
|
|
12
|
+
* adapters (`codex.ts`, `claude-self.ts`).
|
|
13
|
+
*/
|
|
14
|
+
/** Verdict returned by a reviewer after inspecting a diff. */
|
|
15
|
+
export type ReviewVerdict = 'pass' | 'concerns' | 'blocking' | 'error';
|
|
16
|
+
/**
|
|
17
|
+
* One finding surfaced by a reviewer. Intentionally loose on optional
|
|
18
|
+
* positional fields — `line` and `start_line` are mutually useful but not
|
|
19
|
+
* every reviewer surfaces both, and we accept either (or neither when the
|
|
20
|
+
* finding is whole-file).
|
|
21
|
+
*/
|
|
22
|
+
export interface ReviewFinding {
|
|
23
|
+
category: 'security' | 'correctness' | 'edge-case' | 'test-gap' | 'api-design' | 'performance';
|
|
24
|
+
severity: 'high' | 'medium' | 'low';
|
|
25
|
+
file: string;
|
|
26
|
+
line?: number;
|
|
27
|
+
start_line?: number;
|
|
28
|
+
issue: string;
|
|
29
|
+
evidence?: string;
|
|
30
|
+
suggested_fix?: string;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Single-reviewer result. One of these is what the push gate ultimately acts
|
|
34
|
+
* on. `degraded` is a first-class signal — `false` means the operator is
|
|
35
|
+
* getting the review they asked for; `true` means we fell back (same-model
|
|
36
|
+
* check, truncated diff, etc.) and the audit log should reflect that.
|
|
37
|
+
*/
|
|
38
|
+
export interface ReviewResult {
|
|
39
|
+
/** Short reviewer id — e.g. `codex`, `claude-self`. Matches the class `name`. */
|
|
40
|
+
reviewer_name: string;
|
|
41
|
+
/** Model / plugin version used to produce this review. */
|
|
42
|
+
reviewer_version: string;
|
|
43
|
+
verdict: ReviewVerdict;
|
|
44
|
+
findings: ReviewFinding[];
|
|
45
|
+
/** One-sentence summary. */
|
|
46
|
+
summary: string;
|
|
47
|
+
/** `true` when this reviewer is a fallback for a preferred one. */
|
|
48
|
+
degraded: boolean;
|
|
49
|
+
/** Populated when `verdict === 'error'`. Never set on a successful review. */
|
|
50
|
+
error?: string;
|
|
51
|
+
}
|
|
52
|
+
/** Input shape passed to every reviewer. */
|
|
53
|
+
export interface ReviewRequest {
|
|
54
|
+
diff: string;
|
|
55
|
+
commit_log: string;
|
|
56
|
+
branch: string;
|
|
57
|
+
head_sha: string;
|
|
58
|
+
/** Ref the branch was diffed against (e.g. `origin/main`). */
|
|
59
|
+
target: string;
|
|
60
|
+
/** Optional extra paths the reviewer may want to pull into context. */
|
|
61
|
+
context_hints?: string[];
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Implementations live in `codex.ts` and `claude-self.ts`. Future entries
|
|
65
|
+
* (e.g. a cross-model OpenAI GPT-5 reviewer) land as additional classes that
|
|
66
|
+
* conform to this interface — the selector decides which one runs.
|
|
67
|
+
*/
|
|
68
|
+
export interface AdversarialReviewer {
|
|
69
|
+
/** Short id — `codex`, `claude-self`, etc. Used in audit records. */
|
|
70
|
+
readonly name: string;
|
|
71
|
+
/** Model id or plugin version. May be populated lazily — e.g. `CodexReviewer` reports `unknown` until a successful `isAvailable()` probe. */
|
|
72
|
+
readonly version: string;
|
|
73
|
+
/**
|
|
74
|
+
* Cheap reachability check. MUST be side-effect-free beyond a bounded
|
|
75
|
+
* syscall / env read, and MUST resolve within a couple of seconds so the
|
|
76
|
+
* selector can fall back quickly.
|
|
77
|
+
*/
|
|
78
|
+
isAvailable(): Promise<boolean>;
|
|
79
|
+
/**
|
|
80
|
+
* Run the review. Implementations that delegate to an out-of-process agent
|
|
81
|
+
* (see `CodexReviewer`) may throw rather than return `ReviewResult` — the
|
|
82
|
+
* caller is expected to check the reviewer's class before dispatching.
|
|
83
|
+
*/
|
|
84
|
+
review(req: ReviewRequest): Promise<ReviewResult>;
|
|
85
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adversarial reviewer interface — shared shape for every pluggable reviewer
|
|
3
|
+
* rea knows how to dispatch through `hooks/push-review-gate.sh`.
|
|
4
|
+
*
|
|
5
|
+
* The push gate today hard-codes Codex. G11.2 abstracts the reviewer so we
|
|
6
|
+
* have a real fallback (same-model Claude self-review) when Codex is
|
|
7
|
+
* rate-limited or otherwise unavailable, and so G11.3/G11.4/G11.5 have a
|
|
8
|
+
* stable contract to type-check against.
|
|
9
|
+
*
|
|
10
|
+
* The types live in their own file so callers (the selector, future
|
|
11
|
+
* middleware, docs tooling) can import them without dragging in the runtime
|
|
12
|
+
* adapters (`codex.ts`, `claude-self.ts`).
|
|
13
|
+
*/
|
|
14
|
+
export {};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Upstream MCP server for `rea serve`.
|
|
3
|
+
*
|
|
4
|
+
* Architecture:
|
|
5
|
+
*
|
|
6
|
+
* Claude Code (MCP client over stdio)
|
|
7
|
+
* ↔ this Server (StdioServerTransport)
|
|
8
|
+
* ↔ middleware chain
|
|
9
|
+
* ↔ DownstreamPool
|
|
10
|
+
* ↔ per-server StdioClientTransport
|
|
11
|
+
* ↔ child MCP processes
|
|
12
|
+
*
|
|
13
|
+
* Every downstream tool call flows through the full middleware chain:
|
|
14
|
+
*
|
|
15
|
+
* audit → kill-switch → tier → policy → blocked-paths → rate-limit →
|
|
16
|
+
* circuit-breaker → injection → redact → result-size-cap → terminal
|
|
17
|
+
*
|
|
18
|
+
* The terminal middleware is a thin closure that dispatches to the pool and
|
|
19
|
+
* stores the response on `ctx.result`.
|
|
20
|
+
*
|
|
21
|
+
* Shutdown discipline: SIGTERM / SIGINT → stop accepting new calls, drain
|
|
22
|
+
* in-flight work, close the pool, exit 0. No orphaned child processes.
|
|
23
|
+
*
|
|
24
|
+
* ## Zero-server mode
|
|
25
|
+
*
|
|
26
|
+
* A gateway with zero downstream servers is a valid state — it means the
|
|
27
|
+
* consumer just ran `rea init` and has not yet populated `.rea/registry.yaml`.
|
|
28
|
+
* We boot normally, respond to `listTools` with an empty catalog, and log
|
|
29
|
+
* a pointer. Do not crash — breaking the daemon on an empty registry would
|
|
30
|
+
* turn first-run into a puzzle.
|
|
31
|
+
*/
|
|
32
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
33
|
+
import { DownstreamPool } from './downstream-pool.js';
|
|
34
|
+
import type { Registry } from '../registry/types.js';
|
|
35
|
+
import type { Policy } from '../policy/types.js';
|
|
36
|
+
export interface GatewayOptions {
|
|
37
|
+
baseDir: string;
|
|
38
|
+
policy: Policy;
|
|
39
|
+
registry: Registry;
|
|
40
|
+
}
|
|
41
|
+
export interface GatewayHandle {
|
|
42
|
+
/** Expose the Server for test harnesses that attach InMemoryTransport. */
|
|
43
|
+
server: Server;
|
|
44
|
+
/** Connect the Server to the provided transport (defaults to stdio). */
|
|
45
|
+
start(transport?: unknown): Promise<void>;
|
|
46
|
+
/** Graceful shutdown — drain in-flight, close pool, close server. */
|
|
47
|
+
stop(): Promise<void>;
|
|
48
|
+
/** Exposed for tests. */
|
|
49
|
+
pool: DownstreamPool;
|
|
50
|
+
}
|
|
51
|
+
export declare function createGateway(opts: GatewayOptions): GatewayHandle;
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Upstream MCP server for `rea serve`.
|
|
3
|
+
*
|
|
4
|
+
* Architecture:
|
|
5
|
+
*
|
|
6
|
+
* Claude Code (MCP client over stdio)
|
|
7
|
+
* ↔ this Server (StdioServerTransport)
|
|
8
|
+
* ↔ middleware chain
|
|
9
|
+
* ↔ DownstreamPool
|
|
10
|
+
* ↔ per-server StdioClientTransport
|
|
11
|
+
* ↔ child MCP processes
|
|
12
|
+
*
|
|
13
|
+
* Every downstream tool call flows through the full middleware chain:
|
|
14
|
+
*
|
|
15
|
+
* audit → kill-switch → tier → policy → blocked-paths → rate-limit →
|
|
16
|
+
* circuit-breaker → injection → redact → result-size-cap → terminal
|
|
17
|
+
*
|
|
18
|
+
* The terminal middleware is a thin closure that dispatches to the pool and
|
|
19
|
+
* stores the response on `ctx.result`.
|
|
20
|
+
*
|
|
21
|
+
* Shutdown discipline: SIGTERM / SIGINT → stop accepting new calls, drain
|
|
22
|
+
* in-flight work, close the pool, exit 0. No orphaned child processes.
|
|
23
|
+
*
|
|
24
|
+
* ## Zero-server mode
|
|
25
|
+
*
|
|
26
|
+
* A gateway with zero downstream servers is a valid state — it means the
|
|
27
|
+
* consumer just ran `rea init` and has not yet populated `.rea/registry.yaml`.
|
|
28
|
+
* We boot normally, respond to `listTools` with an empty catalog, and log
|
|
29
|
+
* a pointer. Do not crash — breaking the daemon on an empty registry would
|
|
30
|
+
* turn first-run into a puzzle.
|
|
31
|
+
*/
|
|
32
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
33
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
34
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
35
|
+
import { DownstreamPool, splitPrefixed } from './downstream-pool.js';
|
|
36
|
+
import { createAuditMiddleware } from './middleware/audit.js';
|
|
37
|
+
import { createKillSwitchMiddleware } from './middleware/kill-switch.js';
|
|
38
|
+
import { createTierMiddleware } from './middleware/tier.js';
|
|
39
|
+
import { createPolicyMiddleware } from './middleware/policy.js';
|
|
40
|
+
import { createBlockedPathsMiddleware } from './middleware/blocked-paths.js';
|
|
41
|
+
import { createRateLimitMiddleware } from './middleware/rate-limit.js';
|
|
42
|
+
import { createCircuitBreakerMiddleware } from './middleware/circuit-breaker.js';
|
|
43
|
+
import { createInjectionMiddleware } from './middleware/injection.js';
|
|
44
|
+
import { createRedactMiddleware, } from './middleware/redact.js';
|
|
45
|
+
import { wrapRegex } from './redact-safe/match-timeout.js';
|
|
46
|
+
import { createResultSizeCapMiddleware } from './middleware/result-size-cap.js';
|
|
47
|
+
import { executeChain } from './middleware/chain.js';
|
|
48
|
+
import { RateLimiter } from './rate-limiter.js';
|
|
49
|
+
import { CircuitBreaker } from './circuit-breaker.js';
|
|
50
|
+
import { currentSessionId } from './session.js';
|
|
51
|
+
import { InvocationStatus, Tier } from '../policy/types.js';
|
|
52
|
+
import { log } from '../cli/utils.js';
|
|
53
|
+
/**
|
|
54
|
+
* Build the ordered middleware chain used on every CallToolRequest.
|
|
55
|
+
* Order is prescriptive — DO NOT reorder without reading THREAT_MODEL.md §
|
|
56
|
+
* "Middleware ordering". The existing unit tests in
|
|
57
|
+
* `src/gateway/middleware/chain.test.ts` encode the semantic contract.
|
|
58
|
+
*/
|
|
59
|
+
/**
|
|
60
|
+
* G3: compile user-supplied redact patterns (already safe-regex-cleared by
|
|
61
|
+
* the policy loader) into `SafeRegex` instances with the configured timeout.
|
|
62
|
+
* The loader guarantees the regex source compiles, so we only catch errors
|
|
63
|
+
* defensively.
|
|
64
|
+
*/
|
|
65
|
+
/**
 * Turn the policy's user-supplied redact patterns into timeout-wrapped
 * matchers.
 *
 * @param policy         Policy object; `policy.redact?.patterns` is read and
 *                       a missing list is treated as empty.
 * @param matchTimeoutMs Timeout handed to `wrapRegex` for every pattern.
 * @returns One `{ name, source: 'user', safe }` record per pattern that
 *          compiled. A pattern that fails is logged and left out.
 */
function compileUserRedactPatterns(policy, matchTimeoutMs) {
    const configured = policy.redact?.patterns ?? [];
    const compiledPatterns = [];
    for (const pattern of configured) {
        try {
            const regex = new RegExp(pattern.regex, pattern.flags);
            const record = {
                name: pattern.name,
                source: 'user',
                safe: wrapRegex(regex, { timeoutMs: matchTimeoutMs }),
            };
            compiledPatterns.push(record);
        }
        catch (err) {
            // The header comment says the loader pre-validates these, so this
            // is a defensive path: warn and skip rather than abort startup.
            const detail = err instanceof Error ? err.message : String(err);
            log(`[rea] WARN: skipping malformed user redact pattern "${pattern.name}": ${detail}`);
        }
    }
    return compiledPatterns;
}
|
|
85
|
+
/**
 * Assemble the middleware list applied to every tool call, in execution
 * order. The order is prescriptive (see the file header): audit, kill-switch,
 * tier, policy, blocked-paths, rate-limit, circuit-breaker, injection,
 * redact, result-size-cap. The terminal dispatcher is appended by the caller.
 *
 * @param opts Gateway options; `opts.baseDir` and `opts.policy` are used.
 * @returns Array of middleware in the order they must run.
 */
function buildMiddlewareChain(opts) {
    const baseDir = opts.baseDir;
    const policy = opts.policy;
    // One timeout governs both injection scanning and redaction; 100 ms
    // applies when the policy does not set one.
    const matchTimeoutMs = policy.redact?.match_timeout_ms ?? 100;
    const userPatterns = compileUserRedactPatterns(policy, matchTimeoutMs);

    const chain = [];
    chain.push(createAuditMiddleware(baseDir, policy));
    chain.push(createKillSwitchMiddleware(baseDir));
    chain.push(createTierMiddleware());
    chain.push(createPolicyMiddleware(policy, undefined, baseDir));
    chain.push(createBlockedPathsMiddleware(policy, baseDir));
    chain.push(createRateLimitMiddleware(new RateLimiter()));
    chain.push(createCircuitBreakerMiddleware(new CircuitBreaker()));
    // Anything other than an explicit 'warn' is treated as 'block'.
    chain.push(createInjectionMiddleware(policy.injection_detection === 'warn' ? 'warn' : 'block', {
        matchTimeoutMs,
    }));
    chain.push(createRedactMiddleware({ matchTimeoutMs, userPatterns }));
    chain.push(createResultSizeCapMiddleware());
    return chain;
}
|
|
104
|
+
/**
 * Build the rea gateway: an MCP `Server` that fronts a pool of downstream
 * servers and routes every tool call through the middleware chain.
 *
 * @param {object} opts - Gateway options. `opts.registry` is handed to the
 *   `DownstreamPool`; the whole object is handed to `buildMiddlewareChain`.
 * @returns {{ server: object, start: Function, stop: Function, pool: object }}
 *   The MCP server, the lifecycle functions `start(transport?)` / `stop()`,
 *   and the downstream pool.
 */
export function createGateway(opts) {
    const { registry } = opts;
    const pool = new DownstreamPool(registry);
    const server = new Server({ name: 'rea', version: '0.2.0' }, { capabilities: { tools: {} } });
    const staticChain = buildMiddlewareChain(opts);

    // ── Local helpers ────────────────────────────────────────────────────────
    /** Render any thrown value as a message string. */
    const errorMessage = (err) => (err instanceof Error ? err.message : String(err));
    /** Build an `isError` tool result carrying a single text item. */
    const errorResult = (text) => ({
        isError: true,
        content: [{ type: 'text', text }],
    });
    /**
     * Coerce a downstream result that is not already `{ content: [...] }` into
     * text. `JSON.stringify` returns `undefined` (not a string) for `undefined`,
     * functions and symbols, so fall back to an empty string to keep the text
     * item's `text` field a string.
     */
    const resultText = (result) => {
        if (typeof result === 'string')
            return result;
        return JSON.stringify(result) ?? '';
    };

    // ── Handlers ─────────────────────────────────────────────────────────────
    server.setRequestHandler(ListToolsRequestSchema, async () => {
        if (pool.size === 0)
            return { tools: [] };
        const prefixed = await pool.listAllTools();
        return {
            tools: prefixed.map((t) => ({
                name: t.name,
                // Fallback description strips `<server>` plus a 2-character
                // separator from the prefixed name.
                // NOTE(review): the `+ 2` presumably matches the separator used by
                // splitPrefixed — confirm.
                description: t.description ?? `${t.server} → ${t.name.slice(t.server.length + 2)}`,
                inputSchema: t.inputSchema ?? { type: 'object' },
            })),
        };
    });
    server.setRequestHandler(CallToolRequestSchema, async (req) => {
        const prefixed = req.params.name;
        const args = (req.params.arguments ?? {});
        // Split prefix for downstream dispatch; the terminal middleware uses the
        // full prefixed name to call the pool (which re-splits internally).
        let split;
        try {
            split = splitPrefixed(prefixed);
        }
        catch (err) {
            // A name that cannot be split is rejected here, before the middleware
            // chain runs.
            return errorResult(errorMessage(err));
        }
        const ctx = {
            tool_name: split.tool,
            server_name: split.server,
            arguments: args,
            session_id: currentSessionId(),
            status: InvocationStatus.Allowed,
            start_time: Date.now(),
            metadata: {},
        };
        // Last link of the chain: performs the downstream call, but only if no
        // earlier middleware changed the status away from Allowed.
        const terminal = async (context) => {
            if (context.status !== InvocationStatus.Allowed)
                return;
            if (pool.size === 0) {
                context.status = InvocationStatus.Denied;
                context.error = 'No downstream servers in .rea/registry.yaml — add one to enable proxying';
                return;
            }
            try {
                context.result = await pool.callTool(prefixed, context.arguments);
            }
            catch (err) {
                context.status = InvocationStatus.Error;
                context.error = errorMessage(err);
            }
        };
        try {
            await executeChain([...staticChain, terminal], ctx);
        }
        catch (err) {
            // executeChain will have run the audit middleware's try/finally; any
            // error that escapes is bubbled here. Convert to an isError response.
            ctx.status = InvocationStatus.Error;
            ctx.error = errorMessage(err);
        }
        // ── Response mapping ──────────────────────────────────────────────────
        if (ctx.status === InvocationStatus.Denied)
            return errorResult(ctx.error ?? 'denied');
        if (ctx.status === InvocationStatus.Error)
            return errorResult(ctx.error ?? 'error');
        // Allowed — return the downstream's raw result. Most MCP servers return
        // a `{ content: [...] }` object already; if not, wrap in a text content.
        if (ctx.result !== null &&
            typeof ctx.result === 'object' &&
            'content' in ctx.result) {
            return ctx.result;
        }
        return {
            content: [
                {
                    type: 'text',
                    text: resultText(ctx.result),
                },
            ],
        };
    });

    // ── Lifecycle ────────────────────────────────────────────────────────────
    let started = false;
    let stopping = false;
    /**
     * Connect the downstream pool (when non-empty) and then attach the server
     * to `transport`, defaulting to a new `StdioServerTransport`. Calls after
     * the first return immediately.
     */
    async function start(transport) {
        if (started)
            return;
        started = true;
        // Connect to downstream children first so the `listTools` catalog is ready
        // by the time the upstream client connects.
        if (pool.size === 0) {
            log('rea serve: no downstream servers in .rea/registry.yaml — running in no-op mode. Add servers to enable proxying.');
        }
        else {
            try {
                await pool.connectAll();
            }
            catch (err) {
                log(`rea serve: downstream connect error: ${errorMessage(err)}`);
                // Continue — individual connections may still be healthy.
            }
        }
        const activeTransport = transport ?? new StdioServerTransport();
        await server.connect(activeTransport);
    }
    /**
     * Close the server (best-effort) and then the downstream pool. Calls after
     * the first return immediately.
     */
    async function stop() {
        if (stopping)
            return;
        stopping = true;
        try {
            await server.close();
        }
        catch {
            // Best-effort — may already be closed.
        }
        await pool.close();
    }
    return { server, start, stop, pool };
}
|
|
255
|
+
// Reference `Tier` in a no-op expression so TS does not flag the import as
|
|
256
|
+
// unused when this file is compiled in isolation. Keeping the import pins the
|
|
257
|
+
// dependency edge for future middleware that may inspect the tier in `terminal`.
|
|
258
|
+
void Tier;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-process session identifier. One `rea serve` invocation = one session_id.
|
|
3
|
+
* Matches how Claude Code's long-running sessions work today. Revisit when we
|
|
4
|
+
* add a streamable-HTTP transport that might serve multiple reconnecting
|
|
5
|
+
* clients.
*
* @returns The session identifier for the current process.
|
|
6
|
+
*/
|
|
7
|
+
export declare function currentSessionId(): string;
|
|
8
|
+
/** Test-only hook: resets the module-level session id. Not intended for production callers. */
|
|
9
|
+
export declare function __resetSessionForTests(): void;
|