@quantiya/codevibe-core 1.0.18 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/appsync/appsync-client.d.ts +16 -84
  2. package/dist/appsync/queries.d.ts +2 -8
  3. package/dist/audit-keys/__tests__/audit-keys-parity.test.d.ts +1 -0
  4. package/dist/audit-keys/index.d.ts +41 -0
  5. package/dist/auth/auth-telemetry.d.ts +0 -9
  6. package/dist/index.d.ts +4 -0
  7. package/dist/index.js +72 -45
  8. package/dist/orchestration/detect-agents.d.ts +56 -0
  9. package/dist/orchestration/index.d.ts +2 -0
  10. package/dist/orchestration/orchestration-cli.d.ts +9 -0
  11. package/dist/reviewer/__tests__/integration.test.d.ts +1 -0
  12. package/dist/reviewer/__tests__/mocks.test.d.ts +1 -0
  13. package/dist/reviewer/__tests__/output-parser.test.d.ts +1 -0
  14. package/dist/reviewer/__tests__/registry.test.d.ts +1 -0
  15. package/dist/reviewer/__tests__/subprocess.test.d.ts +1 -0
  16. package/dist/reviewer/index.d.ts +15 -0
  17. package/dist/reviewer/mocks.d.ts +80 -0
  18. package/dist/reviewer/output-parser.d.ts +95 -0
  19. package/dist/reviewer/provider.d.ts +153 -0
  20. package/dist/reviewer/providers/__tests__/claude-live-smoke.test.d.ts +1 -0
  21. package/dist/reviewer/providers/__tests__/claude.test.d.ts +1 -0
  22. package/dist/reviewer/providers/__tests__/codex-live-smoke.test.d.ts +1 -0
  23. package/dist/reviewer/providers/__tests__/codex.test.d.ts +1 -0
  24. package/dist/reviewer/providers/__tests__/gemini-live-smoke.test.d.ts +1 -0
  25. package/dist/reviewer/providers/__tests__/gemini.test.d.ts +1 -0
  26. package/dist/reviewer/providers/claude.d.ts +59 -0
  27. package/dist/reviewer/providers/codex.d.ts +67 -0
  28. package/dist/reviewer/providers/common.d.ts +25 -0
  29. package/dist/reviewer/providers/gemini.d.ts +108 -0
  30. package/dist/reviewer/registry.d.ts +87 -0
  31. package/dist/reviewer/subprocess.d.ts +117 -0
  32. package/dist/reviewer/types.d.ts +101 -0
  33. package/dist/types/index.d.ts +1 -0
  34. package/dist/types/reviewer.d.ts +67 -0
  35. package/dist/types/session.d.ts +16 -0
  36. package/package.json +6 -3
@@ -0,0 +1,15 @@
1
+ export type { AgentKind, ReviewerRole, ReviewerVerdict, VerdictId, VerdictKind, } from './types.js';
2
+ export type { ReviewerError, ReviewerProvider, ReviewerSpec, } from './provider.js';
3
+ export { ReviewerErrorClass } from './provider.js';
4
+ export type { ParseResult, ParsedVerdict, VerdictParseError, } from './output-parser.js';
5
+ export { parseVerdictOutput, VerdictParseErrorClass } from './output-parser.js';
6
+ export type { RunReviewerOptions, SubprocessError, SubprocessOutcome, } from './subprocess.js';
7
+ export { runReviewer, SubprocessErrorClass } from './subprocess.js';
8
+ export type { ClaudeReviewerProviderOptions } from './providers/claude.js';
9
+ export { ClaudeReviewerProvider } from './providers/claude.js';
10
+ export type { GeminiEnvelope, GeminiModelStats, GeminiReviewerProviderOptions, GeminiStats, } from './providers/gemini.js';
11
+ export { GeminiReviewerProvider } from './providers/gemini.js';
12
+ export type { CodexReviewerProviderOptions } from './providers/codex.js';
13
+ export { CodexReviewerProvider } from './providers/codex.js';
14
+ export { ReviewerRegistry, createSubprocessReviewerRegistry, } from './registry.js';
15
+ export { MockReviewerSpawner, StaticReviewerMock } from './mocks.js';
@@ -0,0 +1,80 @@
1
+ import { type ReviewerProvider, type ReviewerSpec } from './provider.js';
2
+ import type { AgentKind, ReviewerVerdict, VerdictKind } from './types.js';
3
+ import { type ReviewerError } from './provider.js';
4
+ /**
5
+ * Scripted `ReviewerProvider` for tests. Each `evaluate(spec, gateId)`
6
+ * pops the next response from the FIFO queue keyed by
7
+ * `(spec.agent, gateId)`. If no script remains, rejects with a `spawn_failed`
8
+ * error with a diagnostic so test setup bugs surface loudly.
9
+ *
10
+ * Use the `script*` methods (`scriptVerdict`, `scriptVerdictWithChanges`, `scriptError`) to queue responses before tests call
11
+ * `evaluate`.
12
+ */
13
+ export declare class MockReviewerSpawner implements ReviewerProvider {
14
+ private readonly scripts;
15
+ private static key;
16
+ /**
17
+ * Script a verdict (with empty `suggested_changes`) for the next
18
+ * `evaluate(agent, gateId)` call. Use `scriptVerdictWithChanges`
19
+ * when `kind === 'REVISE'` (the parser-locked invariant requires
20
+ * non-empty changes for REVISE).
21
+ */
22
+ scriptVerdict(agent: AgentKind, gateId: string, kind: VerdictKind, reasoning: string): void;
23
+ /**
24
+ * Script a verdict with explicit `suggested_changes`. Required when
25
+ * `kind === 'REVISE'`.
26
+ */
27
+ scriptVerdictWithChanges(agent: AgentKind, gateId: string, kind: VerdictKind, reasoning: string, suggested_changes: string[]): void;
28
+ /** Script an error to return on the next `evaluate(agent, gateId)` call. */
29
+ scriptError(agent: AgentKind, gateId: string, error: ReviewerError): void;
30
+ /**
31
+ * How many scripted responses remain for the given key. Useful for
32
+ * test post-conditions ("all scripts were consumed").
33
+ */
34
+ remaining(agent: AgentKind, gateId: string): number;
35
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
36
+ }
37
+ /**
38
+ * Stateless `ReviewerProvider` for engine-level integration tests and
39
+ * smoke tests. Ignores `gateId`; returns a fixed verdict per agent
40
+ * (or a global default).
41
+ *
42
+ * Construct via static factories:
43
+ * - `StaticReviewerMock.allApprove()` / `.allReject()` / `.allRevise(changes)`
44
+ * / `.allEscalate()` — global default, every agent returns the same.
45
+ * - `StaticReviewerMock.allError(err)` — every agent returns the same error.
46
+ *
47
+ * Stack per-agent overrides on top:
48
+ * - `.withAgentVerdict(agent, kind)` — override one agent's verdict.
49
+ * - `.withAgentError(agent, err)` — override one agent's error.
50
+ *
51
+ * Stacking models mixed-verdict scenarios: e.g.
52
+ * `StaticReviewerMock.allApprove().withAgentVerdict('gemini', 'REJECT')`
53
+ * → Claude approves + Gemini rejects + Codex approves → engine escalates.
54
+ */
55
+ export declare class StaticReviewerMock implements ReviewerProvider {
56
+ private defaultResponse;
57
+ private readonly perAgent;
58
+ /** Empty mock. Rejects with a `spawn_failed` error (carrying a diagnostic) on every
59
+ * `evaluate` call until a default or per-agent override is set. */
60
+ static new(): StaticReviewerMock;
61
+ /** All reviewers return APPROVE. */
62
+ static allApprove(): StaticReviewerMock;
63
+ /** All reviewers return REJECT. */
64
+ static allReject(): StaticReviewerMock;
65
+ /** All reviewers return REVISE with the given suggested changes.
66
+ * Empty `changes` falls back to a placeholder. */
67
+ static allRevise(changes: string[]): StaticReviewerMock;
68
+ /** All reviewers return ESCALATE. */
69
+ static allEscalate(): StaticReviewerMock;
70
+ /** All reviewers return the given error. */
71
+ static allError(err: ReviewerError): StaticReviewerMock;
72
+ /**
73
+ * Override the response for one agent. Stacks on top of whatever
74
+ * default was configured. Returns `this` for fluent chaining.
75
+ */
76
+ withAgentVerdict(agent: AgentKind, kind: VerdictKind): this;
77
+ /** Override to return an error for one specific agent. */
78
+ withAgentError(agent: AgentKind, err: ReviewerError): this;
79
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
80
+ }
@@ -0,0 +1,95 @@
1
+ import type { VerdictKind } from './types.js';
2
+ /**
3
+ * Successfully parsed reviewer reply. The engine's fan-out layer lifts
4
+ * these fields into a `ReviewerVerdict` alongside telemetry (agent, tokens,
5
+ * latency, model) supplied by the subprocess layer.
6
+ */
7
+ export interface ParsedVerdict {
8
+ /** The verdict the reviewer returned. */
9
+ kind: VerdictKind;
10
+ /**
11
+ * Free-form reasoning text (joined with blank-line paragraph separators
12
+ * preserved). Never empty by the time we reach here — reviewers that
13
+ * return only a verdict line fail parsing via `reasoning_missing`.
14
+ */
15
+ reasoning: string;
16
+ /**
17
+ * Ordered list of specific changes. Always non-empty when
18
+ * `kind === 'REVISE'`; always empty otherwise (enforced by the parser
19
+ * via `revise_missing_changes` / `suggested_changes_require_revise`).
20
+ */
21
+ suggested_changes: string[];
22
+ }
23
+ /**
24
+ * Why a reviewer's reply failed the locked-format check. Discriminated
25
+ * union; the subprocess layer maps any of these into `ReviewerError` with
26
+ * `kind: 'parse_failure'` and the raw output attached.
27
+ *
28
+ * Mirrors Rust's `VerdictParseError` enum 1:1.
29
+ */
30
+ export type VerdictParseError = {
31
+ /** The entire reply was whitespace or empty. A reviewer that wrote
32
+ * nothing has effectively timed out. */
33
+ kind: 'empty_output';
34
+ } | {
35
+ /**
36
+ * The first non-blank line did not start with one of the four
37
+ * verdict keywords (case-insensitive). Includes any leading markdown
38
+ * formatting, quotation, or `VERDICT:` prefix that makes the line
39
+ * diverge from the locked contract.
40
+ *
41
+ * Also fired when a non-blank, non-indented line of prose appears
42
+ * after the bullet section starts (the format reserves post-bullet
43
+ * lines for blank lines or indented continuations only).
44
+ */
45
+ kind: 'invalid_verdict';
46
+ /** The offending line, trimmed but otherwise verbatim. */
47
+ found: string;
48
+ } | {
49
+ /** Verdict was parsed but no reasoning followed. The locked contract
50
+ * requires reasoning text so the audit record and user-facing
51
+ * disagreement UI have something to show; a bare APPROVE / REJECT
52
+ * line is a parse failure. */
53
+ kind: 'reasoning_missing';
54
+ } | {
55
+ /** Verdict was REVISE but no bulleted suggested-changes list followed.
56
+ * Design-locked: REVISE requires at least one concrete change. */
57
+ kind: 'revise_missing_changes';
58
+ } | {
59
+ /** A non-REVISE verdict was followed by a bulleted list, which the
60
+ * locked contract reserves for REVISE only. */
61
+ kind: 'suggested_changes_require_revise';
62
+ /** The non-REVISE verdict that erroneously had bullets. */
63
+ found: VerdictKind;
64
+ };
65
+ /**
66
+ * Class wrapper around a `VerdictParseError` so callers can `throw`/`catch`
67
+ * structured errors via JS idioms. `.detail` carries the discriminated
68
+ * union; `Error.message` is a human-readable formatting.
69
+ */
70
+ export declare class VerdictParseErrorClass extends Error {
71
+ readonly detail: VerdictParseError;
72
+ constructor(detail: VerdictParseError);
73
+ }
74
+ /**
75
+ * Result type for `parseVerdictOutput`. Discriminated by `ok`. Mirrors
76
+ * Rust's `Result<ParsedVerdict, VerdictParseError>` without forcing TS
77
+ * callers to `try/catch` — most call sites want to inspect failure types
78
+ * directly to feed the audit log.
79
+ */
80
+ export type ParseResult = {
81
+ ok: true;
82
+ verdict: ParsedVerdict;
83
+ } | {
84
+ ok: false;
85
+ error: VerdictParseError;
86
+ };
87
+ /**
88
+ * Parse a reviewer reply. Strict: any deviation from the locked format
89
+ * returns `{ ok: false, error: ... }` which the subprocess layer routes to
90
+ * a parse-failure `ReviewerError`.
91
+ *
92
+ * Mirrors Rust's `parse_verdict_output` byte-for-byte. Test coverage
93
+ * matches the Rust unit-test set verbatim.
94
+ */
95
+ export declare function parseVerdictOutput(raw: string): ParseResult;
@@ -0,0 +1,153 @@
1
+ import type { AgentKind, ReviewerRole, ReviewerVerdict } from './types.js';
2
+ /**
3
+ * Spec for spawning one reviewer at one gate. Constructed by the engine
4
+ * from `ReviewerAgentSpec` (from `PolicySnapshot`) + the context bundle for
5
+ * the gate.
6
+ *
7
+ * # Identity
8
+ *
9
+ * Per the 2026-04-23 seat/role pivot, the spec's primary identity is
10
+ * `seat_id` — position in the review panel — NOT `agent`. `agent` may
11
+ * repeat across seats within a single gate (single-vendor case: two Claude
12
+ * seats with different roles). Downstream consensus + verdict validation
13
+ * dedup on `seat_id`, never on `agent`. Providers echo `seat_id` + `role`
14
+ * back on the returned `ReviewerVerdict` so the audit trail can attribute
15
+ * verdicts by lens rather than by agent kind.
16
+ */
17
+ export interface ReviewerSpec {
18
+ /**
19
+ * Position in the review panel, 0-indexed. For an N-seat policy this is
20
+ * in `0..N` (upper bound exclusive, i.e. `0` through `N - 1`). The primary identity key for this reviewer across the
21
+ * engine, audit log, and FFI — providers MUST echo this value back on
22
+ * the produced `ReviewerVerdict.seat_id`.
23
+ */
24
+ seat_id: number;
25
+ /**
26
+ * The lens this seat reviews through — Architecture / Correctness /
27
+ * Security for code; Accuracy / Clarity / Completeness for docs;
28
+ * composites for mixed. Drives the role-specific prompt prefix and is
29
+ * echoed back on the verdict for audit attribution. Unique within a
30
+ * policy (a duplicate role defeats the orthogonality purpose).
31
+ */
32
+ role: ReviewerRole;
33
+ /**
34
+ * Which agent to spawn. MAY repeat across seats when roles differ — the
35
+ * single-vendor case the 2026-04-23 pivot enables.
36
+ */
37
+ agent: AgentKind;
38
+ /**
39
+ * Tool names the reviewer is allowed to invoke. Typically
40
+ * `["Read", "Grep", "Glob"]` per the read-only reviewer invariant. Shape
41
+ * is agent-agnostic here; each provider enforces the sandbox in its own
42
+ * way:
43
+ * - Claude: `--allowed-tools Read,Grep,Glob` CLI flag.
44
+ * - Gemini: `--approval-mode plan` (Gemini's first-class read-only mode).
45
+ * The `tool_allowlist` is intentionally unused by the Gemini provider;
46
+ * plan mode is the sandbox contract. 2.0.x task #62 tracks adding
47
+ * `--policy <tempfile>` as defense-in-depth.
48
+ * - Codex: `--sandbox read-only --skip-git-repo-check` flags + per-spawn
49
+ * ephemeral output file.
50
+ */
51
+ tool_allowlist: string[];
52
+ /**
53
+ * Pre-rendered reviewer prompt. Engine renders per artifact type
54
+ * (code / docs / mixed) with a role-specific prefix prepended before
55
+ * calling `evaluate`.
56
+ */
57
+ prompt_template: string;
58
+ /**
59
+ * Wall-clock timeout for one reviewer's verdict. After this elapses the
60
+ * provider should throw `ReviewerError` with `kind: 'timeout'` and
61
+ * cancel the underlying process.
62
+ */
63
+ timeout_ms: number;
64
+ /**
65
+ * Optional per-reviewer model preference. `null` defers to the agent's
66
+ * CLI default.
67
+ */
68
+ model_hint: string | null;
69
+ }
70
+ /**
71
+ * Typed error thrown by `ReviewerProvider.evaluate`. Discriminated union
72
+ * matching Rust's `#[serde(tag = "kind", rename_all = "snake_case")]` enum.
73
+ *
74
+ * The engine's consensus path treats any thrown error as equivalent to a
75
+ * `VerdictKind::Escalate` for routing (so safety-defaults-to-escalation
76
+ * holds), but the distinct variants matter for the audit log and the
77
+ * user-facing error message.
78
+ */
79
+ export type ReviewerError = {
80
+ /** Reviewer exceeded its `timeout_ms` budget. */
81
+ kind: 'timeout';
82
+ /** Which agent timed out (carried so the audit entry can attribute
83
+ * cost / reliability back to the specific agent). */
84
+ agent: AgentKind;
85
+ /** Wall-clock ms elapsed before timeout fired. */
86
+ elapsed_ms: number;
87
+ } | {
88
+ /** Reviewer process could not be launched (CLI missing, spawn syscall
89
+ * failed, etc.). */
90
+ kind: 'spawn_failed';
91
+ agent: AgentKind;
92
+ /** Human-readable cause. */
93
+ reason: string;
94
+ } | {
95
+ /** Reviewer returned but its output couldn't be parsed into a valid
96
+ * verdict. Raw output is preserved for the audit log. */
97
+ kind: 'parse_failure';
98
+ agent: AgentKind;
99
+ /** Raw output (truncated to a reasonable length by the caller if
100
+ * needed). */
101
+ raw_output: string;
102
+ } | {
103
+ /** Reviewer was cancelled by the engine before completing (e.g.,
104
+ * user abort, a sibling reviewer already hard-rejected). No
105
+ * per-agent attribution because cancellation can fire on any
106
+ * reviewer in flight. */
107
+ kind: 'cancelled';
108
+ } | {
109
+ /** A reviewer's task panicked or was aborted before returning a
110
+ * verdict. Mirror of the Rust `InternalJoinFailure` variant. The
111
+ * variant deliberately has no `agent` field — attributing a panic
112
+ * to a specific agent would be a telemetry lie. */
113
+ kind: 'internal_join_failure';
114
+ reason: string;
115
+ };
116
+ /**
117
+ * Class wrapper around a `ReviewerError` so callers can `throw` typed
118
+ * errors and `try { ... } catch (e) { if (e instanceof ReviewerErrorClass) {
119
+ * /* type-narrow on e.detail.kind *\/ } }` from JS-idiom code paths.
120
+ *
121
+ * The `.detail` property carries the discriminated union; `Error.message`
122
+ * is a human-readable formatting matching Rust's `#[error(...)]` strings.
123
+ */
124
+ export declare class ReviewerErrorClass extends Error {
125
+ /** The structured error detail. Use `.kind` to narrow. */
126
+ readonly detail: ReviewerError;
127
+ constructor(detail: ReviewerError);
128
+ }
129
+ /**
130
+ * Engine's contract with reviewer implementations.
131
+ *
132
+ * **Provider owns lifecycle:** implementations must spawn, await verdict,
133
+ * enforce timeout, cancel, and capture logs internally. The engine calls
134
+ * `evaluate` and awaits — it never holds a handle that could outlive the
135
+ * call.
136
+ *
137
+ * Errors are thrown as `ReviewerErrorClass` instances; callers narrow via
138
+ * the `.detail.kind` discriminator.
139
+ */
140
+ export interface ReviewerProvider {
141
+ /**
142
+ * Spawn one reviewer per the spec, collect its verdict, enforce timeout.
143
+ *
144
+ * @param spec - the reviewer to spawn (seat_id + role + agent + prompt + timeout)
145
+ * @param gateId - the `ReviewGate` UUID this verdict attaches to. Stored
146
+ * on the returned `ReviewerVerdict.gate_id`.
147
+ * @returns the parsed `ReviewerVerdict` on success.
148
+ * @throws `ReviewerErrorClass` on timeout, spawn failure, parse failure,
149
+ * cancellation, or internal join failure. Use `e.detail.kind` to
150
+ * narrow.
151
+ */
152
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
153
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,59 @@
1
+ import { type ReviewerProvider, type ReviewerSpec } from '../provider.js';
2
+ import { type SubprocessOutcome } from '../subprocess.js';
3
+ import type { ReviewerVerdict } from '../types.js';
4
+ /**
5
+ * Built command shape — split out so tests can inspect args / env without
6
+ * spawning a process. `runReviewer` consumes this shape directly.
7
+ */
8
+ export interface BuiltCommand {
9
+ command: string;
10
+ args: string[];
11
+ env: NodeJS.ProcessEnv;
12
+ }
13
+ /** Construction options. */
14
+ export interface ClaudeReviewerProviderOptions {
15
+ /** Override the `claude` executable path. Production callers pass nothing
16
+ * (PATH lookup); integration tests can stub Claude with a shell-script
17
+ * fixture. */
18
+ executable?: string;
19
+ }
20
+ /**
21
+ * Real Claude Code reviewer provider. Wraps the `claude` CLI.
22
+ *
23
+ * Construct with `new ClaudeReviewerProvider()` for the standard `claude`
24
+ * binary, or `new ClaudeReviewerProvider({ executable: '/opt/bin/mock-claude' })`
25
+ * for tests that point at a fixture CLI on a controlled PATH.
26
+ */
27
+ export declare class ClaudeReviewerProvider implements ReviewerProvider {
28
+ private readonly executable;
29
+ constructor(opts?: ClaudeReviewerProviderOptions);
30
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
31
+ }
32
+ /**
33
+ * Construct the Claude CLI invocation. Split out from `evaluate` so unit
34
+ * tests can assert on the built arg list + env without actually spawning a
35
+ * process.
36
+ *
37
+ * Locked flag set:
38
+ * - `--print` — non-interactive mode; emit the response and exit, rather
39
+ * than waiting for further turns. Required so the subprocess actually
40
+ * terminates on its own.
41
+ * - `--allowed-tools <CSV>` — design-locked sandbox. `ReviewerSpec.tool_allowlist`
42
+ * is populated from the engine's policy; the provider trusts the spec
43
+ * rather than hard-coding the list.
44
+ * - `--model <hint>` — optional; omitted when `spec.model_hint` is `null`,
45
+ * deferring to Claude's default.
46
+ *
47
+ * # QUORUM_REVIEWER_SUBPROCESS env var
48
+ *
49
+ * Set unconditionally to `'1'` in the child's environment. See module docs
50
+ * for the rationale and why `--bare` was rejected as the primary defense.
51
+ */
52
+ export declare function buildCommand(executable: string, spec: ReviewerSpec): BuiltCommand;
53
+ /**
54
+ * Map the raw subprocess outcome into a `ReviewerVerdict` or throw a
55
+ * structured `ReviewerErrorClass`. Exit code is checked first — a crashed
56
+ * CLI that happened to print something valid on stdout should still be
57
+ * treated as a spawn failure, not a silently-accepted verdict.
58
+ */
59
+ export declare function buildVerdict(spec: ReviewerSpec, gateId: string, outcome: SubprocessOutcome): ReviewerVerdict;
@@ -0,0 +1,67 @@
1
+ import { type ReviewerProvider, type ReviewerSpec } from '../provider.js';
2
+ import { type SubprocessOutcome } from '../subprocess.js';
3
+ import type { ReviewerVerdict } from '../types.js';
4
+ import type { BuiltCommand } from './claude.js';
5
+ /** Construction options. */
6
+ export interface CodexReviewerProviderOptions {
7
+ /** Override the `codex` executable path. Production callers pass nothing
8
+ * (PATH lookup); integration tests can stub Codex with a shell-script
9
+ * fixture. */
10
+ executable?: string;
11
+ }
12
+ /**
13
+ * Real Codex CLI reviewer provider. Wraps the `codex exec` subcommand.
14
+ */
15
+ export declare class CodexReviewerProvider implements ReviewerProvider {
16
+ private readonly executable;
17
+ constructor(opts?: CodexReviewerProviderOptions);
18
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
19
+ }
20
+ /**
21
+ * Construct the Codex CLI invocation. Split out from `evaluate` so unit
22
+ * tests can assert on the built arg list without actually spawning a
23
+ * process.
24
+ *
25
+ * Locked flag set:
26
+ * - `exec` — the non-interactive subcommand (the interactive default
27
+ * would block forever waiting for user input).
28
+ * - `--sandbox read-only` — design-locked read-only sandbox.
29
+ * - `--skip-git-repo-check` — reviewer subprocesses must run in any cwd,
30
+ * not just git working trees.
31
+ * - `--color never` — strip ANSI escapes from any incidental stdout
32
+ * output (the JSONL stream is not affected, but logs and error
33
+ * messages are).
34
+ * - `--json` — emit JSONL events to stdout for token-count parsing.
35
+ * - `--ephemeral` — do NOT write a session JSONL under
36
+ * `~/.codex/sessions/`.
37
+ * - `--output-last-message <path>` — write the model's final agent
38
+ * message verbatim to `path`.
39
+ * - `--model <hint>` — optional; omitted when `spec.model_hint` is `null`.
40
+ * - `-` (final arg) — read prompt from stdin.
41
+ */
42
+ export declare function buildCommand(executable: string, spec: ReviewerSpec, lastMessagePath: string): BuiltCommand;
43
+ /**
44
+ * Map the raw subprocess outcome + the file Codex wrote into a
45
+ * `ReviewerVerdict` or throw a structured `ReviewerErrorClass`. See
46
+ * `claude.ts::buildVerdict` for the shared safety rule (non-zero exit
47
+ * overrides any parseable stdout / file).
48
+ */
49
+ export declare function buildVerdict(spec: ReviewerSpec, gateId: string, outcome: SubprocessOutcome, lastMessage: string): ReviewerVerdict;
50
+ /**
51
+ * Sum `usage.input_tokens + usage.output_tokens` across every
52
+ * `turn.completed` JSONL event in `stdout`. Returns `null` when no
53
+ * `turn.completed` event reported any token count, so dashboards can
54
+ * distinguish "no data" from a real zero.
55
+ *
56
+ * Lenient: malformed JSONL lines are silently skipped (Codex's stream is
57
+ * designed to be append-only, so partial flushes during timeout could
58
+ * leave a final truncated line).
59
+ */
60
+ export declare function sumCodexTokens(stdout: string): number | null;
61
+ /**
62
+ * Generate a unique-per-spawn path for `--output-last-message`. Lives in
63
+ * the OS temp dir; we own its lifecycle (create on codex's side, read +
64
+ * delete on ours). Per-process pid + UUID is enough — no race risk across
65
+ * concurrent reviewers in the same engine run.
66
+ */
67
+ export declare function makeLastMessagePath(): string;
@@ -0,0 +1,25 @@
1
+ import type { AgentKind } from '../types.js';
2
+ import type { ReviewerError } from '../provider.js';
3
+ import type { SubprocessError } from '../subprocess.js';
4
+ /**
5
+ * Map a subprocess-layer error into a `ReviewerError` for the given agent.
6
+ * Mirrors Rust's `providers::claude::map_subprocess_error` byte-for-byte:
7
+ *
8
+ * - `spawn_failed` → `ReviewerError::SpawnFailed { agent, reason }`
9
+ * - `timeout` → `ReviewerError::Timeout { agent, elapsed_ms }`
10
+ * - `io` → `ReviewerError::SpawnFailed { agent, reason: "io error: <msg>" }`
11
+ * (deliberate; an IO failure during spawn is a spawn failure from the
12
+ * audit log's perspective — the reviewer never produced a verdict)
13
+ * - `cancelled` → `ReviewerError` cancelled (no agent attribution; the
14
+ * Rust enum omits the agent field on Cancelled because cancellation
15
+ * can fire on any reviewer in flight)
16
+ *
17
+ * The Rust source's `map_subprocess_error` handles only three variants
18
+ * (`SpawnFailed`, `Timeout`, `Io`) because the Rust subprocess layer
19
+ * doesn't have a Cancelled variant — engine-driven cancellation in Rust
20
+ * is handled at a higher layer via `tokio::select!` against the abort
21
+ * future, never surfacing as a `SubprocessError`. The TS port carries
22
+ * cancellation in the subprocess layer (via `AbortSignal`), so we
23
+ * include it here.
24
+ */
25
+ export declare function mapSubprocessError(agent: AgentKind, err: SubprocessError): ReviewerError;
@@ -0,0 +1,108 @@
1
+ import { type ReviewerProvider, type ReviewerSpec } from '../provider.js';
2
+ import { type SubprocessOutcome } from '../subprocess.js';
3
+ import type { ReviewerVerdict } from '../types.js';
4
+ import type { BuiltCommand } from './claude.js';
5
+ /** Construction options. */
6
+ export interface GeminiReviewerProviderOptions {
7
+ /** Override the `gemini` executable path. Production callers pass nothing
8
+ * (PATH lookup); integration tests can stub Gemini with a shell-script
9
+ * fixture. */
10
+ executable?: string;
11
+ }
12
+ /**
13
+ * JSON envelope shape emitted by `gemini --output-format json`. We only care
14
+ * about `response`; the other fields (`session_id`, `stats`) are consumed by
15
+ * the JSON deserializer but not surfaced on `ReviewerVerdict` directly.
16
+ *
17
+ * Tolerates missing or extra top-level keys for cross-version compatibility.
18
+ */
19
+ export interface GeminiEnvelope {
20
+ response: string;
21
+ /** Gemini 0.38.2 emits this; older versions may not. */
22
+ session_id?: string;
23
+ stats?: GeminiStats;
24
+ }
25
+ export interface GeminiStats {
26
+ /** Map of model name → per-model stats. Token usage MUST be summed across
27
+ * every entry (R2 "telemetry timebomb" lesson — picking an arbitrary one
28
+ * via `Object.values()[0]` would under-report when Gemini reports
29
+ * auxiliary models). */
30
+ models?: Record<string, GeminiModelStats>;
31
+ }
32
+ export interface GeminiModelStats {
33
+ tokens?: {
34
+ /** Prompt + response tokens combined. Populated by Gemini's
35
+ * `stats.models.<name>.tokens.total`; used for cost telemetry. */
36
+ total?: number;
37
+ };
38
+ }
39
+ /**
40
+ * Real Gemini CLI reviewer provider. Wraps the `gemini` binary.
41
+ */
42
+ export declare class GeminiReviewerProvider implements ReviewerProvider {
43
+ private readonly executable;
44
+ constructor(opts?: GeminiReviewerProviderOptions);
45
+ evaluate(spec: ReviewerSpec, gateId: string): Promise<ReviewerVerdict>;
46
+ }
47
+ /**
48
+ * Construct the Gemini CLI invocation. Split out from `evaluate` so unit
49
+ * tests can assert on the built arg list + env without actually spawning a
50
+ * process.
51
+ *
52
+ * Locked flag set:
53
+ * - `-p ''` — non-interactive mode, empty inline prompt so the full prompt
54
+ * is read from stdin (consistent with Claude path).
55
+ * - `--approval-mode plan` — read-only sandbox.
56
+ * - `--output-format json` — structured reply envelope that survives
57
+ * user-hook stdout pollution.
58
+ * - `--model <hint>` — optional; omitted when `spec.model_hint` is `null`.
59
+ *
60
+ * `spec.tool_allowlist` is **intentionally unused** for Gemini. Plan mode
61
+ * is the design-locked sandbox; mapping the Claude-style allowlist to
62
+ * Gemini's (deprecated) `--allowed-tools` flag would hit the known
63
+ * non-interactive bugs flagged in the module docs.
64
+ *
65
+ * # QUORUM_REVIEWER_SUBPROCESS env var
66
+ *
67
+ * Set unconditionally to `'1'` in the child's environment. The user's
68
+ * Gemini plugin checks this at the top of `hooks/common.sh` and
69
+ * short-circuits every hook — without this gate, reviewer spawns would
70
+ * each create a new backend session and mark the user's primary Gemini
71
+ * session INACTIVE (empirically observed 2026-04-21 during Phase 2c.2
72
+ * local testing).
73
+ */
74
+ export declare function buildCommand(executable: string, spec: ReviewerSpec): BuiltCommand;
75
+ /**
76
+ * Map the raw subprocess outcome into a `ReviewerVerdict` or throw a
77
+ * structured `ReviewerErrorClass`. See `claude.ts::buildVerdict` for the
78
+ * shared safety rule (non-zero exit overrides any parseable stdout).
79
+ */
80
+ export declare function buildVerdict(spec: ReviewerSpec, gateId: string, outcome: SubprocessOutcome): ReviewerVerdict;
81
+ /**
82
+ * Parse the FIRST complete JSON object in `stdout`, discarding any trailing
83
+ * bytes. User-level hooks (e.g., CodeVibe's own Gemini plugin hooks
84
+ * installed in `~/.gemini/settings.json`) append log lines to stdout AFTER
85
+ * the model's reply envelope. This walks the text byte-by-byte tracking
86
+ * brace depth + string escapes to find where the first JSON object ends,
87
+ * then `JSON.parse` that slice — the equivalent of Rust's
88
+ * `serde_json::Deserializer::into_iter().next()` for our purposes.
89
+ *
90
+ * Returns `null` if stdout contains no valid JSON value at all, the
91
+ * extracted slice fails strict JSON parsing, OR the parsed value isn't a
92
+ * shape-conformant `GeminiEnvelope` (i.e. missing or non-string
93
+ * `response`). The caller routes any of these to ParseFailure with raw
94
+ * stdout — matching Rust's serde-deserialization-failure path.
95
+ *
96
+ * # Why runtime shape validation
97
+ *
98
+ * Rust's `serde_json::Deserializer` rejects `{}` or `{"response":123}`
99
+ * during deserialization because `GeminiEnvelope { response: String, ... }`
100
+ * makes the field required at the type-system layer. TypeScript's
101
+ * `JSON.parse(slice) as GeminiEnvelope` is a compile-time-only assertion
102
+ * with no runtime check, so we reproduce serde's behavior with an
103
+ * explicit `validateGeminiEnvelope` step. Without this, a syntactically-
104
+ * valid-but-shape-bad envelope would land an `undefined.replace()`
105
+ * TypeError up the call stack instead of a structured ParseFailure
106
+ * (R1 finding on Phase 2f.1.b round 1).
107
+ */
108
+ export declare function parseFirstJsonEnvelope(stdout: string): GeminiEnvelope | null;