ctx-cc 4.0.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,19 +7,21 @@ import { runAgent } from './runner.js';
7
7
  const MAX_REVIEW_CYCLES = 3;
8
8
 
9
9
  /**
10
- * Run the two-stage review gate.
10
+ * Run the three-stage review gate.
11
11
  *
12
12
  * Stage 1: ctx-reviewer checks spec compliance (acceptance criteria)
13
- * Stage 2: ctx-auditor checks code quality (security, performance, style)
13
+ * Stage 2: ctx-reviewer (quality framing) checks code quality (security, performance, style)
14
+ * Stage 3: ctx-codex-reviewer performs cross-model review via OpenAI Codex MCP
15
+ * (only runs if Stages 1 and 2 pass, and `config.codexReview !== false`).
14
16
  *
15
- * If either fails, returns feedback for re-execution.
17
+ * If any stage fails, returns feedback for re-execution.
16
18
  * Max cycles before requiring human intervention.
17
19
  *
18
20
  * Options:
19
21
  * ctxDir, projectDir, agentsDir, streaming, timeout, config
20
22
  *
21
23
  * Returns:
22
- * { passed: boolean, stage1: {...}, stage2: {...}, cycle: number, feedback: string|null }
24
+ * { passed, stage1, stage2, stage3, cycle, feedback, escalated }
23
25
  */
24
26
  export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming = true, timeout = 300000, config = {} }) {
25
27
  const state = readState(ctxDir);
@@ -49,19 +51,49 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
49
51
  ctxDir, projectDir, agentsDir, streaming, timeout,
50
52
  });
51
53
 
52
- // Stage 2: Code quality (auditor) — only if Stage 1 passes
54
+ // Stage 2: Code quality — only if Stage 1 passes. Reuses ctx-reviewer with quality framing;
55
+ // ctx-auditor is an audit-trail agent, not a code reviewer, so using it here was a miscast.
53
56
  let stage2 = null;
54
57
  if (stage1.passed) {
55
58
  stage2 = await runReviewStage({
56
59
  stageName: 'code-quality',
57
- agentFile: 'ctx-auditor.md',
58
- agentCommand: 'audit',
60
+ agentFile: 'ctx-reviewer.md',
61
+ agentCommand: 'review',
59
62
  prompt: buildReviewPrompt(state, 'quality'),
60
63
  ctxDir, projectDir, agentsDir, streaming, timeout,
61
64
  });
62
65
  }
63
66
 
64
- const passed = stage1.passed && (stage2 ? stage2.passed : false);
67
+ // Stage 3: Cross-model review via Codex — only if Stages 1 and 2 pass and the stage is not disabled.
68
+ // The agent may return VERDICT: SKIP (trivial changes, MCP unavailable, rate-limited);
69
+ // SKIP is treated as pass-through so infrastructure issues never block the gate.
70
+ // Across retry cycles we pipe the prior Codex threadId forward so the agent can
71
+ // reuse the cheaper codex-reply path instead of starting a fresh session.
72
+ let stage3 = null;
73
+ if (stage1.passed && stage2 && stage2.passed && config.codexReview !== false) {
74
+ const priorThreadId = priorCodexThreadId(reviewState);
75
+ stage3 = await runReviewStage({
76
+ stageName: 'codex-review',
77
+ agentFile: 'ctx-codex-reviewer.md',
78
+ agentCommand: 'review',
79
+ prompt: buildReviewPrompt(state, 'codex', { priorThreadId }),
80
+ ctxDir, projectDir, agentsDir, streaming, timeout,
81
+ });
82
+ const { skipped, threadId } = parseStage3Markers(stage3.output);
83
+ stage3.threadId = threadId;
84
+ if (skipped) {
85
+ stage3.passed = true;
86
+ stage3.skipped = true;
87
+ stage3.issues = null;
88
+ }
89
+ }
90
+
91
+ // stage2 defaults to false when null (stage1 failed → never ran → not passed).
92
+ // stage3 defaults to true when null (disabled or earlier stage failed → absence is not a fail).
93
+ const passed =
94
+ stage1.passed &&
95
+ (stage2 ? stage2.passed : false) &&
96
+ (stage3 ? stage3.passed : true);
65
97
 
66
98
  // Build feedback for re-execution if failed
67
99
  let feedback = null;
@@ -69,15 +101,26 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
69
101
  const issues = [];
70
102
  if (!stage1.passed) issues.push(`Spec compliance: ${stage1.issues}`);
71
103
  if (stage2 && !stage2.passed) issues.push(`Code quality: ${stage2.issues}`);
104
+ if (stage3 && !stage3.passed) issues.push(`Codex review: ${stage3.issues}`);
72
105
  feedback = issues.join('\n');
73
106
  }
74
107
 
108
+ const stage3History = stage3
109
+ ? {
110
+ passed: stage3.passed,
111
+ issues: stage3.issues,
112
+ skipped: stage3.skipped || false,
113
+ threadId: stage3.threadId || null,
114
+ }
115
+ : null;
116
+
75
117
  // Record in state
76
118
  reviewState.history.push({
77
119
  cycle: reviewState.cycle,
78
120
  timestamp: new Date().toISOString(),
79
121
  stage1: { passed: stage1.passed, issues: stage1.issues },
80
122
  stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
123
+ stage3: stage3History,
81
124
  result: passed ? 'pass' : 'fail',
82
125
  });
83
126
 
@@ -91,6 +134,7 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
91
134
  passed,
92
135
  stage1: { passed: stage1.passed, issues: stage1.issues },
93
136
  stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
137
+ stage3: stage3History,
94
138
  cycle: reviewState.cycle,
95
139
  feedback,
96
140
  escalated: false,
@@ -104,6 +148,22 @@ export function isReviewGateEnabled(config) {
104
148
  return config.reviewGate !== false;
105
149
  }
106
150
 
151
/**
 * Parse Stage 3 (Codex review) output markers.
 *
 * - `skipped` is true when the agent's FINAL verdict marker is `VERDICT: SKIP`
 *   (trivial change, MCP unavailable, auth expired, rate-limited). Only the
 *   last verdict marker counts, so a SKIP that is merely mentioned earlier in
 *   the output cannot override a later PASS/FAIL.
 * - `threadId` is the value after the last `THREAD: <id>` marker, used to
 *   resume cheaper `codex-reply` sessions across review cycles.
 *
 * Echoes of the instruction template itself (`VERDICT: PASS | FAIL | SKIP`,
 * `THREAD: <id>`) are ignored.
 *
 * Exported for unit testing; consumed by runReviewGate internally.
 *
 * @param {string|null|undefined} output - Raw agent output.
 * @returns {{ skipped: boolean, threadId: string|null }}
 */
export function parseStage3Markers(output) {
  const text = String(output ?? '');

  // Capture the whole verdict word; `(?!\s*\|)` drops the echoed option list
  // "VERDICT: PASS | FAIL | SKIP", which is not a real verdict.
  const verdicts = [...text.matchAll(/verdict:\s*([a-z]+)(?![a-z])(?!\s*\|)/gi)];
  const finalVerdict = verdicts.length > 0 ? verdicts[verdicts.length - 1][1] : '';
  const skipped = /^skip/i.test(finalVerdict);

  // The marker match is case-insensitive, so an echoed "Prior Codex thread: X"
  // also matches; taking the LAST marker lets a newly opened thread win.
  // Ids starting with "<" are the literal "<id>" placeholder, not a real id.
  const threadIds = [...text.matchAll(/THREAD:\s*([^\s]+)/gi)]
    .map((match) => match[1])
    .filter((id) => !id.startsWith('<'));
  const threadId = threadIds.length > 0 ? threadIds[threadIds.length - 1] : null;

  return { skipped, threadId };
}
166
+
107
167
  /**
108
168
  * Get review history from state.
109
169
  */
@@ -130,6 +190,14 @@ export function formatReviewResult(result) {
130
190
  const s2Icon = result.stage2.passed ? '✓' : '✗';
131
191
  lines.push(` ${s2Icon} Stage 2 (code quality): ${result.stage2.passed ? 'pass' : result.stage2.issues || 'fail'}`);
132
192
  }
193
+ if (result.stage3) {
194
+ if (result.stage3.skipped) {
195
+ lines.push(` ○ Stage 3 (codex review): skipped`);
196
+ } else {
197
+ const s3Icon = result.stage3.passed ? '✓' : '✗';
198
+ lines.push(` ${s3Icon} Stage 3 (codex review): ${result.stage3.passed ? 'pass' : result.stage3.issues || 'fail'}`);
199
+ }
200
+ }
133
201
 
134
202
  if (result.escalated) {
135
203
  lines.push('');
@@ -182,7 +250,17 @@ async function runReviewStage({ stageName, agentFile, agentCommand, prompt, ctxD
182
250
  }
183
251
  }
184
252
 
185
- function buildReviewPrompt(state, type) {
253
/**
 * Find the most recent Codex thread id recorded in the review history.
 *
 * Walks `reviewState.history` from newest to oldest and returns the first
 * `stage3.threadId` that is a non-empty string. The history is loaded from
 * persisted state and the id is interpolated into the Stage 3 prompt, so
 * non-string or blank values are skipped rather than forwarded.
 *
 * @param {{ history?: Array<{ stage3?: { threadId?: unknown } | null } | null> } | null | undefined} reviewState
 * @returns {string|null} The latest usable thread id, or null when none exists.
 */
function priorCodexThreadId(reviewState) {
  const history = reviewState?.history;
  if (!Array.isArray(history)) return null;

  for (let i = history.length - 1; i >= 0; i--) {
    const threadId = history[i]?.stage3?.threadId;
    if (typeof threadId === 'string' && threadId.trim() !== '') {
      return threadId;
    }
  }
  return null;
}
262
+
263
+ function buildReviewPrompt(state, type, opts = {}) {
186
264
  if (type === 'spec') {
187
265
  return [
188
266
  'Review the recent code changes for SPEC COMPLIANCE.',
@@ -197,6 +275,22 @@ function buildReviewPrompt(state, type) {
197
275
  ].join('\n');
198
276
  }
199
277
 
278
+ if (type === 'codex') {
279
+ const lines = [
280
+ 'Stage 3 — cross-model review via OpenAI Codex.',
281
+ 'Stages 1 (spec) and 2 (quality) already passed under Claude review.',
282
+ `Active story: ${state.activeStory || 'unknown'}`,
283
+ ];
284
+ if (opts.priorThreadId) {
285
+ lines.push(`Prior Codex thread: ${opts.priorThreadId} — reuse via mcp__codex__codex-reply if context is still relevant.`);
286
+ }
287
+ lines.push(
288
+ '',
289
+ 'Run your playbook and output VERDICT: PASS | FAIL | SKIP on the final line. Append `THREAD: <id>` if a new thread was opened.',
290
+ );
291
+ return lines.join('\n');
292
+ }
293
+
200
294
  return [
201
295
  'Review the recent code changes for CODE QUALITY.',
202
296
  'Check: security vulnerabilities, performance issues, code style, error handling.',
@@ -91,6 +91,9 @@
91
91
 
92
92
  "activeProfile": "balanced",
93
93
 
94
+ "reviewGate": true,
95
+ "codexReview": true,
96
+
94
97
  "git": {
95
98
  "autoCommit": true,
96
99
  "commitPerTask": true,