ctx-cc 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ description: Self-healing deployments - connect to error tracking (Sentry/LogRoc
4
4
  ---
5
5
 
6
6
  <objective>
7
- CTX 3.5 Self-Healing Deployments - Monitor production errors and automatically create fix stories or even auto-fix with PR creation.
7
+ CTX 4.0 Self-Healing Deployments - Monitor production errors and automatically create fix stories or even auto-fix with PR creation.
8
8
  </objective>
9
9
 
10
10
  <usage>
package/commands/voice.md CHANGED
@@ -4,7 +4,7 @@ description: Voice control for CTX - speak your requirements and commands using
4
4
  ---
5
5
 
6
6
  <objective>
7
- CTX 3.5 Voice Control - Speak your requirements instead of typing. Natural language processing converts speech to CTX commands and story descriptions.
7
+ CTX 4.0 Voice Control - Speak your requirements instead of typing. Natural language processing converts speech to CTX commands and story descriptions.
8
8
  </objective>
9
9
 
10
10
  <usage>
@@ -63,7 +63,8 @@ if (agentName && agentName.startsWith('ctx-')) {
63
63
  if (fs.existsSync(manifestPath)) {
64
64
  const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
65
65
  for (const [category, cfg] of Object.entries(manifest)) {
66
- if (cfg.agents.includes(agentName + '.md') && cfg.denied.includes(toolName)) {
66
+ if (category.startsWith('_')) continue; // skip metadata keys like _version
67
+ if (cfg?.agents?.includes(agentName + '.md') && cfg.denied.includes(toolName)) {
67
68
  process.stderr.write(`CTX: Tool "${toolName}" blocked for ${category} agent "${agentName}".\n`);
68
69
  fs.appendFileSync(
69
70
  path.join(ctxDir, 'violations.log'),
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "ctx-cc",
3
- "version": "4.0.0",
4
- "description": "CTX 4.0 — Intelligent workflow orchestration for Claude Code. 21 subagents, 3 skills, deterministic hooks. Phase-based lifecycle with autonomous execution.",
3
+ "version": "4.1.0",
4
+ "description": "CTX 4.0 — Intelligent workflow orchestration for Claude Code. 26 subagents, 7 skills, deterministic hooks. Phase-based lifecycle with autonomous execution.",
5
5
  "keywords": [
6
6
  "claude",
7
7
  "claude-code",
package/plugin.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "ctx",
3
3
  "version": "4.0.0",
4
- "description": "CTX — Intelligent workflow orchestration for Claude Code. 21 specialized agents, phase-based lifecycle, two-stage review gate, autonomous execution.",
4
+ "description": "CTX — Intelligent workflow orchestration for Claude Code. Specialized agents, phase-based lifecycle, three-stage review gate with OpenAI Codex cross-model review, autonomous execution.",
5
5
  "author": "jufjuf",
6
6
  "license": "MIT",
7
7
  "homepage": "https://github.com/jufjuf/CTX",
@@ -38,6 +38,7 @@
38
38
  },
39
39
  "settings": {
40
40
  "reviewGate": true,
41
+ "codexReview": true,
41
42
  "tddMode": "off",
42
43
  "maxReviewCycles": 3,
43
44
  "maxAutoIterations": 5
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  name: ctx-review-gate
3
3
  description: |
4
- WHEN: Code has been implemented and needs quality verification before marking a story complete. Runs two-stage review: spec compliance then code quality.
4
+ WHEN: Code has been implemented and needs quality verification before marking a story complete. Runs three-stage review: spec compliance, code quality, and optional cross-model adversarial review via OpenAI Codex.
5
5
  WHEN NOT: During planning, research, or when review gate is disabled in config.
6
6
  ---
7
7
 
8
- # CTX Two-Stage Review Gate
8
+ # CTX Three-Stage Review Gate
9
9
 
10
10
  Automated quality gate that runs after execution and before verification.
11
11
 
12
- ## Two Stages
12
+ ## Three Stages
13
13
 
14
14
  ### Stage 1: Spec Compliance (ctx-reviewer)
15
15
  Checks whether the code satisfies the story's acceptance criteria.
@@ -23,18 +23,39 @@ Agent({
23
23
  })
24
24
  ```
25
25
 
26
- ### Stage 2: Code Quality (ctx-auditor)
27
- Checks security, performance, and code quality. **Only runs if Stage 1 passes.**
26
+ ### Stage 2: Code Quality (ctx-reviewer)
27
+ Reuses ctx-reviewer with a quality-focused prompt: security, performance, error handling, style. **Only runs if Stage 1 passes.**
28
+
29
+ (Note: earlier versions of this skill called `ctx-auditor` here. That was a miscast — `ctx-auditor` is an audit-trail/compliance agent, not a code-quality reviewer. `ctx-reviewer` already covers type checks, imports, security scans, and best-practice enforcement, so it handles both stages with different framings.)
28
30
 
29
31
  Spawn:
30
32
  ```
31
33
  Agent({
32
- subagent_type: "ctx-auditor",
34
+ subagent_type: "ctx-reviewer",
33
35
  prompt: "Review recent changes for CODE QUALITY. Check: security vulnerabilities, performance, error handling, style. Output VERDICT: PASS or FAIL with ISSUES list.",
34
- description: "Code quality audit"
36
+ description: "Code quality review"
35
37
  })
36
38
  ```
37
39
 
40
+ ### Stage 3: Cross-Model Review (ctx-codex-reviewer) — optional
41
+ Sends the diff to OpenAI Codex via MCP for a second-pair-of-eyes review with different model priors. **Only runs if Stage 2 passes AND `config.codexReview !== false`.**
42
+
43
+ Short-circuits on docs-only, test-only, or trivial (<20 LOC) diffs. Fails soft — if the Codex MCP is unavailable, rate-limited, or unauthenticated, returns `SKIP` rather than `FAIL` so infrastructure problems never block the gate.
44
+
45
+ Spawn:
46
+ ```
47
+ Agent({
48
+ subagent_type: "ctx-codex-reviewer",
49
+ prompt: "Cross-model review story <ID>. Dispatch the current diff to Codex via mcp__codex__codex with sandbox=read-only. Acceptance criteria: <list>. Output VERDICT: PASS, FAIL, or SKIP.",
50
+ description: "Codex adversarial review"
51
+ })
52
+ ```
53
+
54
+ Prerequisites (user-side, not automated by CTX):
55
+ - Codex CLI installed (`npm i -g @openai/codex`)
56
+ - Signed in via ChatGPT subscription (`codex login` — no `--api-key` flag)
57
+ - MCP registered (`claude mcp add codex -- codex mcp-server`)
58
+
38
59
  ## Flow
39
60
 
40
61
  ```
@@ -51,6 +72,12 @@ Stage 2: ctx-auditor (code quality)
51
72
  ├── FAIL → Feed issues back to executor, increment cycle
52
73
 
53
74
  ▼ PASS
75
+ Stage 3: ctx-codex-reviewer (cross-model, if enabled)
76
+
77
+ ├── FAIL → Feed issues back to executor, increment cycle
78
+ ├── SKIP → Treat as pass (short-circuited diff or infra problem, not a code problem)
79
+
80
+ ▼ PASS
54
81
  Mark story for verification
55
82
  ```
56
83
 
@@ -85,27 +112,36 @@ Update `.ctx/STATE.json`:
85
112
  "reviewGate": {
86
113
  "cycle": 2,
87
114
  "history": [
88
- { "cycle": 1, "timestamp": "ISO", "stage1": { "passed": true }, "stage2": { "passed": false, "issues": "..." }, "result": "fail" },
89
- { "cycle": 2, "timestamp": "ISO", "stage1": { "passed": true }, "stage2": { "passed": true }, "result": "pass" }
115
+ { "cycle": 1, "timestamp": "ISO", "stage1": { "passed": true }, "stage2": { "passed": false, "issues": "..." }, "stage3": null, "result": "fail" },
116
+ { "cycle": 2, "timestamp": "ISO", "stage1": { "passed": true }, "stage2": { "passed": true }, "stage3": { "passed": true, "threadId": "thr_...", "skipped": false }, "result": "pass" }
90
117
  ]
91
118
  }
92
119
  }
93
120
  ```
94
121
 
122
+ `stage3` is `null` when Stage 3 is not reached (Stage 1 or Stage 2 failed) or when `codexReview` is disabled. When Stage 3 runs, record its `threadId` so follow-up cycles can reuse the same Codex session.
123
+
95
124
  ## Save Review Artifacts
96
125
 
97
126
  Write review results to `.ctx/reviews/<story-id>-<timestamp>.json`.
98
127
 
99
128
  ## Configuration
100
129
 
101
- Review gate can be disabled:
130
+ Review gate can be disabled entirely:
102
131
  - Check `.ctx/config.json` for `"reviewGate": false`
103
132
  - If disabled, skip directly to verification
104
133
 
134
+ Stage 3 (Codex cross-review) can be disabled independently:
135
+ - Check `.ctx/config.json` for `"codexReview": false`
136
+ - Useful when offline, when the ChatGPT rate-limit budget is depleted, or when the change is trivial
137
+ - Stages 1 and 2 continue to run normally
138
+
105
139
  ## Rules
106
140
 
107
- - ALWAYS run Stage 1 before Stage 2
108
- - NEVER run Stage 2 if Stage 1 fails (fail-fast)
141
+ - ALWAYS run Stage 1 before Stage 2, and Stage 2 before Stage 3 (fail-fast ordering)
142
+ - NEVER run Stage 2 if Stage 1 fails
143
+ - NEVER run Stage 3 if Stage 2 fails, or if `codexReview === false`
144
+ - Stage 3 SKIP (short-circuited diff or infrastructure failure) is NOT a gate failure — treat as pass
109
145
  - ALWAYS feed review issues back to executor as context on retry
110
146
  - Max 3 cycles — then escalate to human
111
- - Record every cycle in state
147
+ - Record every cycle in state, including `stage3: null` when not reached
@@ -1,9 +1,26 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
 
4
+ /**
5
+ * Schema version for the on-disk capability manifest.
6
+ * Bump when adding categories, renaming fields, or changing policy semantics
7
+ * so stale project manifests can be detected and regenerated.
8
+ */
9
+ export const MANIFEST_VERSION = 1;
10
+
4
11
  /**
5
12
  * Default capability manifests per agent category.
6
- * Defines which tools each agent type is allowed to use.
13
+ * Defines which tools each ctx-* agent category is allowed to use.
14
+ *
15
+ * The runtime enforcement point is `hooks/pre-tool-use.js`, which reads
16
+ * `.ctx/capability-manifest.json` (written at project init from this table)
17
+ * and blocks tool calls whose name appears in the agent's `denied` list.
18
+ *
19
+ * `allowed` is the declared whitelist and is used for documentation and tests;
20
+ * the hook itself is denylist-driven so unknown tools default to permissive.
21
+ *
22
+ * Iterators over a loaded manifest MUST skip keys starting with `_`
23
+ * (reserved for metadata like `_version`).
7
24
  */
8
25
  const DEFAULT_CAPABILITIES = {
9
26
  // Planning agents — read-only + write plans
@@ -22,14 +39,22 @@ const DEFAULT_CAPABILITIES = {
22
39
  reason: 'Execution agents should not spawn other agents.',
23
40
  },
24
41
 
25
- // Review agents — read + run tests, no modifications
42
+ // Review agents — read + run tests + Codex cross-review, no modifications
26
43
  review: {
27
- agents: ['ctx-reviewer.md', 'ctx-auditor.md', 'ctx-verifier.md'],
28
- allowed: ['Read', 'Glob', 'Grep', 'Bash'],
44
+ agents: ['ctx-reviewer.md', 'ctx-verifier.md', 'ctx-codex-reviewer.md', 'ctx-ml-reviewer.md'],
45
+ allowed: ['Read', 'Glob', 'Grep', 'Bash', 'mcp__codex__codex'],
29
46
  denied: ['Write', 'Edit', 'NotebookEdit'],
30
47
  reason: 'Review agents should not modify code.',
31
48
  },
32
49
 
50
+ // Audit agents — write audit trails, but never modify source
51
+ audit: {
52
+ agents: ['ctx-auditor.md'],
53
+ allowed: ['Read', 'Write', 'Bash', 'Glob', 'Grep'],
54
+ denied: ['Edit', 'Agent', 'NotebookEdit'],
55
+ reason: 'Audit agents record trails but should not modify source or spawn agents.',
56
+ },
57
+
33
58
  // Mapper agents — read-only analysis
34
59
  mapping: {
35
60
  agents: ['ctx-mapper.md', 'ctx-arch-mapper.md', 'ctx-tech-mapper.md', 'ctx-quality-mapper.md', 'ctx-concerns-mapper.md'],
@@ -69,25 +94,23 @@ const DEFAULT_CAPABILITIES = {
69
94
  denied: ['Edit'],
70
95
  reason: 'QA agents test but should not fix code.',
71
96
  },
72
- };
73
97
 
74
- /**
75
- * Load capability manifest from file, or return defaults.
76
- */
77
- export function loadCapabilityManifest(ctxDir) {
78
- const manifestPath = path.join(ctxDir, 'capability-manifest.json');
79
- try {
80
- return JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
81
- } catch {
82
- return DEFAULT_CAPABILITIES;
83
- }
84
- }
98
+ // ML agents — implement and analyze ML pipelines
99
+ ml: {
100
+ agents: ['ctx-ml-scientist.md', 'ctx-ml-engineer.md', 'ctx-ml-analyst.md'],
101
+ allowed: ['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep'],
102
+ denied: ['Agent', 'NotebookEdit'],
103
+ reason: 'ML agents implement and analyze pipelines but should not orchestrate.',
104
+ },
105
+ };
85
106
 
86
107
  /**
87
108
  * Find the category for a given agent file.
109
+ * Skips metadata keys (prefix `_`) so a versioned on-disk manifest still works.
88
110
  */
89
111
  export function findAgentCategory(agentFile, manifest = DEFAULT_CAPABILITIES) {
90
112
  for (const [category, config] of Object.entries(manifest)) {
113
+ if (category.startsWith('_')) continue;
91
114
  if (config.agents.includes(agentFile)) {
92
115
  return { category, ...config };
93
116
  }
@@ -97,12 +120,14 @@ export function findAgentCategory(agentFile, manifest = DEFAULT_CAPABILITIES) {
97
120
 
98
121
  /**
99
122
  * Check if a tool is allowed for an agent.
123
+ * Denylist-driven (matches the runtime hook in hooks/pre-tool-use.js).
124
+ * Unknown agents are permissive by default.
125
+ *
100
126
  * Returns { allowed: boolean, reason: string|null }.
101
127
  */
102
128
  export function checkToolAllowed(agentFile, toolName, manifest = DEFAULT_CAPABILITIES) {
103
129
  const category = findAgentCategory(agentFile, manifest);
104
130
  if (!category) {
105
- // Unknown agent — allow everything (permissive for custom agents)
106
131
  return { allowed: true, reason: null };
107
132
  }
108
133
 
@@ -117,37 +142,66 @@ export function checkToolAllowed(agentFile, toolName, manifest = DEFAULT_CAPABIL
117
142
  }
118
143
 
119
144
  /**
120
- * Generate a PreToolUse hook command that enforces capability restrictions.
121
- * Returns the hook command string.
145
+ * Save the capability manifest to `<ctxDir>/capability-manifest.json`.
146
+ * Called from the install flow to seed the template and from the project
147
+ * init command to materialize the manifest that the PreToolUse hook reads.
122
148
  */
123
- export function generateCapabilityHookCommand(ctxDir) {
124
- return `node -e "
125
- const fs=require('fs'),p=require('path');
126
- const tool=process.env.TOOL_NAME||'';
127
- const agent=process.env.CURRENT_AGENT||'';
128
- if(!agent||!tool)process.exit(0);
129
- const mPath=p.join('${ctxDir}','capability-manifest.json');
130
- let manifest;
131
- try{manifest=JSON.parse(fs.readFileSync(mPath,'utf-8'));}catch{process.exit(0);}
132
- for(const[cat,cfg]of Object.entries(manifest)){
133
- if(cfg.agents.includes(agent)&&cfg.denied.includes(tool)){
134
- console.error('CTX: Tool '+tool+' blocked for '+cat+' agent '+agent);
135
- const logDir=p.join('${ctxDir}','violations.log');
136
- fs.appendFileSync(logDir,new Date().toISOString()+' | '+agent+' | '+tool+' | BLOCKED\\n');
137
- process.exit(2);
138
- }
139
- }
140
- "`.replace(/\n\s*/g, ' ').trim();
149
+ export function saveCapabilityManifest(ctxDir) {
150
+ const manifestPath = path.join(ctxDir, 'capability-manifest.json');
151
+ if (!fs.existsSync(ctxDir)) fs.mkdirSync(ctxDir, { recursive: true });
152
+ const payload = { _version: MANIFEST_VERSION, ...DEFAULT_CAPABILITIES };
153
+ fs.writeFileSync(manifestPath, JSON.stringify(payload, null, 2) + '\n');
154
+ return manifestPath;
141
155
  }
142
156
 
143
157
  /**
144
- * Save default capability manifest to .ctx/ for customization.
158
+ * Read the `_version` field from an on-disk manifest.
159
+ * Returns 0 for pre-versioned manifests, null if file missing/invalid.
160
+ * Callers compare against MANIFEST_VERSION to decide whether to regenerate.
145
161
  */
146
- export function saveCapabilityManifest(ctxDir) {
162
+ export function readManifestVersion(ctxDir) {
147
163
  const manifestPath = path.join(ctxDir, 'capability-manifest.json');
148
- if (!fs.existsSync(ctxDir)) fs.mkdirSync(ctxDir, { recursive: true });
149
- fs.writeFileSync(manifestPath, JSON.stringify(DEFAULT_CAPABILITIES, null, 2) + '\n');
150
- return manifestPath;
164
+ try {
165
+ const data = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
166
+ return typeof data._version === 'number' ? data._version : 0;
167
+ } catch {
168
+ return null;
169
+ }
170
+ }
171
+
172
+ /**
173
+ * Migrate an existing project's capability manifest to the current version.
174
+ * - Missing: writes a fresh manifest, returns { action: 'created' }.
175
+ * - Current: no-op, returns { action: 'current' }.
176
+ * - Stale (any version other than MANIFEST_VERSION, including pre-versioned 0): backs up old manifest as `capability-manifest.v<N>.backup.json`
177
+ * and regenerates, returns { action: 'migrated', backup }.
178
+ *
179
+ * Used by the `ctx-cc update-manifest` CLI subcommand so projects that
180
+ * predate MANIFEST_VERSION can pick up policy changes without re-initting.
181
+ */
182
+ export function updateProjectManifest(ctxDir) {
183
+ const manifestPath = path.join(ctxDir, 'capability-manifest.json');
184
+ const current = readManifestVersion(ctxDir);
185
+
186
+ if (current === null) {
187
+ saveCapabilityManifest(ctxDir);
188
+ return { action: 'created', from: null, to: MANIFEST_VERSION, path: manifestPath };
189
+ }
190
+
191
+ if (current === MANIFEST_VERSION) {
192
+ return { action: 'current', from: current, to: current, path: manifestPath };
193
+ }
194
+
195
+ const backupPath = path.join(ctxDir, `capability-manifest.v${current}.backup.json`);
196
+ fs.copyFileSync(manifestPath, backupPath);
197
+ saveCapabilityManifest(ctxDir);
198
+ return {
199
+ action: 'migrated',
200
+ from: current,
201
+ to: MANIFEST_VERSION,
202
+ path: manifestPath,
203
+ backup: backupPath,
204
+ };
151
205
  }
152
206
 
153
207
  /**
@@ -156,6 +210,7 @@ export function saveCapabilityManifest(ctxDir) {
156
210
  export function formatCapabilities(manifest = DEFAULT_CAPABILITIES) {
157
211
  const lines = [];
158
212
  for (const [category, config] of Object.entries(manifest)) {
213
+ if (category.startsWith('_')) continue;
159
214
  lines.push(` ${category}:`);
160
215
  lines.push(` Agents: ${config.agents.map(a => a.replace('ctx-', '').replace('.md', '')).join(', ')}`);
161
216
  lines.push(` Allowed: ${config.allowed.join(', ')}`);
package/src/install.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import { fileURLToPath } from 'url';
4
+ import { saveCapabilityManifest } from './capabilities.js';
4
5
 
5
6
  const __filename = fileURLToPath(import.meta.url);
6
7
  const __dirname = path.dirname(__filename);
@@ -28,7 +29,7 @@ function printBanner() {
28
29
  `));
29
30
  console.log(` ${bold('CTX 4.0')} ${dim(`v${VERSION}`)}`);
30
31
  console.log(' Intelligent workflow orchestration for Claude Code.');
31
- console.log(' 21 agents. Skills. Hooks. Phase-based lifecycle.\n');
32
+ console.log(' 26 agents. 7 skills. Hooks. Phase-based lifecycle.\n');
32
33
  }
33
34
 
34
35
  function copyDir(src, dest) {
@@ -170,6 +171,12 @@ export async function install(options) {
170
171
  console.log(green(` ✓`) + ` Installed templates (${count} files)`);
171
172
  }
172
173
 
174
+ // Generate capability-manifest.json template from DEFAULT_CAPABILITIES.
175
+ // /ctx:init copies this into each project's .ctx/ so the PreToolUse hook
176
+ // (hooks/pre-tool-use.js) has a manifest to enforce against.
177
+ saveCapabilityManifest(destTemplates);
178
+ console.log(green(` ✓`) + ` Generated capability-manifest.json template`);
179
+
173
180
  // Write VERSION file
174
181
  fs.writeFileSync(path.join(ctxDir, 'VERSION'), VERSION);
175
182
  console.log(green(` ✓`) + ` Wrote VERSION (${VERSION})`);
@@ -178,8 +185,8 @@ export async function install(options) {
178
185
  console.log(`\n ${green('Done!')} Launch Claude Code and run ${cyan('/ctx:help')}.`);
179
186
  console.log(`
180
187
  ${bold('What was installed:')}
181
- ${dim('Agents:')} ~/.claude/agents/ctx-*.md (21 subagents)
182
- ${dim('Skills:')} ~/.claude/skills/ctx-*/ (3 skills)
188
+ ${dim('Agents:')} ~/.claude/agents/ctx-*.md (26 subagents)
189
+ ${dim('Skills:')} ~/.claude/skills/ctx-*/ (7 skills)
183
190
  ${dim('Commands:')} ~/.claude/commands/ctx/ (slash commands)
184
191
  ${dim('Hooks:')} ~/.claude/hooks/ctx-*.js (3 hook scripts)
185
192
  ${dim('Config:')} ~/.claude/settings.json (hooks registered)
@@ -7,19 +7,21 @@ import { runAgent } from './runner.js';
7
7
  const MAX_REVIEW_CYCLES = 3;
8
8
 
9
9
  /**
10
- * Run the two-stage review gate.
10
+ * Run the three-stage review gate.
11
11
  *
12
12
  * Stage 1: ctx-reviewer checks spec compliance (acceptance criteria)
13
- * Stage 2: ctx-auditor checks code quality (security, performance, style)
13
+ * Stage 2: ctx-reviewer (quality framing) checks code quality (security, performance, style)
14
+ * Stage 3: ctx-codex-reviewer performs cross-model review via OpenAI Codex MCP
15
+ * (only runs if Stages 1 and 2 pass, and `config.codexReview !== false`).
14
16
  *
15
- * If either fails, returns feedback for re-execution.
17
+ * If any stage fails, returns feedback for re-execution.
16
18
  * Max cycles before requiring human intervention.
17
19
  *
18
20
  * Options:
19
21
  * ctxDir, projectDir, agentsDir, streaming, timeout, config
20
22
  *
21
23
  * Returns:
22
- * { passed: boolean, stage1: {...}, stage2: {...}, cycle: number, feedback: string|null }
24
+ * { passed, stage1, stage2, stage3, cycle, feedback, escalated }
23
25
  */
24
26
  export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming = true, timeout = 300000, config = {} }) {
25
27
  const state = readState(ctxDir);
@@ -49,19 +51,49 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
49
51
  ctxDir, projectDir, agentsDir, streaming, timeout,
50
52
  });
51
53
 
52
- // Stage 2: Code quality (auditor) — only if Stage 1 passes
54
+ // Stage 2: Code quality — only if Stage 1 passes. Reuses ctx-reviewer with quality framing;
55
+ // ctx-auditor is an audit-trail agent, not a code reviewer, so using it here was a miscast.
53
56
  let stage2 = null;
54
57
  if (stage1.passed) {
55
58
  stage2 = await runReviewStage({
56
59
  stageName: 'code-quality',
57
- agentFile: 'ctx-auditor.md',
58
- agentCommand: 'audit',
60
+ agentFile: 'ctx-reviewer.md',
61
+ agentCommand: 'review',
59
62
  prompt: buildReviewPrompt(state, 'quality'),
60
63
  ctxDir, projectDir, agentsDir, streaming, timeout,
61
64
  });
62
65
  }
63
66
 
64
- const passed = stage1.passed && (stage2 ? stage2.passed : false);
67
+ // Stage 3: Cross-model review via Codex only if Stages 1 and 2 pass and not disabled.
68
+ // The agent may return VERDICT: SKIP (trivial changes, MCP unavailable, rate-limited);
69
+ // SKIP is treated as pass-through so infrastructure issues never block the gate.
70
+ // Across retry cycles we pipe the prior Codex threadId forward so the agent can
71
+ // reuse the cheaper codex-reply path instead of starting a fresh session.
72
+ let stage3 = null;
73
+ if (stage1.passed && stage2 && stage2.passed && config.codexReview !== false) {
74
+ const priorThreadId = priorCodexThreadId(reviewState);
75
+ stage3 = await runReviewStage({
76
+ stageName: 'codex-review',
77
+ agentFile: 'ctx-codex-reviewer.md',
78
+ agentCommand: 'review',
79
+ prompt: buildReviewPrompt(state, 'codex', { priorThreadId }),
80
+ ctxDir, projectDir, agentsDir, streaming, timeout,
81
+ });
82
+ const { skipped, threadId } = parseStage3Markers(stage3.output);
83
+ stage3.threadId = threadId;
84
+ if (skipped) {
85
+ stage3.passed = true;
86
+ stage3.skipped = true;
87
+ stage3.issues = null;
88
+ }
89
+ }
90
+
91
+ // stage2 defaults to false when null (stage1 failed → never ran → not passed).
92
+ // stage3 defaults to true when null (disabled or earlier stage failed → absence is not a fail).
93
+ const passed =
94
+ stage1.passed &&
95
+ (stage2 ? stage2.passed : false) &&
96
+ (stage3 ? stage3.passed : true);
65
97
 
66
98
  // Build feedback for re-execution if failed
67
99
  let feedback = null;
@@ -69,15 +101,26 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
69
101
  const issues = [];
70
102
  if (!stage1.passed) issues.push(`Spec compliance: ${stage1.issues}`);
71
103
  if (stage2 && !stage2.passed) issues.push(`Code quality: ${stage2.issues}`);
104
+ if (stage3 && !stage3.passed) issues.push(`Codex review: ${stage3.issues}`);
72
105
  feedback = issues.join('\n');
73
106
  }
74
107
 
108
+ const stage3History = stage3
109
+ ? {
110
+ passed: stage3.passed,
111
+ issues: stage3.issues,
112
+ skipped: stage3.skipped || false,
113
+ threadId: stage3.threadId || null,
114
+ }
115
+ : null;
116
+
75
117
  // Record in state
76
118
  reviewState.history.push({
77
119
  cycle: reviewState.cycle,
78
120
  timestamp: new Date().toISOString(),
79
121
  stage1: { passed: stage1.passed, issues: stage1.issues },
80
122
  stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
123
+ stage3: stage3History,
81
124
  result: passed ? 'pass' : 'fail',
82
125
  });
83
126
 
@@ -91,6 +134,7 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
91
134
  passed,
92
135
  stage1: { passed: stage1.passed, issues: stage1.issues },
93
136
  stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
137
+ stage3: stage3History,
94
138
  cycle: reviewState.cycle,
95
139
  feedback,
96
140
  escalated: false,
@@ -104,6 +148,22 @@ export function isReviewGateEnabled(config) {
104
148
  return config.reviewGate !== false;
105
149
  }
106
150
 
151
+ /**
152
+ * Parse Stage 3 output markers.
153
+ * - `skipped` is true when the agent emitted `VERDICT: SKIP` (trivial change,
154
+ * MCP unavailable, auth expired, rate-limited).
155
+ * - `threadId` is the value after `THREAD: <id>`, used to resume cheaper
156
+ * `codex-reply` sessions across review cycles.
157
+ *
158
+ * Exported for unit testing; consumed by runReviewGate internally.
159
+ */
160
+ export function parseStage3Markers(output) {
161
+ const text = output || '';
162
+ const skipped = /verdict:\s*skip/i.test(text);
163
+ const threadMatch = /THREAD:\s*([^\s]+)/i.exec(text);
164
+ return { skipped, threadId: threadMatch ? threadMatch[1] : null };
165
+ }
166
+
107
167
  /**
108
168
  * Get review history from state.
109
169
  */
@@ -130,6 +190,14 @@ export function formatReviewResult(result) {
130
190
  const s2Icon = result.stage2.passed ? '✓' : '✗';
131
191
  lines.push(` ${s2Icon} Stage 2 (code quality): ${result.stage2.passed ? 'pass' : result.stage2.issues || 'fail'}`);
132
192
  }
193
+ if (result.stage3) {
194
+ if (result.stage3.skipped) {
195
+ lines.push(` ○ Stage 3 (codex review): skipped`);
196
+ } else {
197
+ const s3Icon = result.stage3.passed ? '✓' : '✗';
198
+ lines.push(` ${s3Icon} Stage 3 (codex review): ${result.stage3.passed ? 'pass' : result.stage3.issues || 'fail'}`);
199
+ }
200
+ }
133
201
 
134
202
  if (result.escalated) {
135
203
  lines.push('');
@@ -182,7 +250,17 @@ async function runReviewStage({ stageName, agentFile, agentCommand, prompt, ctxD
182
250
  }
183
251
  }
184
252
 
185
- function buildReviewPrompt(state, type) {
253
+ function priorCodexThreadId(reviewState) {
254
+ const hist = reviewState?.history;
255
+ if (!Array.isArray(hist)) return null;
256
+ for (let i = hist.length - 1; i >= 0; i--) {
257
+ const tid = hist[i]?.stage3?.threadId;
258
+ if (tid) return tid;
259
+ }
260
+ return null;
261
+ }
262
+
263
+ function buildReviewPrompt(state, type, opts = {}) {
186
264
  if (type === 'spec') {
187
265
  return [
188
266
  'Review the recent code changes for SPEC COMPLIANCE.',
@@ -197,6 +275,22 @@ function buildReviewPrompt(state, type) {
197
275
  ].join('\n');
198
276
  }
199
277
 
278
+ if (type === 'codex') {
279
+ const lines = [
280
+ 'Stage 3 — cross-model review via OpenAI Codex.',
281
+ 'Stages 1 (spec) and 2 (quality) already passed under Claude review.',
282
+ `Active story: ${state.activeStory || 'unknown'}`,
283
+ ];
284
+ if (opts.priorThreadId) {
285
+ lines.push(`Prior Codex thread: ${opts.priorThreadId} — reuse via mcp__codex__codex-reply if context is still relevant.`);
286
+ }
287
+ lines.push(
288
+ '',
289
+ 'Run your playbook and output VERDICT: PASS | FAIL | SKIP on the final line. Append `THREAD: <id>` if a new thread was opened.',
290
+ );
291
+ return lines.join('\n');
292
+ }
293
+
200
294
  return [
201
295
  'Review the recent code changes for CODE QUALITY.',
202
296
  'Check: security vulnerabilities, performance issues, code style, error handling.',
@@ -91,6 +91,9 @@
91
91
 
92
92
  "activeProfile": "balanced",
93
93
 
94
+ "reviewGate": true,
95
+ "codexReview": true,
96
+
94
97
  "git": {
95
98
  "autoCommit": true,
96
99
  "commitPerTask": true,