ctx-cc 4.0.0 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +378 -424
- package/agents/ctx-arch-mapper.md +1 -1
- package/agents/ctx-auditor.md +1 -1
- package/agents/ctx-codex-reviewer.md +214 -0
- package/agents/ctx-concerns-mapper.md +1 -1
- package/agents/ctx-criteria-suggester.md +2 -2
- package/agents/ctx-debugger.md +1 -1
- package/agents/ctx-discusser.md +1 -1
- package/agents/ctx-executor.md +1 -1
- package/agents/ctx-handoff.md +2 -2
- package/agents/ctx-learner.md +1 -1
- package/agents/ctx-mapper.md +1 -1
- package/agents/ctx-parallelizer.md +1 -1
- package/agents/ctx-planner.md +1 -1
- package/agents/ctx-predictor.md +1 -1
- package/agents/ctx-quality-mapper.md +1 -1
- package/agents/ctx-researcher.md +1 -1
- package/agents/ctx-reviewer.md +2 -2
- package/agents/ctx-team-coordinator.md +1 -1
- package/agents/ctx-tech-mapper.md +1 -1
- package/agents/ctx-verifier.md +1 -1
- package/bin/ctx.js +33 -2
- package/commands/cross-review.md +142 -0
- package/commands/ctx.md +10 -10
- package/commands/help.md +6 -5
- package/commands/init.md +25 -0
- package/commands/metrics.md +1 -1
- package/commands/milestone.md +1 -1
- package/commands/monitor.md +1 -1
- package/commands/voice.md +1 -1
- package/hooks/pre-tool-use.js +2 -1
- package/package.json +2 -2
- package/plugin.json +2 -1
- package/skills/ctx-review-gate/SKILL.md +49 -13
- package/src/capabilities.js +97 -42
- package/src/install.js +10 -3
- package/src/review-gate.js +103 -9
- package/templates/config.json +3 -0
package/src/review-gate.js
CHANGED
|
@@ -7,19 +7,21 @@ import { runAgent } from './runner.js';
|
|
|
7
7
|
const MAX_REVIEW_CYCLES = 3;
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
|
-
* Run the
|
|
10
|
+
* Run the three-stage review gate.
|
|
11
11
|
*
|
|
12
12
|
* Stage 1: ctx-reviewer checks spec compliance (acceptance criteria)
|
|
13
|
-
* Stage 2: ctx-
|
|
13
|
+
* Stage 2: ctx-reviewer (quality framing) checks code quality (security, performance, style)
|
|
14
|
+
* Stage 3: ctx-codex-reviewer performs cross-model review via OpenAI Codex MCP
|
|
15
|
+
* (only runs if Stages 1 and 2 pass, and `config.codexReview !== false`).
|
|
14
16
|
*
|
|
15
|
-
* If
|
|
17
|
+
* If any stage fails, returns feedback for re-execution.
|
|
16
18
|
* Max cycles before requiring human intervention.
|
|
17
19
|
*
|
|
18
20
|
* Options:
|
|
19
21
|
* ctxDir, projectDir, agentsDir, streaming, timeout, config
|
|
20
22
|
*
|
|
21
23
|
* Returns:
|
|
22
|
-
* { passed
|
|
24
|
+
* { passed, stage1, stage2, stage3, cycle, feedback, escalated }
|
|
23
25
|
*/
|
|
24
26
|
export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming = true, timeout = 300000, config = {} }) {
|
|
25
27
|
const state = readState(ctxDir);
|
|
@@ -49,19 +51,49 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
|
|
|
49
51
|
ctxDir, projectDir, agentsDir, streaming, timeout,
|
|
50
52
|
});
|
|
51
53
|
|
|
52
|
-
// Stage 2: Code quality
|
|
54
|
+
// Stage 2: Code quality — only if Stage 1 passes. Reuses ctx-reviewer with quality framing;
|
|
55
|
+
// ctx-auditor is an audit-trail agent, not a code reviewer, so using it here was a miscast.
|
|
53
56
|
let stage2 = null;
|
|
54
57
|
if (stage1.passed) {
|
|
55
58
|
stage2 = await runReviewStage({
|
|
56
59
|
stageName: 'code-quality',
|
|
57
|
-
agentFile: 'ctx-auditor.md',
|
|
58
|
-
agentCommand: '
|
|
60
|
+
agentFile: 'ctx-reviewer.md',
|
|
61
|
+
agentCommand: 'review',
|
|
59
62
|
prompt: buildReviewPrompt(state, 'quality'),
|
|
60
63
|
ctxDir, projectDir, agentsDir, streaming, timeout,
|
|
61
64
|
});
|
|
62
65
|
}
|
|
63
66
|
|
|
64
|
-
|
|
67
|
+
// Stage 3: Cross-model review via Codex — only if Stages 1 and 2 pass and not disabled.
|
|
68
|
+
// The agent may return VERDICT: SKIP (trivial changes, MCP unavailable, rate-limited);
|
|
69
|
+
// SKIP is treated as pass-through so infrastructure issues never block the gate.
|
|
70
|
+
// Across retry cycles we pipe the prior Codex threadId forward so the agent can
|
|
71
|
+
// reuse the cheaper codex-reply path instead of starting a fresh session.
|
|
72
|
+
let stage3 = null;
|
|
73
|
+
if (stage1.passed && stage2 && stage2.passed && config.codexReview !== false) {
|
|
74
|
+
const priorThreadId = priorCodexThreadId(reviewState);
|
|
75
|
+
stage3 = await runReviewStage({
|
|
76
|
+
stageName: 'codex-review',
|
|
77
|
+
agentFile: 'ctx-codex-reviewer.md',
|
|
78
|
+
agentCommand: 'review',
|
|
79
|
+
prompt: buildReviewPrompt(state, 'codex', { priorThreadId }),
|
|
80
|
+
ctxDir, projectDir, agentsDir, streaming, timeout,
|
|
81
|
+
});
|
|
82
|
+
const { skipped, threadId } = parseStage3Markers(stage3.output);
|
|
83
|
+
stage3.threadId = threadId;
|
|
84
|
+
if (skipped) {
|
|
85
|
+
stage3.passed = true;
|
|
86
|
+
stage3.skipped = true;
|
|
87
|
+
stage3.issues = null;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// stage2 defaults to false when null (stage1 failed → never ran → not passed).
|
|
92
|
+
// stage3 defaults to true when null (disabled or earlier stage failed → absence is not a fail).
|
|
93
|
+
const passed =
|
|
94
|
+
stage1.passed &&
|
|
95
|
+
(stage2 ? stage2.passed : false) &&
|
|
96
|
+
(stage3 ? stage3.passed : true);
|
|
65
97
|
|
|
66
98
|
// Build feedback for re-execution if failed
|
|
67
99
|
let feedback = null;
|
|
@@ -69,15 +101,26 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
|
|
|
69
101
|
const issues = [];
|
|
70
102
|
if (!stage1.passed) issues.push(`Spec compliance: ${stage1.issues}`);
|
|
71
103
|
if (stage2 && !stage2.passed) issues.push(`Code quality: ${stage2.issues}`);
|
|
104
|
+
if (stage3 && !stage3.passed) issues.push(`Codex review: ${stage3.issues}`);
|
|
72
105
|
feedback = issues.join('\n');
|
|
73
106
|
}
|
|
74
107
|
|
|
108
|
+
const stage3History = stage3
|
|
109
|
+
? {
|
|
110
|
+
passed: stage3.passed,
|
|
111
|
+
issues: stage3.issues,
|
|
112
|
+
skipped: stage3.skipped || false,
|
|
113
|
+
threadId: stage3.threadId || null,
|
|
114
|
+
}
|
|
115
|
+
: null;
|
|
116
|
+
|
|
75
117
|
// Record in state
|
|
76
118
|
reviewState.history.push({
|
|
77
119
|
cycle: reviewState.cycle,
|
|
78
120
|
timestamp: new Date().toISOString(),
|
|
79
121
|
stage1: { passed: stage1.passed, issues: stage1.issues },
|
|
80
122
|
stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
|
|
123
|
+
stage3: stage3History,
|
|
81
124
|
result: passed ? 'pass' : 'fail',
|
|
82
125
|
});
|
|
83
126
|
|
|
@@ -91,6 +134,7 @@ export async function runReviewGate({ ctxDir, projectDir, agentsDir, streaming =
|
|
|
91
134
|
passed,
|
|
92
135
|
stage1: { passed: stage1.passed, issues: stage1.issues },
|
|
93
136
|
stage2: stage2 ? { passed: stage2.passed, issues: stage2.issues } : null,
|
|
137
|
+
stage3: stage3History,
|
|
94
138
|
cycle: reviewState.cycle,
|
|
95
139
|
feedback,
|
|
96
140
|
escalated: false,
|
|
@@ -104,6 +148,22 @@ export function isReviewGateEnabled(config) {
|
|
|
104
148
|
return config.reviewGate !== false;
|
|
105
149
|
}
|
|
106
150
|
|
|
151
|
+
/**
|
|
152
|
+
* Parse Stage 3 output markers.
|
|
153
|
+
* - `skipped` is true when the agent emitted `VERDICT: SKIP` (trivial change,
|
|
154
|
+
* MCP unavailable, auth expired, rate-limited).
|
|
155
|
+
* - `threadId` is the value after `THREAD: <id>`, used to resume cheaper
|
|
156
|
+
* `codex-reply` sessions across review cycles.
|
|
157
|
+
*
|
|
158
|
+
* Exported for unit testing; consumed by runReviewGate internally.
|
|
159
|
+
*/
|
|
160
|
+
export function parseStage3Markers(output) {
|
|
161
|
+
const text = output || '';
|
|
162
|
+
const skipped = /verdict:\s*skip/i.test(text);
|
|
163
|
+
const threadMatch = /THREAD:\s*([^\s]+)/i.exec(text);
|
|
164
|
+
return { skipped, threadId: threadMatch ? threadMatch[1] : null };
|
|
165
|
+
}
|
|
166
|
+
|
|
107
167
|
/**
|
|
108
168
|
* Get review history from state.
|
|
109
169
|
*/
|
|
@@ -130,6 +190,14 @@ export function formatReviewResult(result) {
|
|
|
130
190
|
const s2Icon = result.stage2.passed ? '✓' : '✗';
|
|
131
191
|
lines.push(` ${s2Icon} Stage 2 (code quality): ${result.stage2.passed ? 'pass' : result.stage2.issues || 'fail'}`);
|
|
132
192
|
}
|
|
193
|
+
if (result.stage3) {
|
|
194
|
+
if (result.stage3.skipped) {
|
|
195
|
+
lines.push(` ○ Stage 3 (codex review): skipped`);
|
|
196
|
+
} else {
|
|
197
|
+
const s3Icon = result.stage3.passed ? '✓' : '✗';
|
|
198
|
+
lines.push(` ${s3Icon} Stage 3 (codex review): ${result.stage3.passed ? 'pass' : result.stage3.issues || 'fail'}`);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
133
201
|
|
|
134
202
|
if (result.escalated) {
|
|
135
203
|
lines.push('');
|
|
@@ -182,7 +250,17 @@ async function runReviewStage({ stageName, agentFile, agentCommand, prompt, ctxD
|
|
|
182
250
|
}
|
|
183
251
|
}
|
|
184
252
|
|
|
185
|
-
function buildReviewPrompt(state, type) {
|
|
253
|
+
function priorCodexThreadId(reviewState) {
|
|
254
|
+
const hist = reviewState?.history;
|
|
255
|
+
if (!Array.isArray(hist)) return null;
|
|
256
|
+
for (let i = hist.length - 1; i >= 0; i--) {
|
|
257
|
+
const tid = hist[i]?.stage3?.threadId;
|
|
258
|
+
if (tid) return tid;
|
|
259
|
+
}
|
|
260
|
+
return null;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
function buildReviewPrompt(state, type, opts = {}) {
|
|
186
264
|
if (type === 'spec') {
|
|
187
265
|
return [
|
|
188
266
|
'Review the recent code changes for SPEC COMPLIANCE.',
|
|
@@ -197,6 +275,22 @@ function buildReviewPrompt(state, type) {
|
|
|
197
275
|
].join('\n');
|
|
198
276
|
}
|
|
199
277
|
|
|
278
|
+
if (type === 'codex') {
|
|
279
|
+
const lines = [
|
|
280
|
+
'Stage 3 — cross-model review via OpenAI Codex.',
|
|
281
|
+
'Stages 1 (spec) and 2 (quality) already passed under Claude review.',
|
|
282
|
+
`Active story: ${state.activeStory || 'unknown'}`,
|
|
283
|
+
];
|
|
284
|
+
if (opts.priorThreadId) {
|
|
285
|
+
lines.push(`Prior Codex thread: ${opts.priorThreadId} — reuse via mcp__codex__codex-reply if context is still relevant.`);
|
|
286
|
+
}
|
|
287
|
+
lines.push(
|
|
288
|
+
'',
|
|
289
|
+
'Run your playbook and output VERDICT: PASS | FAIL | SKIP on the final line. Append `THREAD: <id>` if a new thread was opened.',
|
|
290
|
+
);
|
|
291
|
+
return lines.join('\n');
|
|
292
|
+
}
|
|
293
|
+
|
|
200
294
|
return [
|
|
201
295
|
'Review the recent code changes for CODE QUALITY.',
|
|
202
296
|
'Check: security vulnerabilities, performance issues, code style, error handling.',
|