cc-reviewer 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@ Call `ask_codex` with:
21
21
  "taskType": "<infer from request: plan|debug|explain|question|fix|explore|general>",
22
22
  "relevantFiles": ["<files related to the question>"],
23
23
  "context": "<any error messages or prior analysis>",
24
- "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'priority'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit>"
24
+ "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'fast'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit>"
25
25
  }
26
26
  ```
27
27
 
@@ -20,7 +20,7 @@ Call `ask_multi` with:
20
20
  "taskType": "<infer from request: plan|debug|explain|question|fix|explore|general>",
21
21
  "relevantFiles": ["<files related to the question>"],
22
22
  "context": "<any error messages or prior analysis>",
23
- "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'priority'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit. Applies to Codex only.>"
23
+ "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'fast'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit. Applies to Codex only.>"
24
24
  }
25
25
  ```
26
26
 
@@ -18,7 +18,7 @@ Use the `codex_review` MCP tool with `reasoningEffort: "xhigh"` for deeper analy
18
18
  - `workingDir`: current working directory
19
19
  - `ccOutput`: brief summary of recent changes or context
20
20
  - `reasoningEffort`: "xhigh" (this is the key difference from /codex)
21
- - `serviceTier`: if user says "fast mode"/"fast"/"priority" → "priority"; if "flex"/"cheap"/"budget" → "flex"; otherwise omit
21
+ - `serviceTier`: if user says "fast mode"/"fast"/"priority" → "fast"; if "flex"/"cheap"/"budget" → "flex"; otherwise omit
22
22
  - `focus`: extracted from $ARGUMENTS if it's a known focus area
23
23
  - `customInstructions`: $ARGUMENTS if it's custom text
24
24
 
package/commands/codex.md CHANGED
@@ -49,7 +49,7 @@ Call `codex_review` with:
49
49
  ```
50
50
 
51
51
  ### Service Tier (from $ARGUMENTS)
52
- - If user says "fast mode", "fast", or "priority" → set `serviceTier: "priority"` (faster, ~2x cost)
52
+ - If user says "fast mode", "fast", or "priority" → set `serviceTier: "fast"` (priority processing, ~2x cost)
53
53
  - If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"` (50% cheaper, slower)
54
54
  - Otherwise → omit `serviceTier` (uses default tier)
55
55
 
package/commands/multi.md CHANGED
@@ -47,7 +47,7 @@ Call `multi_review` with:
47
47
  ```
48
48
 
49
49
  ### Service Tier (from $ARGUMENTS, applies to Codex only)
50
- - If user says "fast mode", "fast", or "priority" → set `serviceTier: "priority"`
50
+ - If user says "fast mode", "fast", or "priority" → set `serviceTier: "fast"`
51
51
  - If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"`
52
52
  - Otherwise → omit `serviceTier`
53
53
 
@@ -42,9 +42,19 @@ export interface ReviewRequest {
42
42
  reasoningEffort?: ReasoningEffort;
43
43
  /** Service tier (for models that support it: priority = fast, flex = cheap) */
44
44
  serviceTier?: ServiceTier;
45
- /** Expert role configuration (optional override) */
46
- expertRole?: ExpertRole;
47
45
  }
46
+ /** @deprecated Use handoff.ts roles instead */
47
+ export interface ExpertRole {
48
+ name: string;
49
+ description: string;
50
+ systemPrompt: string;
51
+ focusAreas: FocusArea[];
52
+ evaluationCriteria: string[];
53
+ }
54
+ /** @deprecated Use handoff.ts selectRole() instead */
55
+ export declare const EXPERT_ROLES: Record<string, ExpertRole>;
56
+ /** @deprecated Use handoff.ts selectRole() instead */
57
+ export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
48
58
  export interface PeerRequest {
49
59
  /** Working directory containing the code */
50
60
  workingDir: string;
@@ -65,18 +75,6 @@ export interface PeerRequest {
65
75
  /** Service tier (for models that support it: priority = fast, flex = cheap) */
66
76
  serviceTier?: ServiceTier;
67
77
  }
68
- export interface ExpertRole {
69
- name: string;
70
- description: string;
71
- systemPrompt: string;
72
- focusAreas: FocusArea[];
73
- evaluationCriteria: string[];
74
- }
75
- export declare const EXPERT_ROLES: Record<string, ExpertRole>;
76
- /**
77
- * Select the best expert role based on requested focus areas
78
- */
79
- export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
80
78
  export interface ReviewSuccess {
81
79
  success: true;
82
80
  output: ReviewOutput;
@@ -5,180 +5,47 @@
5
5
  * Makes it easy to add new models (Ollama, Azure, etc.) without
6
6
  * changing the core orchestration logic.
7
7
  */
8
+ /** @deprecated Use handoff.ts selectRole() instead */
8
9
  export const EXPERT_ROLES = {
9
10
  security_auditor: {
10
- name: 'Security Auditor',
11
- description: 'Specializes in security vulnerabilities and secure coding practices',
12
- systemPrompt: `You are a senior security auditor with expertise in:
13
- - OWASP Top 10 vulnerabilities (injection, broken auth, XSS, CSRF, etc.)
14
- - Authentication and authorization flaws
15
- - Input validation and sanitization
16
- - Cryptographic weaknesses and misuse
17
- - Sensitive data exposure
18
- - Security misconfigurations
19
- - Dependency vulnerabilities
20
-
21
- When reviewing code:
22
- 1. Identify specific vulnerability patterns with CWE IDs when applicable
23
- 2. Rate severity using CVSS-like scoring (critical/high/medium/low/info)
24
- 3. Provide concrete proof-of-concept or attack scenarios
25
- 4. Suggest specific remediations with code examples
26
- 5. Note any security best practices being followed (to validate CC's work)`,
27
- focusAreas: ['security'],
28
- evaluationCriteria: [
29
- 'SQL/NoSQL injection vectors',
30
- 'XSS (stored, reflected, DOM)',
31
- 'Authentication bypass',
32
- 'Authorization flaws (IDOR, privilege escalation)',
33
- 'Insecure deserialization',
34
- 'SSRF vulnerabilities',
35
- 'Path traversal',
36
- 'Command injection',
37
- 'Secrets in code',
38
- 'Insecure dependencies',
39
- ],
11
+ name: 'Security Auditor', description: 'Security vulnerabilities',
12
+ systemPrompt: 'Security auditor. Focus on injection, auth bypass, data exposure, input validation.',
13
+ focusAreas: ['security'], evaluationCriteria: ['Injection', 'Auth', 'Data exposure'],
40
14
  },
41
15
  performance_engineer: {
42
- name: 'Performance Engineer',
43
- description: 'Specializes in performance optimization and efficiency',
44
- systemPrompt: `You are a senior performance engineer with expertise in:
45
- - Algorithm complexity analysis (Big-O notation)
46
- - Memory management and leak detection
47
- - Database query optimization
48
- - Caching strategies
49
- - Concurrency and parallelism
50
- - I/O optimization
51
- - Bundle size and load time optimization
52
-
53
- When reviewing code:
54
- 1. Analyze algorithmic complexity with Big-O notation
55
- 2. Identify memory leaks, unnecessary allocations, or retention issues
56
- 3. Spot N+1 query problems and suggest batching/caching
57
- 4. Recommend specific optimizations with expected improvements
58
- 5. Validate any performance claims from CC with analysis`,
59
- focusAreas: ['performance', 'scalability'],
60
- evaluationCriteria: [
61
- 'Time complexity',
62
- 'Space complexity',
63
- 'Memory leaks',
64
- 'Unnecessary re-renders',
65
- 'N+1 queries',
66
- 'Missing indexes',
67
- 'Inefficient loops',
68
- 'Blocking operations',
69
- 'Cache invalidation',
70
- 'Resource pooling',
71
- ],
16
+ name: 'Performance Engineer', description: 'Performance optimization',
17
+ systemPrompt: 'Performance engineer. Focus on complexity, N+1 queries, memory leaks.',
18
+ focusAreas: ['performance', 'scalability'], evaluationCriteria: ['Complexity', 'Memory', 'I/O'],
72
19
  },
73
20
  architect: {
74
- name: 'Software Architect',
75
- description: 'Specializes in design patterns, architecture, and maintainability',
76
- systemPrompt: `You are a senior software architect with expertise in:
77
- - Design patterns (GoF, enterprise patterns)
78
- - SOLID principles
79
- - Clean architecture and DDD
80
- - API design and contracts
81
- - Dependency management
82
- - Code organization and modularity
83
- - Technical debt assessment
84
-
85
- When reviewing code:
86
- 1. Evaluate adherence to design patterns and principles
87
- 2. Identify coupling issues and suggest decoupling strategies
88
- 3. Assess abstraction levels and cohesion
89
- 4. Recommend refactoring opportunities with specific patterns
90
- 5. Evaluate API design for consistency and usability`,
91
- focusAreas: ['architecture', 'maintainability'],
92
- evaluationCriteria: [
93
- 'Single responsibility',
94
- 'Open/closed principle',
95
- 'Liskov substitution',
96
- 'Interface segregation',
97
- 'Dependency inversion',
98
- 'Coupling and cohesion',
99
- 'Abstraction levels',
100
- 'Error handling patterns',
101
- 'API consistency',
102
- 'Technical debt indicators',
103
- ],
21
+ name: 'Software Architect', description: 'Architecture and design',
22
+ systemPrompt: 'Software architect. Focus on SOLID, coupling, abstractions.',
23
+ focusAreas: ['architecture', 'maintainability'], evaluationCriteria: ['SOLID', 'Coupling', 'Patterns'],
104
24
  },
105
25
  correctness_analyst: {
106
- name: 'Correctness Analyst',
107
- description: 'Specializes in logic errors, edge cases, and bugs',
108
- systemPrompt: `You are a meticulous code analyst focused on correctness:
109
- - Logic errors and off-by-one mistakes
110
- - Edge cases and boundary conditions
111
- - Null/undefined handling
112
- - Type safety issues
113
- - Race conditions and concurrency bugs
114
- - Error handling completeness
115
- - State management issues
116
-
117
- When reviewing code:
118
- 1. Trace execution paths looking for logic errors
119
- 2. Identify missing edge case handling
120
- 3. Spot potential null pointer/undefined errors
121
- 4. Check for race conditions in async code
122
- 5. Verify error handling covers failure modes`,
123
- focusAreas: ['correctness', 'testing'],
124
- evaluationCriteria: [
125
- 'Off-by-one errors',
126
- 'Null/undefined safety',
127
- 'Boundary conditions',
128
- 'Integer overflow',
129
- 'Floating point precision',
130
- 'Race conditions',
131
- 'Deadlocks',
132
- 'Exception handling',
133
- 'State consistency',
134
- 'Test coverage gaps',
135
- ],
26
+ name: 'Correctness Analyst', description: 'Logic errors and bugs',
27
+ systemPrompt: 'Correctness analyst. Focus on logic errors, edge cases, race conditions.',
28
+ focusAreas: ['correctness', 'testing'], evaluationCriteria: ['Logic', 'Edge cases', 'Concurrency'],
136
29
  },
137
30
  general_reviewer: {
138
- name: 'General Reviewer',
139
- description: 'Balanced review across all areas',
140
- systemPrompt: `You are a senior software engineer conducting a thorough code review.
141
- Review the code across multiple dimensions:
142
- - Correctness: Logic errors, edge cases, bugs
143
- - Security: Vulnerabilities, input validation
144
- - Performance: Efficiency, complexity
145
- - Maintainability: Readability, patterns, documentation
146
-
147
- Prioritize findings by impact and likelihood. Be specific with file paths
148
- and line numbers. Provide actionable suggestions.`,
31
+ name: 'General Reviewer', description: 'Balanced review',
32
+ systemPrompt: 'Senior engineer. Review correctness, security, performance, maintainability.',
149
33
  focusAreas: ['security', 'performance', 'architecture', 'correctness', 'maintainability'],
150
- evaluationCriteria: [
151
- 'Logic correctness',
152
- 'Security vulnerabilities',
153
- 'Performance issues',
154
- 'Code quality',
155
- 'Documentation',
156
- ],
34
+ evaluationCriteria: ['Correctness', 'Security', 'Performance', 'Quality'],
157
35
  },
158
36
  };
159
- /**
160
- * Select the best expert role based on requested focus areas
161
- */
37
+ /** @deprecated Use handoff.ts selectRole() instead */
162
38
  export function selectExpertRole(focusAreas) {
163
- if (!focusAreas || focusAreas.length === 0) {
39
+ if (!focusAreas || focusAreas.length === 0)
164
40
  return EXPERT_ROLES.general_reviewer;
165
- }
166
- // Prioritize security if it's in the list
167
- if (focusAreas.includes('security')) {
41
+ if (focusAreas.includes('security'))
168
42
  return EXPERT_ROLES.security_auditor;
169
- }
170
- // Check for performance/scalability
171
- if (focusAreas.includes('performance') || focusAreas.includes('scalability')) {
43
+ if (focusAreas.includes('performance') || focusAreas.includes('scalability'))
172
44
  return EXPERT_ROLES.performance_engineer;
173
- }
174
- // Check for architecture/maintainability
175
- if (focusAreas.includes('architecture') || focusAreas.includes('maintainability')) {
45
+ if (focusAreas.includes('architecture') || focusAreas.includes('maintainability'))
176
46
  return EXPERT_ROLES.architect;
177
- }
178
- // Check for correctness/testing
179
- if (focusAreas.includes('correctness') || focusAreas.includes('testing')) {
47
+ if (focusAreas.includes('correctness') || focusAreas.includes('testing'))
180
48
  return EXPERT_ROLES.correctness_analyst;
181
- }
182
49
  return EXPERT_ROLES.general_reviewer;
183
50
  }
184
51
  // =============================================================================
@@ -9,13 +9,19 @@ import { existsSync, writeFileSync, unlinkSync, mkdtempSync } from 'fs';
9
9
  import { tmpdir } from 'os';
10
10
  import { join } from 'path';
11
11
  import { registerAdapter, } from './base.js';
12
- import { parseReviewOutput, parseLegacyMarkdownOutput, getReviewOutputJsonSchema, getPeerOutputJsonSchema, parsePeerOutput } from '../schema.js';
12
+ import { parseReviewOutput, parseLegacyMarkdownOutput, getReviewOutputJsonSchema, getPeerOutputJsonSchema, parsePeerOutput, isSubstantiveReview } from '../schema.js';
13
+ import { CliExecutor } from '../executor.js';
14
+ import { CodexEventDecoder } from '../decoders/index.js';
13
15
  import { buildSimpleHandoff, buildHandoffPrompt, buildPeerPrompt, selectRole, } from '../handoff.js';
14
16
  // =============================================================================
15
17
  // CONFIGURATION
16
18
  // =============================================================================
17
- const INACTIVITY_TIMEOUT_MS = 120000; // 2 min of no output = timeout
18
- const MAX_TIMEOUT_MS = 3600000; // 60 min absolute max
19
+ const COLD_START_TIMEOUT_MS = {
20
+ high: 180_000, // 3 min waiting for first JSONL event
21
+ xhigh: 300_000, // 5 min — xhigh thinks longer before first event
22
+ };
23
+ const STREAMING_TIMEOUT_MS = 90_000; // 90s — if events stop mid-stream
24
+ const MAX_TIMEOUT_MS = 3_600_000; // 60 min absolute max
19
25
  const MAX_RETRIES = 2;
20
26
  const MAX_BUFFER_SIZE = 1024 * 1024; // 1MB max buffer
21
27
  // =============================================================================
@@ -139,23 +145,14 @@ export class CodexAdapter {
139
145
  executionTimeMs: Date.now() - startTime,
140
146
  };
141
147
  }
142
- // Check for empty/minimal data on any parse path
143
- // A valid review may have findings, agreements, disagreements, alternatives,
144
- // or a non-default risk assessment. Only retry if truly empty across all fields.
145
- const hasMinimalData = output.findings.length === 0 &&
146
- output.agreements.length === 0 &&
147
- output.disagreements.length === 0 &&
148
- output.alternatives.length === 0 &&
149
- output.risk_assessment.overall_level === 'medium' &&
150
- output.risk_assessment.score === 50;
151
- if (hasMinimalData) {
148
+ // Check for empty/minimal output using the centralized substance check (isSubstantiveReview)
149
+ if (!isSubstantiveReview(output)) {
152
150
  if (attempt < MAX_RETRIES) {
153
151
  console.error(`[codex] Received empty output, retrying...`);
154
152
  return this.runWithRetry(request, attempt + 1, startTime, usedFallback
155
153
  ? 'Received markdown output instead of JSON. Please provide valid JSON output.'
156
- : 'Output contained no findings, agreements, or disagreements. Please provide substantive review.', result.stdout);
154
+ : 'Output contained no substantive review content. Please provide findings or analysis.', result.stdout);
157
155
  }
158
- // Final attempt with no data — report failure
159
156
  return {
160
157
  success: false,
161
158
  error: {
@@ -312,138 +309,83 @@ export class CodexAdapter {
312
309
  executionTimeMs: Date.now() - startTime };
313
310
  }
314
311
  }
315
- runCli(prompt, workingDir, reasoningEffort, schemaGetter, serviceTier) {
316
- return new Promise((resolve, reject) => {
317
- // Create temp schema file for structured output
318
- let schemaFile = null;
319
- try {
320
- const tempDir = mkdtempSync(join(tmpdir(), 'codex-schema-'));
321
- schemaFile = join(tempDir, 'schema.json');
322
- const schema = schemaGetter();
323
- writeFileSync(schemaFile, JSON.stringify(schema, null, 2), 'utf-8');
324
- }
325
- catch (err) {
326
- console.error('[codex] Warning: Failed to create schema file, continuing without structured output:', err);
327
- schemaFile = null;
328
- }
329
- const args = [
330
- 'exec',
331
- '-m', 'gpt-5.4',
332
- '-c', `model_reasoning_effort=${reasoningEffort}`,
333
- '-c', 'model_reasoning_summary_format=experimental',
334
- '--dangerously-bypass-approvals-and-sandbox',
335
- '--skip-git-repo-check',
336
- '-C', workingDir,
337
- ];
338
- // Add service tier if specified (priority = fast mode, flex = cheap mode)
339
- if (serviceTier && serviceTier !== 'default') {
340
- args.push('-c', `service_tier=${serviceTier}`);
341
- }
342
- // Add schema enforcement if available
343
- if (schemaFile) {
344
- args.push('--output-schema', schemaFile);
345
- }
346
- // Use '-' to read prompt from stdin — more stable for complex prompts
347
- // with newlines, backticks, JSON templates, etc.
348
- args.push('-');
349
- const proc = spawn('codex', args, {
350
- cwd: workingDir,
351
- stdio: ['pipe', 'pipe', 'pipe'], // stdin is pipe for prompt delivery
352
- env: { ...process.env }
353
- });
354
- // Guard against EPIPE if the child exits before consuming stdin.
355
- // Log but don't reject — let the `close` handler capture the real exit code.
356
- proc.stdin.on('error', (err) => {
357
- console.error(`[codex] stdin error (likely EPIPE): ${err.message}`);
358
- });
359
- // Deliver prompt via stdin
360
- proc.stdin.write(prompt);
361
- proc.stdin.end();
362
- let stdout = '';
363
- let stderr = '';
364
- let truncated = false;
365
- let inactivityTimer;
366
- const cliStartTime = Date.now();
367
- let lastProgressTime = cliStartTime;
368
- let dataChunks = 0;
369
- // Show initial progress message
370
- const tierLabel = serviceTier && serviceTier !== 'default' ? ` [${serviceTier}]` : '';
371
- console.error(`[codex] Running review with ${reasoningEffort} reasoning${tierLabel}...`);
372
- const maxTimer = setTimeout(() => {
373
- proc.kill('SIGTERM');
374
- reject(new Error('MAX_TIMEOUT'));
375
- }, MAX_TIMEOUT_MS);
376
- const resetInactivityTimer = () => {
377
- clearTimeout(inactivityTimer);
378
- inactivityTimer = setTimeout(() => {
379
- proc.kill('SIGTERM');
380
- reject(new Error('TIMEOUT'));
381
- }, INACTIVITY_TIMEOUT_MS);
382
- };
383
- resetInactivityTimer();
384
- proc.stdout.on('data', (data) => {
385
- resetInactivityTimer();
386
- dataChunks++;
387
- // Show progress dot every 5 chunks
388
- if (dataChunks % 5 === 0) {
389
- process.stderr.write('.');
390
- }
391
- // Show elapsed time every 10 seconds
392
- const now = Date.now();
393
- if (now - lastProgressTime > 10000) {
394
- const elapsed = Math.round((now - cliStartTime) / 1000);
395
- console.error(` [${elapsed}s]`);
396
- lastProgressTime = now;
397
- }
398
- if (stdout.length < MAX_BUFFER_SIZE) {
399
- stdout += data.toString();
400
- if (stdout.length > MAX_BUFFER_SIZE) {
401
- stdout = stdout.slice(0, MAX_BUFFER_SIZE);
402
- truncated = true;
403
- }
404
- }
405
- });
406
- proc.stderr.on('data', (data) => {
407
- resetInactivityTimer();
408
- if (stderr.length < MAX_BUFFER_SIZE) {
409
- stderr += data.toString();
410
- if (stderr.length > MAX_BUFFER_SIZE) {
411
- stderr = stderr.slice(0, MAX_BUFFER_SIZE);
412
- }
413
- }
414
- });
415
- proc.on('close', (code) => {
416
- clearTimeout(inactivityTimer);
417
- clearTimeout(maxTimer);
418
- const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
419
- console.error(` ✓ [${elapsed}s]`);
420
- // Cleanup temp schema file
421
- if (schemaFile) {
422
- try {
423
- unlinkSync(schemaFile);
424
- }
425
- catch {
426
- // Ignore cleanup errors
427
- }
428
- }
429
- resolve({ stdout, stderr, exitCode: code ?? -1, truncated });
430
- });
431
- proc.on('error', (err) => {
432
- clearTimeout(inactivityTimer);
433
- clearTimeout(maxTimer);
434
- console.error(' ✗');
435
- // Cleanup temp schema file
436
- if (schemaFile) {
437
- try {
438
- unlinkSync(schemaFile);
439
- }
440
- catch {
441
- // Ignore cleanup errors
442
- }
312
+ async runCli(prompt, workingDir, reasoningEffort, schemaGetter, serviceTier) {
313
+ // Create temp schema file for structured output
314
+ let schemaFile = null;
315
+ try {
316
+ const tempDir = mkdtempSync(join(tmpdir(), 'codex-schema-'));
317
+ schemaFile = join(tempDir, 'schema.json');
318
+ const schema = schemaGetter();
319
+ writeFileSync(schemaFile, JSON.stringify(schema, null, 2), 'utf-8');
320
+ }
321
+ catch (err) {
322
+ console.error('[codex] Warning: Failed to create schema file:', err);
323
+ schemaFile = null;
324
+ }
325
+ const args = [
326
+ 'exec',
327
+ '--json', // JSONL streaming events
328
+ '-m', 'gpt-5.4',
329
+ '-c', `model_reasoning_effort=${reasoningEffort}`,
330
+ '-c', 'model_reasoning_summary_format=experimental',
331
+ '--dangerously-bypass-approvals-and-sandbox',
332
+ '--skip-git-repo-check',
333
+ '-C', workingDir,
334
+ ];
335
+ if (serviceTier && serviceTier !== 'default') {
336
+ args.push('-c', `service_tier=${serviceTier}`);
337
+ }
338
+ if (schemaFile) {
339
+ args.push('--output-schema', schemaFile);
340
+ }
341
+ args.push('-'); // Read prompt from stdin
342
+ const decoder = new CodexEventDecoder();
343
+ const cliStartTime = Date.now();
344
+ let firstEventReceived = false;
345
+ const tierLabel = serviceTier && serviceTier !== 'default' ? ` [${serviceTier}]` : '';
346
+ console.error(`[codex] Running review with ${reasoningEffort} reasoning${tierLabel}...`);
347
+ decoder.onProgress = (eventType, detail) => {
348
+ const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
349
+ const detailStr = detail ? ` — ${detail}` : '';
350
+ console.error(`[codex] ${eventType}${detailStr} (${elapsed}s)`);
351
+ };
352
+ const executor = new CliExecutor({
353
+ command: 'codex',
354
+ args,
355
+ cwd: workingDir,
356
+ stdin: prompt,
357
+ inactivityTimeoutMs: COLD_START_TIMEOUT_MS[reasoningEffort] || COLD_START_TIMEOUT_MS.high,
358
+ maxTimeoutMs: MAX_TIMEOUT_MS,
359
+ maxBufferSize: MAX_BUFFER_SIZE,
360
+ onLine: (line) => {
361
+ decoder.processLine(line);
362
+ // Phase transition: tighten timeout after first event
363
+ if (!firstEventReceived) {
364
+ firstEventReceived = true;
365
+ executor.setInactivityTimeout(STREAMING_TIMEOUT_MS);
443
366
  }
444
- reject(err);
445
- });
367
+ },
446
368
  });
369
+ try {
370
+ const result = await executor.run();
371
+ const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
372
+ console.error(`[codex] ✓ complete (${elapsed}s)`);
373
+ const finalResponse = decoder.getFinalResponse();
374
+ return {
375
+ stdout: finalResponse || result.rawStdout,
376
+ stderr: result.stderr,
377
+ exitCode: result.exitCode,
378
+ truncated: result.truncated,
379
+ };
380
+ }
381
+ finally {
382
+ if (schemaFile) {
383
+ try {
384
+ unlinkSync(schemaFile);
385
+ }
386
+ catch { /* ignore */ }
387
+ }
388
+ }
447
389
  }
448
390
  categorizeError(stderr) {
449
391
  const lower = stderr.toLowerCase();