npm - cc-reviewer - Versions diffs - 1.9.0 → 2.0.0 - Mend

cc-reviewer 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/commands/ask-codex.md +1 -1
package/commands/ask-multi.md +1 -1
package/commands/codex-xhigh.md +1 -1
package/commands/codex.md +1 -1
package/commands/multi.md +1 -1
package/dist/adapters/base.d.ts +12 -14
package/dist/adapters/base.js +22 -155
package/dist/adapters/codex.js +87 -145
package/dist/adapters/gemini.js +49 -103
package/dist/decoders/codex.d.ts +60 -0
package/dist/decoders/codex.js +116 -0
package/dist/decoders/gemini.d.ts +33 -0
package/dist/decoders/gemini.js +58 -0
package/dist/decoders/index.d.ts +4 -0
package/dist/decoders/index.js +2 -0
package/dist/executor.d.ts +103 -0
package/dist/executor.js +244 -0
package/dist/handoff.d.ts +0 -1
package/dist/handoff.js +76 -398
package/dist/schema.d.ts +11 -6
package/dist/schema.js +22 -1
package/dist/tools/feedback.d.ts +3 -3
package/dist/tools/feedback.js +6 -9
package/dist/tools/peer.d.ts +1 -1
package/dist/tools/peer.js +4 -4
package/dist/types.d.ts +1 -1
package/package.json +1 -1

package/commands/ask-codex.md CHANGED Viewed

@@ -21,7 +21,7 @@ Call `ask_codex` with:
   "taskType": "<infer from request: plan|debug|explain|question|fix|explore|general>",
   "relevantFiles": ["<files related to the question>"],
   "context": "<any error messages or prior analysis>",
-  "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'priority'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit>"
+  "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'fast'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit>"
 }
 ```

package/commands/ask-multi.md CHANGED Viewed

@@ -20,7 +20,7 @@ Call `ask_multi` with:
   "taskType": "<infer from request: plan|debug|explain|question|fix|explore|general>",
   "relevantFiles": ["<files related to the question>"],
   "context": "<any error messages or prior analysis>",
-  "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'priority'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit. Applies to Codex only.>"
+  "serviceTier": "<if user says 'fast mode'/'fast'/'priority' → 'fast'; if 'flex'/'cheap'/'budget' → 'flex'; otherwise omit. Applies to Codex only.>"
 }
 ```

package/commands/codex-xhigh.md CHANGED Viewed

@@ -18,7 +18,7 @@ Use the `codex_review` MCP tool with `reasoningEffort: "xhigh"` for deeper analy
    - `workingDir`: current working directory
    - `ccOutput`: brief summary of recent changes or context
    - `reasoningEffort`: "xhigh" (this is the key difference from /codex)
-   - `serviceTier`: if user says "fast mode"/"fast"/"priority" → "priority"; if "flex"/"cheap"/"budget" → "flex"; otherwise omit
+   - `serviceTier`: if user says "fast mode"/"fast"/"priority" → "fast"; if "flex"/"cheap"/"budget" → "flex"; otherwise omit
    - `focus`: extracted from $ARGUMENTS if it's a known focus area
    - `customInstructions`: $ARGUMENTS if it's custom text

package/commands/codex.md CHANGED Viewed

@@ -49,7 +49,7 @@ Call `codex_review` with:
 ```
 ### Service Tier (from $ARGUMENTS)
-- If user says "fast mode", "fast", or "priority" → set `serviceTier: "priority"` (faster, ~2x cost)
+- If user says "fast mode", "fast", or "priority" → set `serviceTier: "fast"` (priority processing, ~2x cost)
 - If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"` (50% cheaper, slower)
 - Otherwise → omit `serviceTier` (uses default tier)

package/commands/multi.md CHANGED Viewed

@@ -47,7 +47,7 @@ Call `multi_review` with:
 ```
 ### Service Tier (from $ARGUMENTS, applies to Codex only)
-- If user says "fast mode", "fast", or "priority" → set `serviceTier: "priority"`
+- If user says "fast mode", "fast", or "priority" → set `serviceTier: "fast"`
 - If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"`
 - Otherwise → omit `serviceTier`

package/dist/adapters/base.d.ts CHANGED Viewed

@@ -42,9 +42,19 @@ export interface ReviewRequest {
     reasoningEffort?: ReasoningEffort;
     /** Service tier (for models that support it: priority = fast, flex = cheap) */
     serviceTier?: ServiceTier;
-    /** Expert role configuration (optional override) */
-    expertRole?: ExpertRole;
 }
+/** @deprecated Use handoff.ts roles instead */
+export interface ExpertRole {
+    name: string;
+    description: string;
+    systemPrompt: string;
+    focusAreas: FocusArea[];
+    evaluationCriteria: string[];
+}
+/** @deprecated Use handoff.ts selectRole() instead */
+export declare const EXPERT_ROLES: Record<string, ExpertRole>;
+/** @deprecated Use handoff.ts selectRole() instead */
+export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
 export interface PeerRequest {
     /** Working directory containing the code */
     workingDir: string;
@@ -65,18 +75,6 @@ export interface PeerRequest {
     /** Service tier (for models that support it: priority = fast, flex = cheap) */
     serviceTier?: ServiceTier;
 }
-export interface ExpertRole {
-    name: string;
-    description: string;
-    systemPrompt: string;
-    focusAreas: FocusArea[];
-    evaluationCriteria: string[];
-}
-export declare const EXPERT_ROLES: Record<string, ExpertRole>;
-/**
- * Select the best expert role based on requested focus areas
- */
-export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
 export interface ReviewSuccess {
     success: true;
     output: ReviewOutput;

package/dist/adapters/base.js CHANGED Viewed

@@ -5,180 +5,47 @@
  * Makes it easy to add new models (Ollama, Azure, etc.) without
  * changing the core orchestration logic.
  */
+/** @deprecated Use handoff.ts selectRole() instead */
 export const EXPERT_ROLES = {
     security_auditor: {
-        name: 'Security Auditor',
-        description: 'Specializes in security vulnerabilities and secure coding practices',
-        systemPrompt: `You are a senior security auditor with expertise in:
-- OWASP Top 10 vulnerabilities (injection, broken auth, XSS, CSRF, etc.)
-- Authentication and authorization flaws
-- Input validation and sanitization
-- Cryptographic weaknesses and misuse
-- Sensitive data exposure
-- Security misconfigurations
-- Dependency vulnerabilities
-When reviewing code:
-1. Identify specific vulnerability patterns with CWE IDs when applicable
-2. Rate severity using CVSS-like scoring (critical/high/medium/low/info)
-3. Provide concrete proof-of-concept or attack scenarios
-4. Suggest specific remediations with code examples
-5. Note any security best practices being followed (to validate CC's work)`,
-        focusAreas: ['security'],
-        evaluationCriteria: [
-            'SQL/NoSQL injection vectors',
-            'XSS (stored, reflected, DOM)',
-            'Authentication bypass',
-            'Authorization flaws (IDOR, privilege escalation)',
-            'Insecure deserialization',
-            'SSRF vulnerabilities',
-            'Path traversal',
-            'Command injection',
-            'Secrets in code',
-            'Insecure dependencies',
-        ],
+        name: 'Security Auditor', description: 'Security vulnerabilities',
+        systemPrompt: 'Security auditor. Focus on injection, auth bypass, data exposure, input validation.',
+        focusAreas: ['security'], evaluationCriteria: ['Injection', 'Auth', 'Data exposure'],
     },
     performance_engineer: {
-        name: 'Performance Engineer',
-        description: 'Specializes in performance optimization and efficiency',
-        systemPrompt: `You are a senior performance engineer with expertise in:
-- Algorithm complexity analysis (Big-O notation)
-- Memory management and leak detection
-- Database query optimization
-- Caching strategies
-- Concurrency and parallelism
-- I/O optimization
-- Bundle size and load time optimization
-When reviewing code:
-1. Analyze algorithmic complexity with Big-O notation
-2. Identify memory leaks, unnecessary allocations, or retention issues
-3. Spot N+1 query problems and suggest batching/caching
-4. Recommend specific optimizations with expected improvements
-5. Validate any performance claims from CC with analysis`,
-        focusAreas: ['performance', 'scalability'],
-        evaluationCriteria: [
-            'Time complexity',
-            'Space complexity',
-            'Memory leaks',
-            'Unnecessary re-renders',
-            'N+1 queries',
-            'Missing indexes',
-            'Inefficient loops',
-            'Blocking operations',
-            'Cache invalidation',
-            'Resource pooling',
-        ],
+        name: 'Performance Engineer', description: 'Performance optimization',
+        systemPrompt: 'Performance engineer. Focus on complexity, N+1 queries, memory leaks.',
+        focusAreas: ['performance', 'scalability'], evaluationCriteria: ['Complexity', 'Memory', 'I/O'],
     },
     architect: {
-        name: 'Software Architect',
-        description: 'Specializes in design patterns, architecture, and maintainability',
-        systemPrompt: `You are a senior software architect with expertise in:
-- Design patterns (GoF, enterprise patterns)
-- SOLID principles
-- Clean architecture and DDD
-- API design and contracts
-- Dependency management
-- Code organization and modularity
-- Technical debt assessment
-When reviewing code:
-1. Evaluate adherence to design patterns and principles
-2. Identify coupling issues and suggest decoupling strategies
-3. Assess abstraction levels and cohesion
-4. Recommend refactoring opportunities with specific patterns
-5. Evaluate API design for consistency and usability`,
-        focusAreas: ['architecture', 'maintainability'],
-        evaluationCriteria: [
-            'Single responsibility',
-            'Open/closed principle',
-            'Liskov substitution',
-            'Interface segregation',
-            'Dependency inversion',
-            'Coupling and cohesion',
-            'Abstraction levels',
-            'Error handling patterns',
-            'API consistency',
-            'Technical debt indicators',
-        ],
+        name: 'Software Architect', description: 'Architecture and design',
+        systemPrompt: 'Software architect. Focus on SOLID, coupling, abstractions.',
+        focusAreas: ['architecture', 'maintainability'], evaluationCriteria: ['SOLID', 'Coupling', 'Patterns'],
     },
     correctness_analyst: {
-        name: 'Correctness Analyst',
-        description: 'Specializes in logic errors, edge cases, and bugs',
-        systemPrompt: `You are a meticulous code analyst focused on correctness:
-- Logic errors and off-by-one mistakes
-- Edge cases and boundary conditions
-- Null/undefined handling
-- Type safety issues
-- Race conditions and concurrency bugs
-- Error handling completeness
-- State management issues
-When reviewing code:
-1. Trace execution paths looking for logic errors
-2. Identify missing edge case handling
-3. Spot potential null pointer/undefined errors
-4. Check for race conditions in async code
-5. Verify error handling covers failure modes`,
-        focusAreas: ['correctness', 'testing'],
-        evaluationCriteria: [
-            'Off-by-one errors',
-            'Null/undefined safety',
-            'Boundary conditions',
-            'Integer overflow',
-            'Floating point precision',
-            'Race conditions',
-            'Deadlocks',
-            'Exception handling',
-            'State consistency',
-            'Test coverage gaps',
-        ],
+        name: 'Correctness Analyst', description: 'Logic errors and bugs',
+        systemPrompt: 'Correctness analyst. Focus on logic errors, edge cases, race conditions.',
+        focusAreas: ['correctness', 'testing'], evaluationCriteria: ['Logic', 'Edge cases', 'Concurrency'],
     },
     general_reviewer: {
-        name: 'General Reviewer',
-        description: 'Balanced review across all areas',
-        systemPrompt: `You are a senior software engineer conducting a thorough code review.
-Review the code across multiple dimensions:
-- Correctness: Logic errors, edge cases, bugs
-- Security: Vulnerabilities, input validation
-- Performance: Efficiency, complexity
-- Maintainability: Readability, patterns, documentation
-Prioritize findings by impact and likelihood. Be specific with file paths
-and line numbers. Provide actionable suggestions.`,
+        name: 'General Reviewer', description: 'Balanced review',
+        systemPrompt: 'Senior engineer. Review correctness, security, performance, maintainability.',
         focusAreas: ['security', 'performance', 'architecture', 'correctness', 'maintainability'],
-        evaluationCriteria: [
-            'Logic correctness',
-            'Security vulnerabilities',
-            'Performance issues',
-            'Code quality',
-            'Documentation',
-        ],
+        evaluationCriteria: ['Correctness', 'Security', 'Performance', 'Quality'],
     },
 };
-/**
- * Select the best expert role based on requested focus areas
- */
+/** @deprecated Use handoff.ts selectRole() instead */
 export function selectExpertRole(focusAreas) {
-    if (!focusAreas || focusAreas.length === 0) {
+    if (!focusAreas || focusAreas.length === 0)
         return EXPERT_ROLES.general_reviewer;
-    }
-    // Prioritize security if it's in the list
-    if (focusAreas.includes('security')) {
+    if (focusAreas.includes('security'))
         return EXPERT_ROLES.security_auditor;
-    }
-    // Check for performance/scalability
-    if (focusAreas.includes('performance') || focusAreas.includes('scalability')) {
+    if (focusAreas.includes('performance') || focusAreas.includes('scalability'))
         return EXPERT_ROLES.performance_engineer;
-    }
-    // Check for architecture/maintainability
-    if (focusAreas.includes('architecture') || focusAreas.includes('maintainability')) {
+    if (focusAreas.includes('architecture') || focusAreas.includes('maintainability'))
         return EXPERT_ROLES.architect;
-    }
-    // Check for correctness/testing
-    if (focusAreas.includes('correctness') || focusAreas.includes('testing')) {
+    if (focusAreas.includes('correctness') || focusAreas.includes('testing'))
         return EXPERT_ROLES.correctness_analyst;
-    }
     return EXPERT_ROLES.general_reviewer;
 }
 // =============================================================================

package/dist/adapters/codex.js CHANGED Viewed

@@ -9,13 +9,19 @@ import { existsSync, writeFileSync, unlinkSync, mkdtempSync } from 'fs';
 import { tmpdir } from 'os';
 import { join } from 'path';
 import { registerAdapter, } from './base.js';
-import { parseReviewOutput, parseLegacyMarkdownOutput, getReviewOutputJsonSchema, getPeerOutputJsonSchema, parsePeerOutput } from '../schema.js';
+import { parseReviewOutput, parseLegacyMarkdownOutput, getReviewOutputJsonSchema, getPeerOutputJsonSchema, parsePeerOutput, isSubstantiveReview } from '../schema.js';
+import { CliExecutor } from '../executor.js';
+import { CodexEventDecoder } from '../decoders/index.js';
 import { buildSimpleHandoff, buildHandoffPrompt, buildPeerPrompt, selectRole, } from '../handoff.js';
 // =============================================================================
 // CONFIGURATION
 // =============================================================================
-const INACTIVITY_TIMEOUT_MS = 120000; // 2 min of no output = timeout
-const MAX_TIMEOUT_MS = 3600000; // 60 min absolute max
+const COLD_START_TIMEOUT_MS = {
+    high: 180_000, // 3 min — waiting for first JSONL event
+    xhigh: 300_000, // 5 min — xhigh thinks longer before first event
+};
+const STREAMING_TIMEOUT_MS = 90_000; // 90s — if events stop mid-stream
+const MAX_TIMEOUT_MS = 3_600_000; // 60 min absolute max
 const MAX_RETRIES = 2;
 const MAX_BUFFER_SIZE = 1024 * 1024; // 1MB max buffer
 // =============================================================================
@@ -139,23 +145,14 @@ export class CodexAdapter {
                     executionTimeMs: Date.now() - startTime,
                 };
             }
-            // Check for empty/minimal data on any parse path
-            // A valid review may have findings, agreements, disagreements, alternatives,
-            // or a non-default risk assessment. Only retry if truly empty across all fields.
-            const hasMinimalData = output.findings.length === 0 &&
-                output.agreements.length === 0 &&
-                output.disagreements.length === 0 &&
-                output.alternatives.length === 0 &&
-                output.risk_assessment.overall_level === 'medium' &&
-                output.risk_assessment.score === 50;
-            if (hasMinimalData) {
+            // Check for empty/minimal output — centralized substance check
+            if (!isSubstantiveReview(output)) {
                 if (attempt < MAX_RETRIES) {
                     console.error(`[codex] Received empty output, retrying...`);
                     return this.runWithRetry(request, attempt + 1, startTime, usedFallback
                         ? 'Received markdown output instead of JSON. Please provide valid JSON output.'
-                        : 'Output contained no findings, agreements, or disagreements. Please provide substantive review.', result.stdout);
+                        : 'Output contained no substantive review content. Please provide findings or analysis.', result.stdout);
                 }
-                // Final attempt with no data — report failure
                 return {
                     success: false,
                     error: {
@@ -312,138 +309,83 @@ export class CodexAdapter {
                 executionTimeMs: Date.now() - startTime };
         }
     }
-    runCli(prompt, workingDir, reasoningEffort, schemaGetter, serviceTier) {
-        return new Promise((resolve, reject) => {
-            // Create temp schema file for structured output
-            let schemaFile = null;
-            try {
-                const tempDir = mkdtempSync(join(tmpdir(), 'codex-schema-'));
-                schemaFile = join(tempDir, 'schema.json');
-                const schema = schemaGetter();
-                writeFileSync(schemaFile, JSON.stringify(schema, null, 2), 'utf-8');
-            }
-            catch (err) {
-                console.error('[codex] Warning: Failed to create schema file, continuing without structured output:', err);
-                schemaFile = null;
-            }
-            const args = [
-                'exec',
-                '-m', 'gpt-5.4',
-                '-c', `model_reasoning_effort=${reasoningEffort}`,
-                '-c', 'model_reasoning_summary_format=experimental',
-                '--dangerously-bypass-approvals-and-sandbox',
-                '--skip-git-repo-check',
-                '-C', workingDir,
-            ];
-            // Add service tier if specified (priority = fast mode, flex = cheap mode)
-            if (serviceTier && serviceTier !== 'default') {
-                args.push('-c', `service_tier=${serviceTier}`);
-            }
-            // Add schema enforcement if available
-            if (schemaFile) {
-                args.push('--output-schema', schemaFile);
-            }
-            // Use '-' to read prompt from stdin — more stable for complex prompts
-            // with newlines, backticks, JSON templates, etc.
-            args.push('-');
-            const proc = spawn('codex', args, {
-                cwd: workingDir,
-                stdio: ['pipe', 'pipe', 'pipe'], // stdin is pipe for prompt delivery
-                env: { ...process.env }
-            });
-            // Guard against EPIPE if the child exits before consuming stdin.
-            // Log but don't reject — let the `close` handler capture the real exit code.
-            proc.stdin.on('error', (err) => {
-                console.error(`[codex] stdin error (likely EPIPE): ${err.message}`);
-            });
-            // Deliver prompt via stdin
-            proc.stdin.write(prompt);
-            proc.stdin.end();
-            let stdout = '';
-            let stderr = '';
-            let truncated = false;
-            let inactivityTimer;
-            const cliStartTime = Date.now();
-            let lastProgressTime = cliStartTime;
-            let dataChunks = 0;
-            // Show initial progress message
-            const tierLabel = serviceTier && serviceTier !== 'default' ? ` [${serviceTier}]` : '';
-            console.error(`[codex] Running review with ${reasoningEffort} reasoning${tierLabel}...`);
-            const maxTimer = setTimeout(() => {
-                proc.kill('SIGTERM');
-                reject(new Error('MAX_TIMEOUT'));
-            }, MAX_TIMEOUT_MS);
-            const resetInactivityTimer = () => {
-                clearTimeout(inactivityTimer);
-                inactivityTimer = setTimeout(() => {
-                    proc.kill('SIGTERM');
-                    reject(new Error('TIMEOUT'));
-                }, INACTIVITY_TIMEOUT_MS);
-            };
-            resetInactivityTimer();
-            proc.stdout.on('data', (data) => {
-                resetInactivityTimer();
-                dataChunks++;
-                // Show progress dot every 5 chunks
-                if (dataChunks % 5 === 0) {
-                    process.stderr.write('.');
-                }
-                // Show elapsed time every 10 seconds
-                const now = Date.now();
-                if (now - lastProgressTime > 10000) {
-                    const elapsed = Math.round((now - cliStartTime) / 1000);
-                    console.error(` [${elapsed}s]`);
-                    lastProgressTime = now;
-                }
-                if (stdout.length < MAX_BUFFER_SIZE) {
-                    stdout += data.toString();
-                    if (stdout.length > MAX_BUFFER_SIZE) {
-                        stdout = stdout.slice(0, MAX_BUFFER_SIZE);
-                        truncated = true;
-                    }
-                }
-            });
-            proc.stderr.on('data', (data) => {
-                resetInactivityTimer();
-                if (stderr.length < MAX_BUFFER_SIZE) {
-                    stderr += data.toString();
-                    if (stderr.length > MAX_BUFFER_SIZE) {
-                        stderr = stderr.slice(0, MAX_BUFFER_SIZE);
-                    }
-                }
-            });
-            proc.on('close', (code) => {
-                clearTimeout(inactivityTimer);
-                clearTimeout(maxTimer);
-                const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
-                console.error(` ✓ [${elapsed}s]`);
-                // Cleanup temp schema file
-                if (schemaFile) {
-                    try {
-                        unlinkSync(schemaFile);
-                    }
-                    catch {
-                        // Ignore cleanup errors
-                    }
-                }
-                resolve({ stdout, stderr, exitCode: code ?? -1, truncated });
-            });
-            proc.on('error', (err) => {
-                clearTimeout(inactivityTimer);
-                clearTimeout(maxTimer);
-                console.error(' ✗');
-                // Cleanup temp schema file
-                if (schemaFile) {
-                    try {
-                        unlinkSync(schemaFile);
-                    }
-                    catch {
-                        // Ignore cleanup errors
-                    }
+    async runCli(prompt, workingDir, reasoningEffort, schemaGetter, serviceTier) {
+        // Create temp schema file for structured output
+        let schemaFile = null;
+        try {
+            const tempDir = mkdtempSync(join(tmpdir(), 'codex-schema-'));
+            schemaFile = join(tempDir, 'schema.json');
+            const schema = schemaGetter();
+            writeFileSync(schemaFile, JSON.stringify(schema, null, 2), 'utf-8');
+        }
+        catch (err) {
+            console.error('[codex] Warning: Failed to create schema file:', err);
+            schemaFile = null;
+        }
+        const args = [
+            'exec',
+            '--json', // JSONL streaming events
+            '-m', 'gpt-5.4',
+            '-c', `model_reasoning_effort=${reasoningEffort}`,
+            '-c', 'model_reasoning_summary_format=experimental',
+            '--dangerously-bypass-approvals-and-sandbox',
+            '--skip-git-repo-check',
+            '-C', workingDir,
+        ];
+        if (serviceTier && serviceTier !== 'default') {
+            args.push('-c', `service_tier=${serviceTier}`);
+        }
+        if (schemaFile) {
+            args.push('--output-schema', schemaFile);
+        }
+        args.push('-'); // Read prompt from stdin
+        const decoder = new CodexEventDecoder();
+        const cliStartTime = Date.now();
+        let firstEventReceived = false;
+        const tierLabel = serviceTier && serviceTier !== 'default' ? ` [${serviceTier}]` : '';
+        console.error(`[codex] Running review with ${reasoningEffort} reasoning${tierLabel}...`);
+        decoder.onProgress = (eventType, detail) => {
+            const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
+            const detailStr = detail ? ` — ${detail}` : '';
+            console.error(`[codex] ${eventType}${detailStr} (${elapsed}s)`);
+        };
+        const executor = new CliExecutor({
+            command: 'codex',
+            args,
+            cwd: workingDir,
+            stdin: prompt,
+            inactivityTimeoutMs: COLD_START_TIMEOUT_MS[reasoningEffort] || COLD_START_TIMEOUT_MS.high,
+            maxTimeoutMs: MAX_TIMEOUT_MS,
+            maxBufferSize: MAX_BUFFER_SIZE,
+            onLine: (line) => {
+                decoder.processLine(line);
+                // Phase transition: tighten timeout after first event
+                if (!firstEventReceived) {
+                    firstEventReceived = true;
+                    executor.setInactivityTimeout(STREAMING_TIMEOUT_MS);
                 }
-                reject(err);
-            });
+            },
         });
+        try {
+            const result = await executor.run();
+            const elapsed = Math.round((Date.now() - cliStartTime) / 1000);
+            console.error(`[codex] ✓ complete (${elapsed}s)`);
+            const finalResponse = decoder.getFinalResponse();
+            return {
+                stdout: finalResponse || result.rawStdout,
+                stderr: result.stderr,
+                exitCode: result.exitCode,
+                truncated: result.truncated,
+            };
+        }
+        finally {
+            if (schemaFile) {
+                try {
+                    unlinkSync(schemaFile);
+                }
+                catch { /* ignore */ }
+            }
+        }
     }
     categorizeError(stderr) {
         const lower = stderr.toLowerCase();