@in-the-loop-labs/pair-review 3.5.0 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,19 +17,32 @@ const { StreamParser, parseClaudeLine } = require('./stream-parser');
17
17
  const BIN_DIR = path.join(__dirname, '..', '..', 'bin');
18
18
 
19
19
  /**
20
- * Claude model definitions with tier mappings
20
+ * Claude model definitions with tier mappings.
21
+ *
22
+ * Effort is set via the CLAUDE_CODE_EFFORT_LEVEL env var (highest-precedence way
23
+ * to control reasoning effort; takes precedence over the --effort CLI flag and is
24
+ * not deprecated). Extended thinking is forced on globally via `--thinking enabled`
25
+ * in the constructor's base args; individual models can override this via extra_args
26
+ * (e.g., Haiku uses adaptive thinking for efficiency).
27
+ *
28
+ * Effort support by model (newest CLIs): Opus 4.8 / 4.7 support low|medium|high|
29
+ * xhigh|max; Opus 4.6 & Sonnet 4.6 support low|medium|high|max (no xhigh); Haiku
30
+ * has no effort levels.
21
31
  */
22
32
  const CLAUDE_MODELS = [
33
+ // ── Thorough tier ───────────────────────────────────────────────────────
23
34
  {
24
- id: 'opus-4.7-xhigh',
35
+ id: 'opus',
36
+ aliases: ['opus-4.7-xhigh'],
25
37
  cli_model: 'claude-opus-4-7',
26
38
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'xhigh' },
27
39
  name: 'Opus 4.7 XHigh',
28
40
  tier: 'thorough',
29
- tagline: 'Latest Gen',
30
- description: 'Opus 4.7 (latest) with extra-high effort',
31
- badge: 'Latest',
32
- badgeClass: 'badge-power'
41
+ tagline: 'Maximum Depth',
42
+ description: 'Opus 4.7 with extra-high effort — deepest analysis',
43
+ badge: 'Most Thorough',
44
+ badgeClass: 'badge-power',
45
+ default: true
33
46
  },
34
47
  {
35
48
  id: 'opus-4.7-high',
@@ -37,33 +50,46 @@ const CLAUDE_MODELS = [
37
50
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
38
51
  name: 'Opus 4.7 High',
39
52
  tier: 'thorough',
40
- tagline: 'Latest Gen',
41
- description: 'Opus 4.7 (latest) with high effort',
53
+ tagline: 'High Effort',
54
+ description: 'Opus 4.7 with high effort — thorough, quicker than XHigh',
55
+ badge: 'Thorough',
56
+ badgeClass: 'badge-power'
57
+ },
58
+ {
59
+ id: 'opus-4.8-xhigh',
60
+ cli_model: 'claude-opus-4-8',
61
+ env: { CLAUDE_CODE_EFFORT_LEVEL: 'xhigh' },
62
+ name: 'Opus 4.8 XHigh',
63
+ tier: 'thorough',
64
+ tagline: 'Newest',
65
+ description: 'Opus 4.8 (newest) with extra-high effort',
42
66
  badge: 'Latest',
43
67
  badgeClass: 'badge-power'
44
68
  },
45
69
  {
46
- id: 'opus',
47
- aliases: ['opus-4.6-high'],
48
- cli_model: 'claude-opus-4-6',
70
+ id: 'opus-4.8-high',
71
+ cli_model: 'claude-opus-4-8',
49
72
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
50
- name: 'Opus 4.6 High',
73
+ name: 'Opus 4.8 High',
51
74
  tier: 'thorough',
52
- tagline: 'Maximum Depth',
53
- description: 'Opus 4.6 with high effort — deepest analysis',
54
- badge: 'Most Thorough',
55
- badgeClass: 'badge-power',
56
- default: true
75
+ tagline: 'Newest',
76
+ description: 'Opus 4.8 (newest) with high effort',
77
+ badge: 'Latest',
78
+ badgeClass: 'badge-power'
57
79
  },
58
80
  {
59
- id: 'haiku',
60
- name: 'Haiku 4.6',
61
- tier: 'fast',
62
- tagline: 'Lightning Fast',
63
- description: 'Quick analysis for simple changes',
64
- badge: 'Fastest',
65
- badgeClass: 'badge-speed'
81
+ id: 'opus-4.6-high',
82
+ aliases: ['opus-4.6-low', 'opus-4.6-medium', 'opus-4.5'],
83
+ cli_model: 'claude-opus-4-6',
84
+ env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
85
+ name: 'Opus 4.6 High',
86
+ tier: 'thorough',
87
+ tagline: 'Previous Gen',
88
+ description: 'Opus 4.6 with high effort',
89
+ badge: 'Previous Gen',
90
+ badgeClass: 'badge-power'
66
91
  },
92
+ // ── Balanced tier ───────────────────────────────────────────────────────
67
93
  {
68
94
  id: 'sonnet-4.6',
69
95
  cli_model: 'claude-sonnet-4-6',
@@ -74,28 +100,6 @@ const CLAUDE_MODELS = [
74
100
  badge: 'Standard',
75
101
  badgeClass: 'badge-recommended'
76
102
  },
77
- {
78
- id: 'opus-4.6-low',
79
- cli_model: 'claude-opus-4-6',
80
- env: { CLAUDE_CODE_EFFORT_LEVEL: 'low' },
81
- name: 'Opus 4.6 Low',
82
- tier: 'balanced',
83
- tagline: 'Fast Opus',
84
- description: 'Opus 4.6 with low effort — quick and capable',
85
- badge: 'Balanced',
86
- badgeClass: 'badge-recommended'
87
- },
88
- {
89
- id: 'opus-4.6-medium',
90
- cli_model: 'claude-opus-4-6',
91
- env: { CLAUDE_CODE_EFFORT_LEVEL: 'medium' },
92
- name: 'Opus 4.6 Medium',
93
- tier: 'balanced',
94
- tagline: 'Balanced Opus',
95
- description: 'Opus 4.6 with medium effort — balanced depth',
96
- badge: 'Thorough',
97
- badgeClass: 'badge-power'
98
- },
99
103
  {
100
104
  id: 'opus-4.6-1m',
101
105
  cli_model: 'claude-opus-4-6[1m]',
@@ -106,15 +110,17 @@ const CLAUDE_MODELS = [
106
110
  badge: 'More Context',
107
111
  badgeClass: 'badge-power'
108
112
  },
113
+ // ── Fast tier ───────────────────────────────────────────────────────────
109
114
  {
110
- id: 'opus-4.5',
111
- cli_model: 'claude-opus-4-5-20251101',
112
- name: 'Opus 4.5',
113
- tier: 'thorough',
114
- tagline: 'Deep Thinker',
115
- description: 'Extended thinking for complex analysis',
116
- badge: 'Previous Gen',
117
- badgeClass: 'badge-power'
115
+ id: 'haiku',
116
+ cli_model: 'claude-haiku-4-5-20251001',
117
+ name: 'Haiku 4.5',
118
+ tier: 'fast',
119
+ tagline: 'Lightning Fast',
120
+ description: 'Quick analysis for simple changes',
121
+ badge: 'Fastest',
122
+ badgeClass: 'badge-speed',
123
+ extra_args: ['--thinking', 'adaptive']
118
124
  }
119
125
  ];
120
126
 
@@ -196,7 +202,12 @@ class ClaudeProvider extends AIProvider {
196
202
  // user's configured environment. To disable skills, add --disable-slash-commands
197
203
  // to extra_args in provider/model config.
198
204
  const hooksArgs = ['--settings', '{"disableAllHooks":true}'];
199
- const baseArgs = ['-p', '--verbose', ...cliModelArgs, '--output-format', 'stream-json', ...hooksArgs, ...permissionArgs];
205
+ // Force extended thinking on for every analysis call. The Claude CLI's
206
+ // `--thinking` flag accepts enabled|adaptive|disabled; we always want
207
+ // reasoning engaged for code review. Model- or user-level extra_args are
208
+ // appended later and win over this (commander honors the last occurrence).
209
+ const thinkingArgs = ['--thinking', 'enabled'];
210
+ const baseArgs = ['-p', '--verbose', ...cliModelArgs, '--output-format', 'stream-json', ...thinkingArgs, ...hooksArgs, ...permissionArgs];
200
211
  if (maxBudget) {
201
212
  const budgetNum = parseFloat(maxBudget);
202
213
  if (isNaN(budgetNum) || budgetNum <= 0) {
@@ -242,7 +253,8 @@ class ClaudeProvider extends AIProvider {
242
253
  // - string: use this exact value for --model
243
254
  // - null: explicitly suppress --model (for tools that want the model set via env instead)
244
255
  const builtIn = CLAUDE_MODELS.find(m => m.id === modelId || (m.aliases && m.aliases.includes(modelId)));
245
- const configModel = configOverrides.models?.find(m => m.id === modelId);
256
+ const modelKeys = new Set([modelId, builtIn?.id, ...(builtIn?.aliases || [])].filter(Boolean));
257
+ const configModel = configOverrides.models?.find(m => modelKeys.has(m.id));
246
258
  const resolvedCliModel = configModel?.cli_model !== undefined
247
259
  ? configModel.cli_model
248
260
  : (builtIn?.cli_model !== undefined ? builtIn.cli_model : modelId);
@@ -34,6 +34,29 @@ const BIN_DIR = path.join(__dirname, '..', '..', 'bin');
34
34
  * Deprecated (April 2026): gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.1-codex
35
35
  */
36
36
  const CODEX_MODELS = [
37
+ {
38
+ id: 'gpt-5.5-high',
39
+ cli_model: 'gpt-5.5',
40
+ extra_args: ['-c', 'model_reasoning_effort="high"'],
41
+ name: 'GPT-5.5 High',
42
+ tier: 'thorough',
43
+ tagline: 'Latest Deep',
44
+ description: 'Latest-generation GPT model with high reasoning effort for demanding PR reviews, strong code understanding, and careful cross-file analysis.',
45
+ badge: 'Recommended',
46
+ badgeClass: 'badge-recommended',
47
+ default: true
48
+ },
49
+ {
50
+ id: 'gpt-5.5-xhigh',
51
+ cli_model: 'gpt-5.5',
52
+ extra_args: ['-c', 'model_reasoning_effort="xhigh"'],
53
+ name: 'GPT-5.5 XHigh',
54
+ tier: 'thorough',
55
+ tagline: 'Frontier Depth',
56
+ description: 'GPT-5.5 with extra-high reasoning effort for the hardest reviews: architecture, concurrency, security-sensitive changes, and large codebase context.',
57
+ badge: 'Max Reasoning',
58
+ badgeClass: 'badge-power'
59
+ },
37
60
  {
38
61
  id: 'gpt-5.4-high',
39
62
  // Alias keeps results/councils saved under the previous bare `gpt-5.4`
@@ -45,9 +68,8 @@ const CODEX_MODELS = [
45
68
  tier: 'thorough',
46
69
  tagline: 'Deep Review',
47
70
  description: 'GPT-5.4 with high reasoning effort for complex multi-file reviews, architectural consistency, and subtle behavioral regressions.',
48
- badge: 'Recommended',
49
- badgeClass: 'badge-recommended',
50
- default: true
71
+ badge: 'Previous Gen',
72
+ badgeClass: 'badge-power'
51
73
  },
52
74
  {
53
75
  id: 'gpt-5.4-xhigh',
@@ -60,28 +82,6 @@ const CODEX_MODELS = [
60
82
  badge: 'Extra High',
61
83
  badgeClass: 'badge-power'
62
84
  },
63
- {
64
- id: 'gpt-5.5-high',
65
- cli_model: 'gpt-5.5',
66
- extra_args: ['-c', 'model_reasoning_effort="high"'],
67
- name: 'GPT-5.5 High',
68
- tier: 'thorough',
69
- tagline: 'Latest Deep',
70
- description: 'Latest-generation GPT model with high reasoning effort for demanding PR reviews, strong code understanding, and careful cross-file analysis.',
71
- badge: 'High Effort',
72
- badgeClass: 'badge-power'
73
- },
74
- {
75
- id: 'gpt-5.5-xhigh',
76
- cli_model: 'gpt-5.5',
77
- extra_args: ['-c', 'model_reasoning_effort="xhigh"'],
78
- name: 'GPT-5.5 XHigh',
79
- tier: 'thorough',
80
- tagline: 'Frontier Depth',
81
- description: 'GPT-5.5 with extra-high reasoning effort for the hardest reviews: architecture, concurrency, security-sensitive changes, and large codebase context.',
82
- badge: 'Max Reasoning',
83
- badgeClass: 'badge-power'
84
- },
85
85
  {
86
86
  id: 'gpt-5.3-codex',
87
87
  name: 'GPT-5.3 Codex',
@@ -121,7 +121,7 @@ class CodexProvider extends AIProvider {
121
121
  * @param {Object} configOverrides.env - Additional environment variables
122
122
  * @param {Object[]} configOverrides.models - Custom model definitions
123
123
  */
124
- constructor(model = 'gpt-5.4-high', configOverrides = {}) {
124
+ constructor(model = 'gpt-5.5-high', configOverrides = {}) {
125
125
  super(model);
126
126
 
127
127
  // Command precedence: ENV > config > default
@@ -149,9 +149,9 @@ class CodexProvider extends AIProvider {
149
149
  // 2. "read-only" prevents ALL shell commands including git-diff-lines
150
150
  // 3. The AI is instructed to only analyze code, not modify it
151
151
  //
152
- // --full-auto: Non-interactive mode that auto-approves within sandbox bounds.
153
- // Combined with workspace-write sandbox, this limits damage to the worktree only.
154
- // Note: The -a flag is for interactive mode only; exec subcommand uses --full-auto.
152
+ // Newer Codex CLI versions deprecate --full-auto; `codex exec` is already
153
+ // non-interactive, and `--sandbox workspace-write` selects the required
154
+ // sandbox policy.
155
155
  //
156
156
  // Shell environment config:
157
157
  // - allow_login_shell=false: Prevents zsh from using -l flag, which would
@@ -164,7 +164,7 @@ class CodexProvider extends AIProvider {
164
164
  // (--dangerously-bypass-approvals-and-sandbox is the Codex CLI equivalent of Claude's --dangerously-skip-permissions)
165
165
  const sandboxArgs = configOverrides.yolo
166
166
  ? ['--dangerously-bypass-approvals-and-sandbox']
167
- : ['--sandbox', 'workspace-write', '--full-auto'];
167
+ : ['--sandbox', 'workspace-write'];
168
168
  // Shell env args prevent login shell from reconstructing PATH (orthogonal to
169
169
  // sandbox permissions). Overridable via configOverrides.args following the
170
170
  // same two-tier pattern as chat-providers.js: args replaces, extra_args appends.
@@ -352,7 +352,7 @@ class CodexProvider extends AIProvider {
352
352
 
353
353
  if (code !== 0) {
354
354
  logger.error(`${levelPrefix} Codex CLI exited with code ${code}`);
355
- settle(reject, new Error(`${levelPrefix} Codex CLI exited with code ${code}: ${stderr}`));
355
+ settle(reject, this.createExitError(code, stderr, levelPrefix));
356
356
  return;
357
357
  }
358
358
 
@@ -433,6 +433,37 @@ class CodexProvider extends AIProvider {
433
433
  });
434
434
  }
435
435
 
436
+ /**
437
+ * Build an actionable error for Codex CLI process failures.
438
+ *
439
+ * @param {number} code - Process exit code
440
+ * @param {string} stderr - Captured stderr
441
+ * @param {string} levelPrefix - Logging prefix
442
+ * @returns {Error}
443
+ */
444
+ createExitError(code, stderr, levelPrefix) {
445
+ const stderrText = stderr.trim();
446
+
447
+ if (this.isAuthError(stderrText)) {
448
+ return new Error(
449
+ `${levelPrefix} Codex CLI authentication failed. Check Codex CLI authentication and try again. ` +
450
+ `Original stderr: ${stderrText}`
451
+ );
452
+ }
453
+
454
+ return new Error(`${levelPrefix} Codex CLI exited with code ${code}: ${stderr}`);
455
+ }
456
+
457
+ /**
458
+ * Detect authentication failures reported by the Codex CLI.
459
+ *
460
+ * @param {string} stderr - Captured stderr
461
+ * @returns {boolean}
462
+ */
463
+ isAuthError(stderr) {
464
+ return /(?:401\s+Unauthorized|HTTP error:\s*401|Unauthorized)/i.test(stderr);
465
+ }
466
+
436
467
  /**
437
468
  * Parse Codex CLI JSONL response
438
469
  * Codex outputs JSONL with multiple event types:
@@ -664,7 +695,7 @@ class CodexProvider extends AIProvider {
664
695
 
665
696
  // Base args for extraction (read-only sandbox, no shell access needed)
666
697
  // Note: '-' (stdin marker) must come LAST, after any extra_args
667
- const baseArgs = ['exec', '-m', cliModel, '--json', '--sandbox', 'read-only', '--full-auto'];
698
+ const baseArgs = ['exec', '-m', cliModel, '--json', '--sandbox', 'read-only'];
668
699
 
669
700
  // Append stdin marker '-' at the end after all other args
670
701
  return [...baseArgs, ...extraArgs, '-'];
@@ -790,7 +821,7 @@ class CodexProvider extends AIProvider {
790
821
  }
791
822
 
792
823
  static getDefaultModel() {
793
- return 'gpt-5.4-high';
824
+ return 'gpt-5.5-high';
794
825
  }
795
826
 
796
827
  static getInstallInstructions() {
@@ -235,7 +235,7 @@ curl -s -X POST http://localhost:{{PORT}}/api/pr/OWNER/REPO/PR_NUMBER/analyses \
235
235
  -H 'Content-Type: application/json' \\
236
236
  -d '{
237
237
  "provider": "claude",
238
- "model": "claude-sonnet-4-5-20250929",
238
+ "model": "claude-opus-4-7",
239
239
  "tier": "balanced",
240
240
  "customInstructions": "Focus on security issues."
241
241
  }'
@@ -12,6 +12,7 @@ const logger = require('../utils/logger');
12
12
 
13
13
  // Default dependencies (overridable for testing)
14
14
  const defaults = { spawn };
15
+ const CODEX_SANDBOX_MODES = new Set(['workspace-write', 'read-only']);
15
16
 
16
17
  /**
17
18
  * Built-in chat provider definitions.
@@ -68,6 +69,7 @@ const CHAT_PROVIDERS = {
68
69
  name: 'Codex (JSON-RPC)',
69
70
  type: 'codex',
70
71
  command: 'codex',
72
+ sandbox: 'workspace-write',
71
73
  // Shell environment config prevents zsh -l from reconstructing PATH,
72
74
  // ensuring git-diff-lines and other bin/ scripts remain findable.
73
75
  args: [
@@ -126,6 +128,9 @@ function getChatProvider(id) {
126
128
  }
127
129
  if (overrides.load_skills !== undefined) provider.load_skills = overrides.load_skills;
128
130
  if (overrides.app_extensions !== undefined) provider.app_extensions = overrides.app_extensions;
131
+ if (provider.type === 'codex' && overrides.sandbox !== undefined) {
132
+ provider.sandbox = normalizeCodexSandbox(overrides.sandbox, id);
133
+ }
129
134
  if (provider.command.includes(' ')) {
130
135
  provider.useShell = true;
131
136
  }
@@ -152,6 +157,9 @@ function getChatProvider(id) {
152
157
  }
153
158
  if (overrides.load_skills !== undefined) merged.load_skills = overrides.load_skills;
154
159
  if (overrides.app_extensions !== undefined) merged.app_extensions = overrides.app_extensions;
160
+ if (base.type === 'codex' && overrides.sandbox !== undefined) {
161
+ merged.sandbox = normalizeCodexSandbox(overrides.sandbox, id);
162
+ }
155
163
  // For multi-word commands (e.g. "devx claude"), use shell mode
156
164
  if (merged.command && merged.command.includes(' ')) {
157
165
  merged.useShell = true;
@@ -159,6 +167,24 @@ function getChatProvider(id) {
159
167
  return merged;
160
168
  }
161
169
 
170
/**
 * Validate a user-supplied Codex sandbox mode.
 *
 * Values present in CODEX_SANDBOX_MODES are returned unchanged. Any other
 * value is reported through logger.warn and replaced with 'workspace-write'.
 *
 * @param {string} sandbox - Sandbox mode taken from the provider overrides
 * @param {string} providerId - Provider id used in the warning text
 * @returns {string} The accepted sandbox mode, or 'workspace-write' as fallback
 */
function normalizeCodexSandbox(sandbox, providerId = 'codex') {
  if (!CODEX_SANDBOX_MODES.has(sandbox)) {
    const warning =
      `[ChatProviders] Invalid sandbox "${sandbox}" for ${providerId}; ` +
      'falling back to workspace-write. Supported values: workspace-write, read-only.';
    logger.warn(warning);
    return 'workspace-write';
  }

  return sandbox;
}
187
+
162
188
  /**
163
189
  * Get all chat provider definitions (built-in + dynamic from config).
164
190
  * @returns {Array<Object>}