@in-the-loop-labs/pair-review 3.5.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -493,7 +493,7 @@ Configure your preferred models in `providers.pi.models` — see [AI Provider Co
493
493
  }
494
494
  ```
495
495
 
496
- Available chat provider IDs: `pi`, `claude`, `codex`, `copilot-acp`, `gemini-acp`, `opencode-acp`, `cursor-acp`. Each supports `command`, `args` (replaces defaults), `extra_args` (appends), and `env` overrides.
496
+ Available chat provider IDs: `pi`, `claude`, `codex`, `copilot-acp`, `gemini-acp`, `opencode-acp`, `cursor-acp`. Each supports `command`, `args` (replaces defaults), `extra_args` (appends), and `env` overrides. Codex chat also supports `sandbox`: use `workspace-write` by default, or `read-only` for discussion-only sessions.
497
497
 
498
498
  **Keyboard shortcut:** Press `p` then `c` to toggle the chat panel.
499
499
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@in-the-loop-labs/pair-review",
3
- "version": "3.5.0",
3
+ "version": "3.5.1",
4
4
  "description": "Your AI-powered code review partner - Close the feedback loop with AI coding agents",
5
5
  "main": "src/server.js",
6
6
  "bin": {
@@ -21,20 +21,6 @@
21
21
  "engines": {
22
22
  "node": ">=20.0.0"
23
23
  },
24
- "scripts": {
25
- "start": "node src/server.js",
26
- "dev": "node bin/pair-review.js",
27
- "test": "vitest run",
28
- "test:watch": "vitest",
29
- "test:coverage": "vitest run --coverage",
30
- "test:e2e": "playwright test",
31
- "test:e2e:headed": "playwright test --headed",
32
- "test:e2e:debug": "playwright test --debug",
33
- "generate:skill-prompts": "node scripts/generate-skill-prompts.js",
34
- "changeset": "changeset",
35
- "version": "changeset version && pnpm install --lockfile-only && bash scripts/generate-package-lock.sh && node scripts/sync-plugin-versions.js && git add package.json pnpm-lock.yaml package-lock.json CHANGELOG.md .changeset .claude-plugin/marketplace.json plugin/.claude-plugin/plugin.json plugin-code-critic/.claude-plugin/plugin.json && git commit -m \"RELEASING: v$(node -p \"require('./package.json').version\")\"",
36
- "release": "npm whoami > /dev/null || { echo 'Error: Not logged in to npm. Run: npm login'; exit 1; } && pnpm run version && changeset tag && npm publish && git push && git push --tags"
37
- },
38
24
  "keywords": [
39
25
  "code-review",
40
26
  "pull-request",
@@ -84,9 +70,18 @@
84
70
  "supertest": "^7.1.4",
85
71
  "vitest": "^4.0.16"
86
72
  },
87
- "pnpm": {
88
- "onlyBuiltDependencies": [
89
- "better-sqlite3"
90
- ]
73
+ "scripts": {
74
+ "start": "node src/server.js",
75
+ "dev": "node bin/pair-review.js",
76
+ "test": "vitest run",
77
+ "test:watch": "vitest",
78
+ "test:coverage": "vitest run --coverage",
79
+ "test:e2e": "playwright test",
80
+ "test:e2e:headed": "playwright test --headed",
81
+ "test:e2e:debug": "playwright test --debug",
82
+ "generate:skill-prompts": "node scripts/generate-skill-prompts.js",
83
+ "changeset": "changeset",
84
+ "version": "changeset version && pnpm install --lockfile-only && bash scripts/generate-package-lock.sh && node scripts/sync-plugin-versions.js && git add package.json pnpm-lock.yaml package-lock.json CHANGELOG.md .changeset .claude-plugin/marketplace.json plugin/.claude-plugin/plugin.json plugin-code-critic/.claude-plugin/plugin.json && git commit -m \"RELEASING: v$(node -p \"require('./package.json').version\")\"",
85
+ "release": "npm whoami > /dev/null || { echo 'Error: Not logged in to npm. Run: npm login'; exit 1; } && pnpm run version && changeset tag && npm publish && git push && git push --tags"
91
86
  }
92
- }
87
+ }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pair-review",
3
- "version": "3.5.0",
3
+ "version": "3.5.1",
4
4
  "description": "pair-review app integration — Open PRs and local changes in the pair-review web UI, run server-side AI analysis, and address review feedback. Requires the pair-review MCP server.",
5
5
  "author": {
6
6
  "name": "in-the-loop-labs",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-critic",
3
- "version": "3.5.0",
3
+ "version": "3.5.1",
4
4
  "description": "AI-powered code review analysis — Run three-level AI analysis and implement-review-fix loops directly in your coding agent. Works standalone, no server required.",
5
5
  "author": {
6
6
  "name": "in-the-loop-labs",
@@ -17,19 +17,32 @@ const { StreamParser, parseClaudeLine } = require('./stream-parser');
17
17
  const BIN_DIR = path.join(__dirname, '..', '..', 'bin');
18
18
 
19
19
  /**
20
- * Claude model definitions with tier mappings
20
+ * Claude model definitions with tier mappings.
21
+ *
22
+ * Effort is set via the CLAUDE_CODE_EFFORT_LEVEL env var (highest-precedence way
23
+ * to control reasoning effort; takes precedence over the --effort CLI flag and is
24
+ * not deprecated). Extended thinking is forced on globally via `--thinking enabled`
25
+ * in the constructor's base args; individual models can override this via extra_args
26
+ * (e.g., Haiku uses adaptive thinking for efficiency).
27
+ *
28
+ * Effort support by model (newest CLIs): Opus 4.8 / 4.7 support low|medium|high|
29
+ * xhigh|max; Opus 4.6 & Sonnet 4.6 support low|medium|high|max (no xhigh); Haiku
30
+ * has no effort levels.
21
31
  */
22
32
  const CLAUDE_MODELS = [
33
+ // ── Thorough tier ───────────────────────────────────────────────────────
23
34
  {
24
- id: 'opus-4.7-xhigh',
35
+ id: 'opus',
36
+ aliases: ['opus-4.7-xhigh'],
25
37
  cli_model: 'claude-opus-4-7',
26
38
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'xhigh' },
27
39
  name: 'Opus 4.7 XHigh',
28
40
  tier: 'thorough',
29
- tagline: 'Latest Gen',
30
- description: 'Opus 4.7 (latest) with extra-high effort',
31
- badge: 'Latest',
32
- badgeClass: 'badge-power'
41
+ tagline: 'Maximum Depth',
42
+ description: 'Opus 4.7 with extra-high effort — deepest analysis',
43
+ badge: 'Most Thorough',
44
+ badgeClass: 'badge-power',
45
+ default: true
33
46
  },
34
47
  {
35
48
  id: 'opus-4.7-high',
@@ -37,33 +50,46 @@ const CLAUDE_MODELS = [
37
50
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
38
51
  name: 'Opus 4.7 High',
39
52
  tier: 'thorough',
40
- tagline: 'Latest Gen',
41
- description: 'Opus 4.7 (latest) with high effort',
53
+ tagline: 'High Effort',
54
+ description: 'Opus 4.7 with high effort — thorough, quicker than XHigh',
55
+ badge: 'Thorough',
56
+ badgeClass: 'badge-power'
57
+ },
58
+ {
59
+ id: 'opus-4.8-xhigh',
60
+ cli_model: 'claude-opus-4-8',
61
+ env: { CLAUDE_CODE_EFFORT_LEVEL: 'xhigh' },
62
+ name: 'Opus 4.8 XHigh',
63
+ tier: 'thorough',
64
+ tagline: 'Newest',
65
+ description: 'Opus 4.8 (newest) with extra-high effort',
42
66
  badge: 'Latest',
43
67
  badgeClass: 'badge-power'
44
68
  },
45
69
  {
46
- id: 'opus',
47
- aliases: ['opus-4.6-high'],
48
- cli_model: 'claude-opus-4-6',
70
+ id: 'opus-4.8-high',
71
+ cli_model: 'claude-opus-4-8',
49
72
  env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
50
- name: 'Opus 4.6 High',
73
+ name: 'Opus 4.8 High',
51
74
  tier: 'thorough',
52
- tagline: 'Maximum Depth',
53
- description: 'Opus 4.6 with high effort — deepest analysis',
54
- badge: 'Most Thorough',
55
- badgeClass: 'badge-power',
56
- default: true
75
+ tagline: 'Newest',
76
+ description: 'Opus 4.8 (newest) with high effort',
77
+ badge: 'Latest',
78
+ badgeClass: 'badge-power'
57
79
  },
58
80
  {
59
- id: 'haiku',
60
- name: 'Haiku 4.6',
61
- tier: 'fast',
62
- tagline: 'Lightning Fast',
63
- description: 'Quick analysis for simple changes',
64
- badge: 'Fastest',
65
- badgeClass: 'badge-speed'
81
+ id: 'opus-4.6-high',
82
+ aliases: ['opus-4.6-low', 'opus-4.6-medium', 'opus-4.5'],
83
+ cli_model: 'claude-opus-4-6',
84
+ env: { CLAUDE_CODE_EFFORT_LEVEL: 'high' },
85
+ name: 'Opus 4.6 High',
86
+ tier: 'thorough',
87
+ tagline: 'Previous Gen',
88
+ description: 'Opus 4.6 with high effort',
89
+ badge: 'Previous Gen',
90
+ badgeClass: 'badge-power'
66
91
  },
92
+ // ── Balanced tier ───────────────────────────────────────────────────────
67
93
  {
68
94
  id: 'sonnet-4.6',
69
95
  cli_model: 'claude-sonnet-4-6',
@@ -74,28 +100,6 @@ const CLAUDE_MODELS = [
74
100
  badge: 'Standard',
75
101
  badgeClass: 'badge-recommended'
76
102
  },
77
- {
78
- id: 'opus-4.6-low',
79
- cli_model: 'claude-opus-4-6',
80
- env: { CLAUDE_CODE_EFFORT_LEVEL: 'low' },
81
- name: 'Opus 4.6 Low',
82
- tier: 'balanced',
83
- tagline: 'Fast Opus',
84
- description: 'Opus 4.6 with low effort — quick and capable',
85
- badge: 'Balanced',
86
- badgeClass: 'badge-recommended'
87
- },
88
- {
89
- id: 'opus-4.6-medium',
90
- cli_model: 'claude-opus-4-6',
91
- env: { CLAUDE_CODE_EFFORT_LEVEL: 'medium' },
92
- name: 'Opus 4.6 Medium',
93
- tier: 'balanced',
94
- tagline: 'Balanced Opus',
95
- description: 'Opus 4.6 with medium effort — balanced depth',
96
- badge: 'Thorough',
97
- badgeClass: 'badge-power'
98
- },
99
103
  {
100
104
  id: 'opus-4.6-1m',
101
105
  cli_model: 'claude-opus-4-6[1m]',
@@ -106,15 +110,17 @@ const CLAUDE_MODELS = [
106
110
  badge: 'More Context',
107
111
  badgeClass: 'badge-power'
108
112
  },
113
+ // ── Fast tier ───────────────────────────────────────────────────────────
109
114
  {
110
- id: 'opus-4.5',
111
- cli_model: 'claude-opus-4-5-20251101',
112
- name: 'Opus 4.5',
113
- tier: 'thorough',
114
- tagline: 'Deep Thinker',
115
- description: 'Extended thinking for complex analysis',
116
- badge: 'Previous Gen',
117
- badgeClass: 'badge-power'
115
+ id: 'haiku',
116
+ cli_model: 'claude-haiku-4-5-20251001',
117
+ name: 'Haiku 4.5',
118
+ tier: 'fast',
119
+ tagline: 'Lightning Fast',
120
+ description: 'Quick analysis for simple changes',
121
+ badge: 'Fastest',
122
+ badgeClass: 'badge-speed',
123
+ extra_args: ['--thinking', 'adaptive']
118
124
  }
119
125
  ];
120
126
 
@@ -196,7 +202,12 @@ class ClaudeProvider extends AIProvider {
196
202
  // user's configured environment. To disable skills, add --disable-slash-commands
197
203
  // to extra_args in provider/model config.
198
204
  const hooksArgs = ['--settings', '{"disableAllHooks":true}'];
199
- const baseArgs = ['-p', '--verbose', ...cliModelArgs, '--output-format', 'stream-json', ...hooksArgs, ...permissionArgs];
205
+ // Force extended thinking on for every analysis call. The Claude CLI's
206
+ // `--thinking` flag accepts enabled|adaptive|disabled; we always want
207
+ // reasoning engaged for code review. User config extra_args appended later
208
+ // win over this (commander uses the last occurrence) if an override is set.
209
+ const thinkingArgs = ['--thinking', 'enabled'];
210
+ const baseArgs = ['-p', '--verbose', ...cliModelArgs, '--output-format', 'stream-json', ...thinkingArgs, ...hooksArgs, ...permissionArgs];
200
211
  if (maxBudget) {
201
212
  const budgetNum = parseFloat(maxBudget);
202
213
  if (isNaN(budgetNum) || budgetNum <= 0) {
@@ -242,7 +253,8 @@ class ClaudeProvider extends AIProvider {
242
253
  // - string: use this exact value for --model
243
254
  // - null: explicitly suppress --model (for tools that want the model set via env instead)
244
255
  const builtIn = CLAUDE_MODELS.find(m => m.id === modelId || (m.aliases && m.aliases.includes(modelId)));
245
- const configModel = configOverrides.models?.find(m => m.id === modelId);
256
+ const modelKeys = new Set([modelId, builtIn?.id, ...(builtIn?.aliases || [])].filter(Boolean));
257
+ const configModel = configOverrides.models?.find(m => modelKeys.has(m.id));
246
258
  const resolvedCliModel = configModel?.cli_model !== undefined
247
259
  ? configModel.cli_model
248
260
  : (builtIn?.cli_model !== undefined ? builtIn.cli_model : modelId);
@@ -34,6 +34,29 @@ const BIN_DIR = path.join(__dirname, '..', '..', 'bin');
34
34
  * Deprecated (April 2026): gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.1-codex
35
35
  */
36
36
  const CODEX_MODELS = [
37
+ {
38
+ id: 'gpt-5.5-high',
39
+ cli_model: 'gpt-5.5',
40
+ extra_args: ['-c', 'model_reasoning_effort="high"'],
41
+ name: 'GPT-5.5 High',
42
+ tier: 'thorough',
43
+ tagline: 'Latest Deep',
44
+ description: 'Latest-generation GPT model with high reasoning effort for demanding PR reviews, strong code understanding, and careful cross-file analysis.',
45
+ badge: 'Recommended',
46
+ badgeClass: 'badge-recommended',
47
+ default: true
48
+ },
49
+ {
50
+ id: 'gpt-5.5-xhigh',
51
+ cli_model: 'gpt-5.5',
52
+ extra_args: ['-c', 'model_reasoning_effort="xhigh"'],
53
+ name: 'GPT-5.5 XHigh',
54
+ tier: 'thorough',
55
+ tagline: 'Frontier Depth',
56
+ description: 'GPT-5.5 with extra-high reasoning effort for the hardest reviews: architecture, concurrency, security-sensitive changes, and large codebase context.',
57
+ badge: 'Max Reasoning',
58
+ badgeClass: 'badge-power'
59
+ },
37
60
  {
38
61
  id: 'gpt-5.4-high',
39
62
  // Alias keeps results/councils saved under the previous bare `gpt-5.4`
@@ -45,9 +68,8 @@ const CODEX_MODELS = [
45
68
  tier: 'thorough',
46
69
  tagline: 'Deep Review',
47
70
  description: 'GPT-5.4 with high reasoning effort for complex multi-file reviews, architectural consistency, and subtle behavioral regressions.',
48
- badge: 'Recommended',
49
- badgeClass: 'badge-recommended',
50
- default: true
71
+ badge: 'Previous Gen',
72
+ badgeClass: 'badge-power'
51
73
  },
52
74
  {
53
75
  id: 'gpt-5.4-xhigh',
@@ -60,28 +82,6 @@ const CODEX_MODELS = [
60
82
  badge: 'Extra High',
61
83
  badgeClass: 'badge-power'
62
84
  },
63
- {
64
- id: 'gpt-5.5-high',
65
- cli_model: 'gpt-5.5',
66
- extra_args: ['-c', 'model_reasoning_effort="high"'],
67
- name: 'GPT-5.5 High',
68
- tier: 'thorough',
69
- tagline: 'Latest Deep',
70
- description: 'Latest-generation GPT model with high reasoning effort for demanding PR reviews, strong code understanding, and careful cross-file analysis.',
71
- badge: 'High Effort',
72
- badgeClass: 'badge-power'
73
- },
74
- {
75
- id: 'gpt-5.5-xhigh',
76
- cli_model: 'gpt-5.5',
77
- extra_args: ['-c', 'model_reasoning_effort="xhigh"'],
78
- name: 'GPT-5.5 XHigh',
79
- tier: 'thorough',
80
- tagline: 'Frontier Depth',
81
- description: 'GPT-5.5 with extra-high reasoning effort for the hardest reviews: architecture, concurrency, security-sensitive changes, and large codebase context.',
82
- badge: 'Max Reasoning',
83
- badgeClass: 'badge-power'
84
- },
85
85
  {
86
86
  id: 'gpt-5.3-codex',
87
87
  name: 'GPT-5.3 Codex',
@@ -121,7 +121,7 @@ class CodexProvider extends AIProvider {
121
121
  * @param {Object} configOverrides.env - Additional environment variables
122
122
  * @param {Object[]} configOverrides.models - Custom model definitions
123
123
  */
124
- constructor(model = 'gpt-5.4-high', configOverrides = {}) {
124
+ constructor(model = 'gpt-5.5-high', configOverrides = {}) {
125
125
  super(model);
126
126
 
127
127
  // Command precedence: ENV > config > default
@@ -149,9 +149,9 @@ class CodexProvider extends AIProvider {
149
149
  // 2. "read-only" prevents ALL shell commands including git-diff-lines
150
150
  // 3. The AI is instructed to only analyze code, not modify it
151
151
  //
152
- // --full-auto: Non-interactive mode that auto-approves within sandbox bounds.
153
- // Combined with workspace-write sandbox, this limits damage to the worktree only.
154
- // Note: The -a flag is for interactive mode only; exec subcommand uses --full-auto.
152
+ // Newer Codex CLI versions deprecate --full-auto; `codex exec` is already
153
+ // non-interactive, and `--sandbox workspace-write` selects the required
154
+ // sandbox policy.
155
155
  //
156
156
  // Shell environment config:
157
157
  // - allow_login_shell=false: Prevents zsh from using -l flag, which would
@@ -164,7 +164,7 @@ class CodexProvider extends AIProvider {
164
164
  // (--dangerously-bypass-approvals-and-sandbox is the Codex CLI equivalent of Claude's --dangerously-skip-permissions)
165
165
  const sandboxArgs = configOverrides.yolo
166
166
  ? ['--dangerously-bypass-approvals-and-sandbox']
167
- : ['--sandbox', 'workspace-write', '--full-auto'];
167
+ : ['--sandbox', 'workspace-write'];
168
168
  // Shell env args prevent login shell from reconstructing PATH (orthogonal to
169
169
  // sandbox permissions). Overridable via configOverrides.args following the
170
170
  // same two-tier pattern as chat-providers.js: args replaces, extra_args appends.
@@ -352,7 +352,7 @@ class CodexProvider extends AIProvider {
352
352
 
353
353
  if (code !== 0) {
354
354
  logger.error(`${levelPrefix} Codex CLI exited with code ${code}`);
355
- settle(reject, new Error(`${levelPrefix} Codex CLI exited with code ${code}: ${stderr}`));
355
+ settle(reject, this.createExitError(code, stderr, levelPrefix));
356
356
  return;
357
357
  }
358
358
 
@@ -433,6 +433,37 @@ class CodexProvider extends AIProvider {
433
433
  });
434
434
  }
435
435
 
436
+ /**
437
+ * Build an actionable error for Codex CLI process failures.
438
+ *
439
+ * @param {number} code - Process exit code
440
+ * @param {string} stderr - Captured stderr
441
+ * @param {string} levelPrefix - Logging prefix
442
+ * @returns {Error}
443
+ */
444
+ createExitError(code, stderr, levelPrefix) {
445
+ const stderrText = stderr.trim();
446
+
447
+ if (this.isAuthError(stderrText)) {
448
+ return new Error(
449
+ `${levelPrefix} Codex CLI authentication failed. Check Codex CLI authentication and try again. ` +
450
+ `Original stderr: ${stderrText}`
451
+ );
452
+ }
453
+
454
+ return new Error(`${levelPrefix} Codex CLI exited with code ${code}: ${stderr}`);
455
+ }
456
+
457
+ /**
458
+ * Detect authentication failures reported by the Codex CLI.
459
+ *
460
+ * @param {string} stderr - Captured stderr
461
+ * @returns {boolean}
462
+ */
463
+ isAuthError(stderr) {
464
+ return /(?:401\s+Unauthorized|HTTP error:\s*401|Unauthorized)/i.test(stderr);
465
+ }
466
+
436
467
  /**
437
468
  * Parse Codex CLI JSONL response
438
469
  * Codex outputs JSONL with multiple event types:
@@ -664,7 +695,7 @@ class CodexProvider extends AIProvider {
664
695
 
665
696
  // Base args for extraction (read-only sandbox, no shell access needed)
666
697
  // Note: '-' (stdin marker) must come LAST, after any extra_args
667
- const baseArgs = ['exec', '-m', cliModel, '--json', '--sandbox', 'read-only', '--full-auto'];
698
+ const baseArgs = ['exec', '-m', cliModel, '--json', '--sandbox', 'read-only'];
668
699
 
669
700
  // Append stdin marker '-' at the end after all other args
670
701
  return [...baseArgs, ...extraArgs, '-'];
@@ -790,7 +821,7 @@ class CodexProvider extends AIProvider {
790
821
  }
791
822
 
792
823
  static getDefaultModel() {
793
- return 'gpt-5.4-high';
824
+ return 'gpt-5.5-high';
794
825
  }
795
826
 
796
827
  static getInstallInstructions() {
@@ -235,7 +235,7 @@ curl -s -X POST http://localhost:{{PORT}}/api/pr/OWNER/REPO/PR_NUMBER/analyses \
235
235
  -H 'Content-Type: application/json' \\
236
236
  -d '{
237
237
  "provider": "claude",
238
- "model": "claude-sonnet-4-5-20250929",
238
+ "model": "claude-opus-4-7",
239
239
  "tier": "balanced",
240
240
  "customInstructions": "Focus on security issues."
241
241
  }'
@@ -12,6 +12,7 @@ const logger = require('../utils/logger');
12
12
 
13
13
  // Default dependencies (overridable for testing)
14
14
  const defaults = { spawn };
15
+ const CODEX_SANDBOX_MODES = new Set(['workspace-write', 'read-only']);
15
16
 
16
17
  /**
17
18
  * Built-in chat provider definitions.
@@ -68,6 +69,7 @@ const CHAT_PROVIDERS = {
68
69
  name: 'Codex (JSON-RPC)',
69
70
  type: 'codex',
70
71
  command: 'codex',
72
+ sandbox: 'workspace-write',
71
73
  // Shell environment config prevents zsh -l from reconstructing PATH,
72
74
  // ensuring git-diff-lines and other bin/ scripts remain findable.
73
75
  args: [
@@ -126,6 +128,9 @@ function getChatProvider(id) {
126
128
  }
127
129
  if (overrides.load_skills !== undefined) provider.load_skills = overrides.load_skills;
128
130
  if (overrides.app_extensions !== undefined) provider.app_extensions = overrides.app_extensions;
131
+ if (provider.type === 'codex' && overrides.sandbox !== undefined) {
132
+ provider.sandbox = normalizeCodexSandbox(overrides.sandbox, id);
133
+ }
129
134
  if (provider.command.includes(' ')) {
130
135
  provider.useShell = true;
131
136
  }
@@ -152,6 +157,9 @@ function getChatProvider(id) {
152
157
  }
153
158
  if (overrides.load_skills !== undefined) merged.load_skills = overrides.load_skills;
154
159
  if (overrides.app_extensions !== undefined) merged.app_extensions = overrides.app_extensions;
160
+ if (base.type === 'codex' && overrides.sandbox !== undefined) {
161
+ merged.sandbox = normalizeCodexSandbox(overrides.sandbox, id);
162
+ }
155
163
  // For multi-word commands (e.g. "devx claude"), use shell mode
156
164
  if (merged.command && merged.command.includes(' ')) {
157
165
  merged.useShell = true;
@@ -159,6 +167,24 @@ function getChatProvider(id) {
159
167
  return merged;
160
168
  }
161
169
 
170
+ /**
171
+ * Validate the small user-facing Codex sandbox config surface.
172
+ * @param {string} sandbox
173
+ * @param {string} providerId
174
+ * @returns {string}
175
+ */
176
+ function normalizeCodexSandbox(sandbox, providerId = 'codex') {
177
+ if (CODEX_SANDBOX_MODES.has(sandbox)) {
178
+ return sandbox;
179
+ }
180
+
181
+ logger.warn(
182
+ `[ChatProviders] Invalid sandbox "${sandbox}" for ${providerId}; ` +
183
+ 'falling back to workspace-write. Supported values: workspace-write, read-only.'
184
+ );
185
+ return 'workspace-write';
186
+ }
187
+
162
188
  /**
163
189
  * Get all chat provider definitions (built-in + dynamic from config).
164
190
  * @returns {Array<Object>}
@@ -13,6 +13,7 @@
13
13
  const { EventEmitter } = require('events');
14
14
  const { spawn } = require('child_process');
15
15
  const { createInterface } = require('readline');
16
+ const { quoteShellArgs } = require('../ai/provider');
16
17
  const logger = require('../utils/logger');
17
18
  const { version: pkgVersion } = require('../../package.json');
18
19
 
@@ -22,6 +23,34 @@ const defaults = {
22
23
  createInterface,
23
24
  };
24
25
 
26
+ const DEFAULT_APPROVAL_POLICY = 'never';
27
+ const DEFAULT_SANDBOX_MODE = 'workspace-write';
28
+ const ACTIVE_TURN_STATUSES = new Set(['inProgress', 'running', 'working']);
29
+ const TERMINAL_TURN_STATUSES = new Set(['completed', 'failed', 'interrupted', 'cancelled', 'canceled']);
30
+
31
+ function buildSandboxPolicy(sandbox = DEFAULT_SANDBOX_MODE) {
32
+ if (sandbox === 'read-only') {
33
+ return {
34
+ type: 'readOnly',
35
+ networkAccess: true,
36
+ };
37
+ }
38
+
39
+ return {
40
+ type: 'workspaceWrite',
41
+ writableRoots: [],
42
+ networkAccess: true,
43
+ excludeTmpdirEnvVar: false,
44
+ excludeSlashTmp: false,
45
+ };
46
+ }
47
+
48
+ function compactParams(params) {
49
+ return Object.fromEntries(
50
+ Object.entries(params).filter(([, value]) => value !== undefined && value !== null)
51
+ );
52
+ }
53
+
25
54
  class CodexBridge extends EventEmitter {
26
55
  /**
27
56
  * @param {Object} options
@@ -33,6 +62,8 @@ class CodexBridge extends EventEmitter {
33
62
  * @param {Object} [options.env] - Extra env vars for subprocess
34
63
  * @param {boolean} [options.useShell] - Use shell mode for multi-word commands
35
64
  * @param {string} [options.resumeThreadId] - Thread ID to resume
65
+ * @param {string|null} [options.sandbox] - Thread sandbox mode (default: 'workspace-write')
66
+ * @param {Object|null} [options.sandboxPolicy] - Turn sandbox policy override for tests
36
67
  * @param {Object} [options._deps] - Dependency injection for testing
37
68
  */
38
69
  constructor(options = {}) {
@@ -43,6 +74,11 @@ class CodexBridge extends EventEmitter {
43
74
  this.env = options.env || {};
44
75
  this.useShell = options.useShell || false;
45
76
  this.resumeThreadId = options.resumeThreadId || null;
77
+ this.approvalPolicy = DEFAULT_APPROVAL_POLICY;
78
+ this.sandbox = options.sandbox !== undefined ? options.sandbox : DEFAULT_SANDBOX_MODE;
79
+ this.sandboxPolicy = options.sandboxPolicy !== undefined
80
+ ? options.sandboxPolicy
81
+ : buildSandboxPolicy(this.sandbox);
46
82
 
47
83
  // Command resolution: constructor option → env var → default
48
84
  this.codexCommand = options.codexCommand
@@ -81,13 +117,9 @@ class CodexBridge extends EventEmitter {
81
117
  const args = [...this.codexArgs];
82
118
  const useShell = this.useShell;
83
119
 
84
- // Append model flag if configured
85
- if (this.model) {
86
- args.push('--model', this.model);
87
- }
88
-
89
- // For multi-word commands (e.g. "devx codex"), use shell mode
90
- const spawnCmd = useShell ? `${command} ${args.join(' ')}` : command;
120
+ // For multi-word commands (e.g. "devx codex"), use shell mode. Quote args
121
+ // so TOML config values like include_only=["PATH","HOME"] survive the shell.
122
+ const spawnCmd = useShell ? `${command} ${quoteShellArgs(args).join(' ')}` : command;
91
123
  const spawnArgs = useShell ? [] : args;
92
124
 
93
125
  logger.info(`[CodexBridge] Starting Codex agent: ${command} ${args.join(' ')}`);
@@ -188,13 +220,13 @@ class CodexBridge extends EventEmitter {
188
220
 
189
221
  // 3. Start or resume thread
190
222
  if (this.resumeThreadId) {
191
- const result = await this._sendRequest('thread/resume', {
223
+ const result = await this._sendRequest('thread/resume', this._buildThreadParams({
192
224
  threadId: this.resumeThreadId,
193
- });
225
+ }));
194
226
  this._threadId = result.thread?.id || result.threadId || this.resumeThreadId;
195
227
  logger.info(`[CodexBridge] Thread resumed: ${this._threadId}`);
196
228
  } else {
197
- const result = await this._sendRequest('thread/start', {});
229
+ const result = await this._sendRequest('thread/start', this._buildThreadParams());
198
230
  this._threadId = result.thread?.id || result.threadId;
199
231
  if (!this._threadId) {
200
232
  throw new Error('thread/start response missing thread ID');
@@ -206,6 +238,41 @@ class CodexBridge extends EventEmitter {
206
238
  this.emit('session', { threadId: this._threadId });
207
239
  }
208
240
 
241
+ /**
242
+ * Build thread start/resume settings that keep Codex chat able to call the
243
+ * pair-review API from the review worktree.
244
+ * @param {Object} [extra] - Additional params, e.g. threadId for resume.
245
+ * @returns {Object}
246
+ */
247
+ _buildThreadParams(extra = {}) {
248
+ return compactParams({
249
+ ...extra,
250
+ cwd: this.cwd,
251
+ model: this.model,
252
+ approvalPolicy: this.approvalPolicy,
253
+ // thread/start uses the same sandbox enum as the Codex CLI, while
254
+ // turn/start.sandboxPolicy uses the v2 camelCase policy object.
255
+ sandbox: this.sandbox,
256
+ });
257
+ }
258
+
259
+ /**
260
+ * Build turn/start params. App-server uses the v2 camelCase SandboxPolicy
261
+ * shape here, not the `codex exec --sandbox workspace-write` CLI flag.
262
+ * @param {Array<Object>} input
263
+ * @returns {Object}
264
+ */
265
+ _buildTurnStartParams(input) {
266
+ return compactParams({
267
+ threadId: this._threadId,
268
+ input,
269
+ cwd: this.cwd,
270
+ model: this.model,
271
+ approvalPolicy: this.approvalPolicy,
272
+ sandboxPolicy: this.sandboxPolicy,
273
+ });
274
+ }
275
+
209
276
  /**
210
277
  * Send a user message to the Codex agent.
211
278
  * Fire-and-forget: returns immediately, emits events as the agent responds.
@@ -232,14 +299,11 @@ class CodexBridge extends EventEmitter {
232
299
  // not by this response. Store turnId for abort support.
233
300
  // Codex app-server expects `input` as an array of typed objects, not a
234
301
  // plain string. See https://developers.openai.com/codex/app-server/
235
- this._sendRequest('turn/start', {
236
- threadId: this._threadId,
237
- input: [{ type: 'text', text: messageContent }],
238
- approvalPolicy: 'never',
239
- })
302
+ this._sendRequest('turn/start', this._buildTurnStartParams([{ type: 'text', text: messageContent }]))
240
303
  .then((result) => {
241
- if (result && result.turnId) {
242
- this._turnId = result.turnId;
304
+ const turnId = this._extractTurnId(result);
305
+ if (turnId) {
306
+ this._turnId = turnId;
243
307
  }
244
308
  })
245
309
  .catch((err) => {
@@ -468,7 +532,14 @@ class CodexBridge extends EventEmitter {
468
532
  break;
469
533
 
470
534
  case 'turn/started':
471
- this.emit('status', { status: 'working' });
535
+ this._handleTurnStarted(params);
536
+ break;
537
+
538
+ case 'turn/statusChanged':
539
+ this._handleTurnStatusChanged(params);
540
+ break;
541
+
542
+ case 'remoteControl/status/changed':
472
543
  break;
473
544
 
474
545
  case 'item/started':
@@ -490,13 +561,64 @@ class CodexBridge extends EventEmitter {
490
561
  */
491
562
  _handleDelta(params) {
492
563
  if (!params) return;
493
- const text = params.delta || params.text;
564
+ let text = params.delta || params.text;
494
565
  if (text) {
566
+ text = this._normalizeDeltaBoundary(text);
495
567
  this._accumulatedText += text;
496
568
  this.emit('delta', { text });
497
569
  }
498
570
  }
499
571
 
572
+ /**
573
+ * Preserve readable boundaries when app-server splits prose deltas without
574
+ * carrying the whitespace between adjacent chunks.
575
+ * @param {string} text
576
+ * @returns {string}
577
+ */
578
+ _normalizeDeltaBoundary(text) {
579
+ const previous = this._accumulatedText;
580
+ if (
581
+ previous &&
582
+ /[.!?]$/.test(previous) &&
583
+ /^[A-Z]/.test(text)
584
+ ) {
585
+ return ` ${text}`;
586
+ }
587
+ return text;
588
+ }
589
+
590
+ /**
591
+ * Handle turn started notifications and capture the active turn id.
592
+ * @param {Object} params
593
+ */
594
+ _handleTurnStarted(params) {
595
+ const turnId = this._extractTurnId(params);
596
+ if (turnId) {
597
+ this._turnId = turnId;
598
+ }
599
+ this.emit('status', { status: 'working' });
600
+ }
601
+
602
+ /**
603
+ * Handle turn status changes without reviving a completed turn.
604
+ * @param {Object} params
605
+ */
606
+ _handleTurnStatusChanged(params) {
607
+ const status = params?.status || params?.turn?.status;
608
+
609
+ if (ACTIVE_TURN_STATUSES.has(status)) {
610
+ this._handleTurnStarted(params);
611
+ return;
612
+ }
613
+
614
+ if (TERMINAL_TURN_STATUSES.has(status)) {
615
+ this._turnId = null;
616
+ if (status === 'failed') {
617
+ this._inMessage = false;
618
+ }
619
+ }
620
+ }
621
+
500
622
  /**
501
623
  * Handle turn completion.
502
624
  * @param {Object} params
@@ -530,14 +652,40 @@ class CodexBridge extends EventEmitter {
530
652
  if (!params) return;
531
653
  const type = params.type || params.itemType;
532
654
  if (type === 'command' || type === 'tool_call' || type === 'function_call') {
533
- this.emit('tool_use', {
534
- toolCallId: params.itemId || params.id,
535
- toolName: params.name || params.title || params.command || type,
536
- status: 'start',
537
- });
655
+ this.emit('tool_use', this._buildToolUseEvent(params, 'start'));
538
656
  }
539
657
  }
540
658
 
659
+ /**
660
+ * Build the normalized tool event shape consumed by the chat broadcaster.
661
+ * Command items are represented as bash calls so internal pair-review API
662
+ * curls can be suppressed consistently across providers.
663
+ * @param {Object} params
664
+ * @param {'start'|'end'} status
665
+ * @returns {Object}
666
+ */
667
+ _buildToolUseEvent(params, status) {
668
+ const type = params.type || params.itemType;
669
+ const toolCallId = params.itemId || params.id;
670
+ if (type === 'command') {
671
+ const event = {
672
+ toolCallId,
673
+ toolName: 'bash',
674
+ status,
675
+ };
676
+ if (params.command) {
677
+ event.args = { command: params.command };
678
+ }
679
+ return event;
680
+ }
681
+
682
+ return {
683
+ toolCallId,
684
+ toolName: params.name || params.title || params.command || type,
685
+ status,
686
+ };
687
+ }
688
+
541
689
  /**
542
690
  * Handle item/completed — emit tool_use end for command-type items.
543
691
  * @param {Object} params
@@ -546,11 +694,7 @@ class CodexBridge extends EventEmitter {
546
694
  if (!params) return;
547
695
  const type = params.type || params.itemType;
548
696
  if (type === 'command' || type === 'tool_call' || type === 'function_call') {
549
- this.emit('tool_use', {
550
- toolCallId: params.itemId || params.id,
551
- toolName: params.name || params.title || params.command || type,
552
- status: 'end',
553
- });
697
+ this.emit('tool_use', this._buildToolUseEvent(params, 'end'));
554
698
  }
555
699
  }
556
700
 
@@ -573,11 +717,76 @@ class CodexBridge extends EventEmitter {
573
717
  return;
574
718
  }
575
719
 
720
+ if (method === 'item/commandExecution/requestApproval') {
721
+ logger.debug(`[CodexBridge] Auto-approving command execution request (id=${id})`);
722
+ this._sendResponse(id, { decision: 'accept' });
723
+ return;
724
+ }
725
+
726
+ if (method === 'execCommandApproval') {
727
+ logger.debug(`[CodexBridge] Auto-approving execCommandApproval request (id=${id})`);
728
+ this._sendResponse(id, { decision: 'approved' });
729
+ return;
730
+ }
731
+
732
+ if (method === 'item/permissions/requestApproval') {
733
+ logger.debug(`[CodexBridge] Granting requested network permissions (id=${id})`);
734
+ this._sendResponse(id, this._buildPermissionsApproval(params));
735
+ return;
736
+ }
737
+
738
+ if (method === 'item/fileChange/requestApproval') {
739
+ logger.debug(`[CodexBridge] Declining file change approval request (id=${id})`);
740
+ this._sendResponse(id, { decision: 'decline' });
741
+ return;
742
+ }
743
+
744
+ if (method === 'applyPatchApproval') {
745
+ logger.debug(`[CodexBridge] Denying applyPatchApproval request (id=${id})`);
746
+ this._sendResponse(id, { decision: 'denied' });
747
+ return;
748
+ }
749
+
576
750
  // Unknown server request — respond with error to avoid hangs
577
751
  logger.warn(`[CodexBridge] Unknown server request: ${method} (id=${id})`);
578
752
  this._sendErrorResponse(id, -32601, `Method not found: ${method}`);
579
753
  }
580
754
 
755
+ /**
756
+ * Build a response for Codex v2 permission requests. For pair-review chat we
757
+ * grant network permission so localhost API `curl` calls can proceed, while
758
+ * avoiding broad file-system permission grants beyond the configured sandbox.
759
+ * @param {Object} params
760
+ * @returns {Object}
761
+ */
762
+ _buildPermissionsApproval(params = {}) {
763
+ const requested = params.permissions || {};
764
+ const permissions = {};
765
+
766
+ if (requested.network) {
767
+ permissions.network = requested.network;
768
+ } else {
769
+ // Codex chat needs localhost network access to call pair-review's API.
770
+ // Grant it even if app-server requests permissions without a network body.
771
+ permissions.network = { enabled: true };
772
+ }
773
+
774
+ return {
775
+ permissions,
776
+ scope: 'session',
777
+ strictAutoReview: false,
778
+ };
779
+ }
780
+
781
+ /**
782
+ * Extract a turn id from legacy and current app-server shapes.
783
+ * @param {Object} value
784
+ * @returns {string|null}
785
+ */
786
+ _extractTurnId(value) {
787
+ return value?.turn?.id || value?.turnId || value?.id || null;
788
+ }
789
+
581
790
  /**
582
791
  * Send a JSON-RPC success response.
583
792
  * @param {number|string} id - Request ID
@@ -571,6 +571,7 @@ class ChatSessionManager {
571
571
  codexArgs: def?.args,
572
572
  env: def?.env,
573
573
  useShell: def?.useShell,
574
+ sandbox: def?.sandbox,
574
575
  });
575
576
  }
576
577
  // Pi provider — resolve config overrides (command, model, env) from provider def.
package/src/main.js CHANGED
@@ -118,8 +118,9 @@ OPTIONS:
118
118
  The web UI also starts for the human reviewer.
119
119
  --model <name> Override the AI model. Claude Code is the default provider.
120
120
  Available models: opus, sonnet, haiku (Claude Code);
121
- also: opus-4.5, opus-4.6-low, opus-4.6-medium, opus-4.6-1m,
122
- opus-4.7-high, opus-4.7-xhigh
121
+ also: opus-4.8-xhigh, opus-4.8-high, opus-4.7-xhigh,
122
+ opus-4.7-high, opus-4.6-high, opus-4.6-1m, sonnet-4.6
123
+ (opus is Opus 4.7 XHigh, the default)
123
124
  or use provider-specific models with Gemini/Codex
124
125
  --use-checkout Use current directory instead of creating worktree
125
126
  (automatic in GitHub Actions)