ai-cli-mcp 2.18.0 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/CHANGELOG.md +26 -0
  2. package/README.ja.md +37 -11
  3. package/README.md +44 -11
  4. package/dist/app/cli.js +2 -1
  5. package/dist/app/mcp.js +65 -13
  6. package/dist/cli-builder.js +13 -6
  7. package/dist/cli-process-service.js +81 -95
  8. package/dist/cli-utils.js +6 -0
  9. package/dist/cli.js +1 -1
  10. package/dist/model-catalog.js +3 -2
  11. package/dist/parsers.js +111 -8
  12. package/dist/process-service.js +5 -4
  13. package/package.json +26 -2
  14. package/server.json +3 -3
  15. package/.gemini/settings.json +0 -11
  16. package/.github/dependabot.yml +0 -28
  17. package/.github/pull_request_template.md +0 -28
  18. package/.github/workflows/ci.yml +0 -34
  19. package/.github/workflows/dependency-review.yml +0 -22
  20. package/.github/workflows/publish.yml +0 -89
  21. package/.github/workflows/test.yml +0 -20
  22. package/.github/workflows/watch-session-prs.yml +0 -276
  23. package/.husky/pre-commit +0 -1
  24. package/.mcp.json +0 -11
  25. package/.releaserc.json +0 -18
  26. package/.vscode/settings.json +0 -3
  27. package/CONTRIBUTING.md +0 -81
  28. package/dist/__tests__/app-cli.test.js +0 -392
  29. package/dist/__tests__/cli-bin-smoke.test.js +0 -101
  30. package/dist/__tests__/cli-builder.test.js +0 -442
  31. package/dist/__tests__/cli-process-service.test.js +0 -655
  32. package/dist/__tests__/cli-utils.test.js +0 -171
  33. package/dist/__tests__/e2e.test.js +0 -256
  34. package/dist/__tests__/edge-cases.test.js +0 -130
  35. package/dist/__tests__/error-cases.test.js +0 -292
  36. package/dist/__tests__/mcp-contract.test.js +0 -636
  37. package/dist/__tests__/mocks.js +0 -32
  38. package/dist/__tests__/model-alias.test.js +0 -36
  39. package/dist/__tests__/parsers.test.js +0 -500
  40. package/dist/__tests__/peek.test.js +0 -36
  41. package/dist/__tests__/process-management.test.js +0 -871
  42. package/dist/__tests__/server.test.js +0 -809
  43. package/dist/__tests__/setup.js +0 -11
  44. package/dist/__tests__/utils/claude-mock.js +0 -80
  45. package/dist/__tests__/utils/mcp-client.js +0 -121
  46. package/dist/__tests__/utils/opencode-mock.js +0 -91
  47. package/dist/__tests__/utils/persistent-mock.js +0 -28
  48. package/dist/__tests__/utils/test-helpers.js +0 -11
  49. package/dist/__tests__/validation.test.js +0 -308
  50. package/dist/__tests__/version-print.test.js +0 -65
  51. package/dist/__tests__/wait.test.js +0 -260
  52. package/docs/RELEASE_CHECKLIST.md +0 -65
  53. package/docs/cli-architecture.md +0 -275
  54. package/docs/concept.md +0 -154
  55. package/docs/development.md +0 -156
  56. package/docs/e2e-testing.md +0 -148
  57. package/docs/prd.md +0 -146
  58. package/docs/session-stacking.md +0 -67
  59. package/src/__tests__/app-cli.test.ts +0 -495
  60. package/src/__tests__/cli-bin-smoke.test.ts +0 -136
  61. package/src/__tests__/cli-builder.test.ts +0 -549
  62. package/src/__tests__/cli-process-service.test.ts +0 -759
  63. package/src/__tests__/cli-utils.test.ts +0 -200
  64. package/src/__tests__/e2e.test.ts +0 -311
  65. package/src/__tests__/edge-cases.test.ts +0 -176
  66. package/src/__tests__/error-cases.test.ts +0 -370
  67. package/src/__tests__/mcp-contract.test.ts +0 -755
  68. package/src/__tests__/mocks.ts +0 -35
  69. package/src/__tests__/model-alias.test.ts +0 -44
  70. package/src/__tests__/parsers.test.ts +0 -564
  71. package/src/__tests__/peek.test.ts +0 -44
  72. package/src/__tests__/process-management.test.ts +0 -1043
  73. package/src/__tests__/server.test.ts +0 -1020
  74. package/src/__tests__/setup.ts +0 -13
  75. package/src/__tests__/utils/claude-mock.ts +0 -87
  76. package/src/__tests__/utils/mcp-client.ts +0 -159
  77. package/src/__tests__/utils/opencode-mock.ts +0 -108
  78. package/src/__tests__/utils/persistent-mock.ts +0 -33
  79. package/src/__tests__/utils/test-helpers.ts +0 -13
  80. package/src/__tests__/validation.test.ts +0 -369
  81. package/src/__tests__/version-print.test.ts +0 -81
  82. package/src/__tests__/wait.test.ts +0 -302
  83. package/src/app/cli.ts +0 -424
  84. package/src/app/mcp.ts +0 -466
  85. package/src/bin/ai-cli-mcp.ts +0 -7
  86. package/src/bin/ai-cli.ts +0 -11
  87. package/src/cli-builder.ts +0 -274
  88. package/src/cli-parse.ts +0 -105
  89. package/src/cli-process-service.ts +0 -708
  90. package/src/cli-utils.ts +0 -258
  91. package/src/cli.ts +0 -124
  92. package/src/model-catalog.ts +0 -87
  93. package/src/parsers.ts +0 -840
  94. package/src/peek.ts +0 -95
  95. package/src/process-result.ts +0 -88
  96. package/src/process-service.ts +0 -367
  97. package/src/server.ts +0 -10
  98. package/tsconfig.json +0 -16
  99. package/vitest.config.e2e.ts +0 -27
  100. package/vitest.config.ts +0 -22
  101. package/vitest.config.unit.ts +0 -28
package/CHANGELOG.md CHANGED
@@ -1,3 +1,29 @@
1
+ # [2.20.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.19.0...v2.20.0) (2026-04-18)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * derive mcp server version from package ([d055ae8](https://github.com/mkXultra/ai-cli-mcp/commit/d055ae8d7900e879633711d79b6ae9fff95ce7f1))
7
+ * harden cli termination handling ([8497f22](https://github.com/mkXultra/ai-cli-mcp/commit/8497f2282f53e24d881825fbbe52c61b8f488166))
8
+ * keep mcp doctor reachable with invalid cli env ([aee7783](https://github.com/mkXultra/ai-cli-mcp/commit/aee7783f09413118735e640a2b8745d0140bff31))
9
+ * persist detached cli exit codes ([53cec30](https://github.com/mkXultra/ai-cli-mcp/commit/53cec30c60dd3a957a31403546989ffac25b26f9))
10
+ * restrict npm package contents ([cf1de69](https://github.com/mkXultra/ai-cli-mcp/commit/cf1de6972592645ffdbaa149d23293cb99314066))
11
+ * sync server manifest during release ([9c7059f](https://github.com/mkXultra/ai-cli-mcp/commit/9c7059fe35a114742205f0440308fbbbd7020951))
12
+
13
+
14
+ ### Features
15
+
16
+ * expose doctor as mcp tool ([c33e1de](https://github.com/mkXultra/ai-cli-mcp/commit/c33e1de3ed768735501a3308ccb5b496e87e230a))
17
+ * expose models as mcp tool ([4bae139](https://github.com/mkXultra/ai-cli-mcp/commit/4bae1391a9e5413c25cf393ecb186b387e11b5eb))
18
+ * support new Claude effort levels ([ec66dc9](https://github.com/mkXultra/ai-cli-mcp/commit/ec66dc9cbb260eb2f0401185a1c9e83e670d0cd1))
19
+
20
+ # [2.19.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.18.0...v2.19.0) (2026-04-15)
21
+
22
+
23
+ ### Features
24
+
25
+ * peekコマンドにForgeエージェントのベストエフォートサポートを追加 ([7c01958](https://github.com/mkXultra/ai-cli-mcp/commit/7c01958b0c9a8133da07c556b303481abd511b6b))
26
+
1
27
  # [2.18.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.17.0...v2.18.0) (2026-04-12)
2
28
 
3
29
 
package/README.ja.md CHANGED
@@ -24,7 +24,7 @@ Cursorなどのエディタが、複雑な手順を伴う編集や操作に苦
24
24
  - OpenCode を非対話 JSON モードで実行(`opencode run --format json --dir <workFolder> <prompt>` を使用)
25
25
  - 複数のAIモデルのサポート:
26
26
  - Claude (sonnet, sonnet[1m], opus, opusplan, haiku)
27
- - Codex (gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, など)
27
+ - Codex (`codex` は Codex CLI 側の設定済みデフォルトモデル、加えて gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, など)
28
28
  - Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview)
29
29
  - Forge (`forge`)
30
30
  - OpenCode (`opencode` と `oc-<provider/model>` ラッパー。例: `oc-openai/gpt-5.4`)
@@ -192,6 +192,7 @@ macOSでは、これらのツールを初めて実行する際にフォルダへ
192
192
  ```bash
193
193
  ai-cli doctor
194
194
  ai-cli models
195
+ ai-cli run --cwd "$PWD" --model codex --prompt "Codex CLI のデフォルトモデルで実行"
195
196
  ai-cli run --cwd "$PWD" --model codex-ultra --prompt "fix failing tests"
196
197
  ai-cli run --cwd "$PWD" --model opencode --session-id ses_existing --prompt "この OpenCode セッションを継続して"
197
198
  ai-cli run --cwd "$PWD" --model oc-openai/gpt-5.4 --prompt "明示的な OpenCode モデルで実行"
@@ -214,7 +215,9 @@ OpenCode のモデル指定は次の 2 つを受け付けます。
214
215
 
215
216
  `ai-cli models` は OpenCode を機械可読に `opencode: ["opencode"]` と `dynamicModelBackends.opencode` で公開します。実際に利用可能なバックエンドネイティブなモデル一覧は `opencode models` で確認してください。
216
217
 
217
- `doctor` は CLI バイナリの存在確認と path 解決だけを行います。ログイン状態や利用規約同意までは確認しません。
218
+ Codex のモデル指定では、`codex` を使うと Codex CLI 側の設定済みデフォルトモデルを使用します。Codex CLI がアカウント種別によって明示的な `gpt-*` モデル指定を受け付けない場合に有用です。
219
+
220
+ `doctor` は CLI バイナリの利用可否と path 解決だけを確認します。JSON 出力には `checks` ブロックが含まれ、ログイン状態と利用規約同意は未確認として示されます。
218
221
 
219
222
  ## CLI の状態保存先
220
223
 
@@ -229,13 +232,13 @@ OpenCode のモデル指定は次の 2 つを受け付けます。
229
232
  - `meta.json`
230
233
  - `stdout.log`
231
234
  - `stderr.log`
232
- - `exit-status.json`(detached な OpenCode 実行用)
235
+ - `exit-status.json`(detached 実行用)
233
236
 
234
237
  完了済み・失敗済みの実行は `ai-cli cleanup` で削除できます。`running` のものは保持されます。
235
238
 
236
- ## 既知の制約
239
+ ## Exit status の追跡
237
240
 
238
- detached 実行された `ai-cli` では、OpenCode バックエンドに限り自然終了時の exit status を永続化します。そのため OpenCode の失敗終了は非ゼロ exit code を含めて `failed` として扱われ、結果では生の `stdout` / `stderr` を保持します。一方、他の detached バックエンドでは従来どおり、より広い exit-status 追跡が追加されるまでは自然終了した実行が信頼できる exit code なしで `completed` と見なされる制約が残ります。
241
+ detached 実行された `ai-cli` は、すべての対応バックエンドで自然終了時の exit status を `exit-status.json` に永続化します。非ゼロ終了は記録された `exitCode` 付きの `failed` として扱われ、ゼロ終了は `exitCode: 0` 付きの `completed` として扱われます。`ai-cli kill` は SIGTERM による終了を failed exit として記録し、追跡中プロセスが exit metadata なしで消えた場合も成功とは見なさず `failed` として扱います。
239
242
 
240
243
  ## MCPクライアントへの接続
241
244
 
@@ -256,13 +259,13 @@ Claude CLI、Codex CLI、Gemini CLI、Forge CLI、または OpenCode を使用
256
259
  - `prompt_file` (string, 任意): プロンプトを含むファイルへのパス。`prompt` または `prompt_file` のいずれかが必須です。絶対パス、または `workFolder` からの相対パスが指定可能です。
257
260
  - `workFolder` (string, 必須): CLIを実行する作業ディレクトリ。絶対パスである必要があります。
258
261
  - **モデル (Models):**
259
- - **Ultra エイリアス:** `claude-ultra` (自動的に high effort に設定), `codex-ultra` (自動的に xhigh reasoning に設定), `gemini-ultra`
262
+ - **Ultra エイリアス:** `claude-ultra` (自動的に max effort に設定), `codex-ultra` (自動的に xhigh reasoning に設定), `gemini-ultra`
260
263
  - Claude: `sonnet`, `sonnet[1m]`, `opus`, `opusplan`, `haiku`
261
- - Codex: `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
264
+ - Codex: `codex`(Codex CLI 側の設定済みデフォルトモデル)および `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
262
265
  - Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro-preview`, `gemini-3-pro-preview`, `gemini-3-flash-preview`
263
266
  - Forge: `forge`
264
267
  - OpenCode: `opencode`(設定済みのデフォルトモデル)および `oc-openai/gpt-5.4` のような明示ラッパー
265
- - `reasoning_effort` (string, 任意): Claude と Codex の推論制御。Claude では `--effort` を使います(許容値: "low", "medium", "high")。Codex では `model_reasoning_effort` を使います(許容値: "low", "medium", "high", "xhigh")。Gemini、Forge、OpenCode では `reasoning_effort` はサポートしません。
268
+ - `reasoning_effort` (string, 任意): Claude と Codex の推論制御。Claude では `--effort` を使います(許容値: "low", "medium", "high", "xhigh", "max")。Codex では `model_reasoning_effort` を使います(許容値: "low", "medium", "high", "xhigh")。Gemini、Forge、OpenCode では `reasoning_effort` はサポートしません。
266
269
  - `session_id` (string, 任意): 以前のセッションを再開するためのセッションID。Claude、Codex、Gemini、Forge、OpenCode でサポートされます。OpenCode は `--session` による in-place resume で再開し、`oc-<provider/model>` の明示指定と併用できます。
267
270
 
268
271
  ### `wait`
@@ -297,9 +300,9 @@ ai-cli peek 123 --time 10 --include-tool-calls
297
300
  - `peek_started_at` と `events[].ts` は、ai-cli-mcp サーバー側の UTC RFC3339 タイムスタンプです。`peek_started_at` は検証とリスナー登録後に観測ウィンドウが始まった時刻、`events[].ts` は ai-cli-mcp がイベントを観測して受理した時刻です。
298
301
  - 観測ウィンドウは `peek_time_sec` が経過するか、対象プロセスがすべて終端状態になった時点で終了します。
299
302
  - 観測開始前のイベントは返しません。同じPIDへの同時 `peek` は可能で、それぞれ独立した観測ウィンドウを持つため、イベントが重複して返ることがあります。
300
- - メッセージイベントは、Codex の `agent_message` text、Claude assistant の text content、OpenCode の `type: "text"` かつ `part.type` が `"text"` のイベント、Gemini stream-json の `role` が `"assistant"` の `message` イベントから認識します。
301
- - tool call を含める場合、Codex の command/MCP call、Claude の tool use/result、Gemini の tool use/result、OpenCode の完了済み tool use event を正規化した `tool_call` イベントとして返します。tool summary は tool 名と入力メタデータだけから作る短い1行文字列です。raw `stdout` / `stderr`、raw JSONL、tool result output、コマンド出力、`result.response`、stats、token usage、verbose メタデータは除外します。
302
- - 未知のイベント形状はデフォルトで拒否します。Forge など、まだ明示対応されていない管理対象エージェントは、実際のプロセス状態を返しつつ、`events: []`、`truncated: false`、`error: null` にします。
303
+ - メッセージイベントは、Codex の `agent_message` text、Claude assistant の text content、OpenCode の `type: "text"` かつ `part.type` が `"text"` のイベント、Gemini stream-json の `role` が `"assistant"` の `message` イベント、Forge の `Summary:` または `Completed successfully:` で始まる plain-text 行から best-effort に認識します。
304
+ - tool call を含める場合、Codex の command/MCP call、Claude の tool use/result、Gemini の tool use/result、OpenCode の完了済み tool use event、Forge の低精度な `Execute` / `Finished` marker を正規化した `tool_call` イベントとして返します。tool summary は tool 名と入力メタデータだけから作る短い1行文字列です。Forge のコマンド出力自体は tail せず、公開しません。raw `stdout` / `stderr`、raw JSONL、tool result output、コマンド出力、`result.response`、stats、token usage、verbose メタデータは除外します。
305
+ - 未知のイベント形状はデフォルトで拒否します。まだ明示対応されていない管理対象エージェントは、実際のプロセス状態を返しつつ、`events: []`、`truncated: false`、`error: null` にします。
303
306
  - 各PIDごとに、観測ウィンドウ内で最初に観測された50件までを保持します。それ以降のイベントを捨てた場合は `truncated` が `true` になります。
304
307
  - `status` は `running`、`completed`、`failed`、`not_found` のいずれかで、観測ウィンドウ終了時点の状態を表します。
305
308
  - `agent` は `claude`、`codex`、`gemini`、`forge`、`opencode`、将来追加される追跡済みエージェント文字列、または `null` です。`null` はプロセスが見つからない、またはエージェント種別を判断できない場合を表します。
@@ -338,6 +341,14 @@ ai-cli peek 123 --time 10 --include-tool-calls
338
341
 
339
342
  実行中および完了したすべてのAIエージェントプロセスを、ステータス、PID、基本情報とともにリストアップします。
340
343
 
344
+ ### `doctor`
345
+
346
+ MCP クライアントから、対応する AI CLI バイナリの利用可否と path 解決を確認します。`ai-cli doctor` と同じく `checks` ブロックを返し、ログイン状態や利用規約同意は確認しません。
347
+
348
+ ### `models`
349
+
350
+ MCP クライアントから、対応モデル名、エイリアス、動的バックエンドの discovery hint を確認します。`ai-cli models` と同じ構造化 payload を返します。
351
+
341
352
  ### `get_result`
342
353
 
343
354
  PIDを指定して、AIエージェントプロセスの現在の出力とステータスを取得します。
@@ -380,8 +391,23 @@ npm run test:unit
380
391
 
381
392
  # E2Eテストの実行(モック使用)
382
393
  npm run test:e2e
394
+
395
+ # 公開 npm package の contents smoke test
396
+ npm run test:package
397
+
398
+ # GitHub Actions と同じ deterministic PR/release gate
399
+ # これだけでは実際の外部 CLI 実行は有効になりません
400
+ npm run test:release
401
+
402
+ # リリース前の live E2E(実際にインストール済み AI CLI を叩く)
403
+ ACM_LIVE_E2E=1 ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
404
+
405
+ # ai-cli と MCP server surface の両方を叩く live E2E
406
+ ACM_LIVE_E2E=1 ACM_LIVE_E2E_SURFACE=all ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
383
407
  ```
384
408
 
409
+ live E2E は opt-in です。インストール済みかつ認証済みの外部 CLI、ネットワーク、provider 側の可用性、コスト予算に依存するため、通常の `npm test` には含めていません。`ACM_LIVE_E2E_SURFACE` は既定で `cli` です。MCP server surface も含める場合は `mcp` または `all` を指定します。
410
+
385
411
  ## 高度な設定(オプション)
386
412
 
387
413
  通常の利用では設定不要ですが、CLIツールのパスをカスタマイズしたい場合やデバッグが必要な場合に使用できる環境変数です。
package/README.md CHANGED
@@ -24,7 +24,7 @@ This MCP server provides tools that can be used by LLMs to interact with AI CLI
24
24
  - Execute Gemini CLI with automatic approval mode (using `-y`)
25
25
  - Execute Forge CLI in non-interactive mode (using `forge -C <workFolder> -p <prompt>`)
26
26
  - Execute OpenCode in non-interactive JSON mode (using `opencode run --format json --dir <workFolder> <prompt>`)
27
- - Support multiple AI models: Claude (sonnet, sonnet[1m], opus, opusplan, haiku), Codex (gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5-codex, gpt-5-codex-mini, gpt-5), Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview), Forge (`forge`), and OpenCode (`opencode` plus explicit `oc-<provider/model>` wrappers such as `oc-openai/gpt-5.4`)
27
+ - Support multiple AI models: Claude (sonnet, sonnet[1m], opus, opusplan, haiku), Codex (`codex` for the CLI's configured default model, plus gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5-codex, gpt-5-codex-mini, gpt-5), Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview), Forge (`forge`), and OpenCode (`opencode` plus explicit `oc-<provider/model>` wrappers such as `oc-openai/gpt-5.4`)
28
28
  - Manage background processes with PID tracking
29
29
  - Parse and return structured outputs from both tools
30
30
 
@@ -189,6 +189,7 @@ Example flow:
189
189
  ```bash
190
190
  ai-cli doctor
191
191
  ai-cli models
192
+ ai-cli run --cwd "$PWD" --model codex --prompt "use the Codex CLI default model"
192
193
  ai-cli run --cwd "$PWD" --model codex-ultra --prompt "fix failing tests"
193
194
  ai-cli run --cwd "$PWD" --model opencode --session-id ses_existing --prompt "continue this OpenCode session"
194
195
  ai-cli run --cwd "$PWD" --model oc-openai/gpt-5.4 --prompt "run with an explicit OpenCode backend model"
@@ -211,7 +212,9 @@ OpenCode model selection accepts either:
211
212
 
212
213
  `ai-cli models` exposes OpenCode machine-readably via `opencode: ["opencode"]` plus `dynamicModelBackends.opencode`, which points users to `opencode models` for backend-native discovery.
213
214
 
214
- `doctor` checks only binary existence and path resolution. It does not verify login state or terms acceptance.
215
+ Codex model selection accepts `codex` to use the Codex CLI's configured default model. This is useful for account types where explicit `gpt-*` model overrides are not accepted by the Codex CLI.
216
+
217
+ `doctor` checks only binary availability and path resolution. Its JSON output includes a `checks` block that marks login state and terms acceptance as unchecked.
215
218
 
216
219
  ## CLI State Storage
217
220
 
@@ -226,13 +229,13 @@ Each PID directory contains:
226
229
  - `meta.json`
227
230
  - `stdout.log`
228
231
  - `stderr.log`
229
- - `exit-status.json` for detached OpenCode runs
232
+ - `exit-status.json` for detached runs
230
233
 
231
234
  Use `ai-cli cleanup` to remove completed and failed runs. Running processes are preserved.
232
235
 
233
- ## Known Limitation
236
+ ## Exit Status Tracking
234
237
 
235
- Detached `ai-cli` runs persist natural exit status for OpenCode-backed runs, including failed exit codes used to preserve raw OpenCode stdout/stderr in result output. Other detached backends still keep the pre-existing limitation: naturally finished runs may be surfaced as completed without a reliable persisted exit code until broader exit tracking is added.
238
+ Detached `ai-cli` runs persist natural exit status for all supported backends through `exit-status.json`. Non-zero exits are surfaced as `failed` with the recorded `exitCode`; zero exits are surfaced as `completed` with `exitCode: 0`. `ai-cli kill` records SIGTERM termination as a failed exit, and a tracked process that disappears without exit metadata is treated as `failed` rather than assumed successful.
236
239
 
237
240
  ## Connecting to Your MCP Client
238
241
 
@@ -253,13 +256,13 @@ Executes a prompt using Claude CLI, Codex CLI, Gemini CLI, Forge CLI, or OpenCod
253
256
  - `prompt_file` (string, optional): Path to a file containing the prompt. Either `prompt` or `prompt_file` is required. Can be absolute path or relative to `workFolder`.
254
257
  - `workFolder` (string, required): The working directory for the CLI execution. Must be an absolute path.
255
258
  **Models:**
256
- - **Ultra Aliases:** `claude-ultra` (defaults to high effort), `codex-ultra` (defaults to xhigh reasoning), `gemini-ultra`
259
+ - **Ultra Aliases:** `claude-ultra` (defaults to max effort), `codex-ultra` (defaults to xhigh reasoning), `gemini-ultra`
257
260
  - Claude: `sonnet`, `sonnet[1m]`, `opus`, `opusplan`, `haiku`
258
- - Codex: `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
261
+ - Codex: `codex` for the CLI's configured default model, plus `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
259
262
  - Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro-preview`, `gemini-3-pro-preview`, `gemini-3-flash-preview`
260
263
  - Forge: `forge`
261
264
  - OpenCode: `opencode` for the configured default backend model, plus explicit wrappers like `oc-openai/gpt-5.4`
262
- - `reasoning_effort` (string, optional): Reasoning control for Claude and Codex. Claude uses `--effort` (allowed: "low", "medium", "high"). Codex uses `model_reasoning_effort` (allowed: "low", "medium", "high", "xhigh"). Gemini, Forge, and OpenCode do not support `reasoning_effort`.
265
+ - `reasoning_effort` (string, optional): Reasoning control for Claude and Codex. Claude uses `--effort` (allowed: "low", "medium", "high", "xhigh", "max"). Codex uses `model_reasoning_effort` (allowed: "low", "medium", "high", "xhigh"). Gemini, Forge, and OpenCode do not support `reasoning_effort`.
263
266
  - `session_id` (string, optional): Optional session ID to resume a previous session. Supported for Claude, Codex, Gemini, Forge, and OpenCode. OpenCode resumes in place via `--session` and may also be combined with an explicit `oc-<provider/model>` selection.
264
267
 
265
268
  ### `wait`
@@ -294,9 +297,9 @@ ai-cli peek 123 --time 10 --include-tool-calls
294
297
  - `peek_started_at` and `events[].ts` are ai-cli-mcp server-side UTC RFC3339 timestamps. `peek_started_at` is when the observation window starts after validation and listener registration; `events[].ts` is when ai-cli-mcp observed and accepted the event.
295
298
  - The window ends when `peek_time_sec` elapses or all target processes reach a terminal state, whichever comes first.
296
299
  - Events emitted before the window starts are not returned. Concurrent `peek` calls for the same PID are allowed; each has an independent window and may return overlapping events.
297
- - Message events are recognized from Codex `agent_message` text, Claude assistant text content, OpenCode `type: "text"` events where `part.type` is `"text"`, and Gemini stream-json `message` events where `role` is `"assistant"`.
298
- - When tool calls are included, `tool_call` events are normalized for Codex command/MCP calls, Claude tool use/results, Gemini tool use/results, and OpenCode completed tool use events. Tool summaries are bounded one-line strings derived from tool names and input metadata only. Raw stdout/stderr, raw JSONL, tool result output, command output, `result.response`, stats, token usage, and verbose metadata are excluded.
299
- - Unknown event shapes are denied by default. Managed agents without supported extraction, such as Forge until explicitly supported, return their real process status with `events: []`, `truncated: false`, and `error: null`.
300
+ - Message events are recognized from Codex `agent_message` text, Claude assistant text content, OpenCode `type: "text"` events where `part.type` is `"text"`, Gemini stream-json `message` events where `role` is `"assistant"`, and best-effort Forge plain-text lines beginning with `Summary:` or `Completed successfully:`.
301
+ - When tool calls are included, `tool_call` events are normalized for Codex command/MCP calls, Claude tool use/results, Gemini tool use/results, OpenCode completed tool use events, and low-precision Forge `Execute`/`Finished` markers. Tool summaries are bounded one-line strings derived from tool names and input metadata only. Forge command output itself is not tailed or exposed. Raw stdout/stderr, raw JSONL, tool result output, command output, `result.response`, stats, token usage, and verbose metadata are excluded.
302
+ - Unknown event shapes are denied by default. Managed agents without supported extraction return their real process status with `events: []`, `truncated: false`, and `error: null`.
300
303
  - Each PID keeps the first 50 events observed in the window. If later events are dropped, `truncated` is `true`.
301
304
  - `status` is one of `running`, `completed`, `failed`, or `not_found`, and reflects state when the observation window closes.
302
305
  - `agent` is `claude`, `codex`, `gemini`, `forge`, `opencode`, a future tracked string value, or `null` when the process is not found or the agent cannot be determined.
@@ -335,6 +338,14 @@ Example response:
335
338
 
336
339
  Lists all running and completed AI agent processes with their status, PID, and basic info.
337
340
 
341
+ ### `doctor`
342
+
343
+ Checks supported AI CLI binary availability and path resolution from MCP clients. Like `ai-cli doctor`, it returns a `checks` block and does not verify login state or terms acceptance.
344
+
345
+ ### `models`
346
+
347
+ Lists supported model names, aliases, and dynamic backend discovery hints from MCP clients. This returns the same structured payload as `ai-cli models`.
348
+
338
349
  ### `get_result`
339
350
 
340
351
  Gets the current output and status of an AI agent process by PID.
@@ -365,6 +376,28 @@ Terminates a running AI agent process by PID.
365
376
 
366
377
  For development setup, testing, and contribution guidelines, see the [Development Guide](./docs/development.md).
367
378
 
379
+ ## Testing
380
+
381
+ ```bash
382
+ # Deterministic unit, parser, contract, and mocked e2e tests
383
+ npm test
384
+
385
+ # Published npm package contents smoke test
386
+ npm run test:package
387
+
388
+ # Deterministic PR/release gate used by GitHub Actions.
389
+ # This does not enable real external CLI runs by itself.
390
+ npm run test:release
391
+
392
+ # Release-time live E2E against real installed AI CLIs
393
+ ACM_LIVE_E2E=1 ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
394
+
395
+ # Release-time live E2E for both ai-cli and MCP server surfaces
396
+ ACM_LIVE_E2E=1 ACM_LIVE_E2E_SURFACE=all ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
397
+ ```
398
+
399
+ Live E2E is opt-in because it depends on installed and authenticated external CLIs, network access, provider availability, and cost budget. `ACM_LIVE_E2E_SURFACE` defaults to `cli`; use `mcp` or `all` to include the MCP server surface.
400
+
368
401
  ## Advanced Configuration (Optional)
369
402
 
370
403
  Normally not required, but useful for customizing CLI paths or debugging.
package/dist/app/cli.js CHANGED
@@ -58,7 +58,7 @@ Options:
58
58
  export const PEEK_HELP_TEXT = `Usage: ai-cli peek <pid...> [options]
59
59
 
60
60
  Observe new natural-language agent messages, and optionally tool calls, for a short one-shot window.
61
- In v1, message extraction is supported for Codex, Claude, OpenCode, and Gemini; Forge returns status with events: [].
61
+ In v1, message extraction is supported for Codex, Claude, OpenCode, Gemini, and best-effort Forge Summary/Completed successfully lines. Forge tool calls are low-precision Execute/Finished markers and never include command output.
62
62
  This is not a history API, gapless streaming, or stdout/stderr tailing. No --follow mode is available in v1.
63
63
 
64
64
  Options:
@@ -97,6 +97,7 @@ Options:
97
97
  export const DOCTOR_HELP_TEXT = `Usage: ai-cli doctor
98
98
 
99
99
  Check whether supported AI CLI binaries are available, including OpenCode.
100
+ This checks binary availability and path resolution only; it does not verify login state or terms acceptance.
100
101
 
101
102
  Options:
102
103
  --help, -h Show this help message
package/dist/app/mcp.js CHANGED
@@ -2,12 +2,13 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2
2
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3
3
  import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js';
4
4
  import { spawn } from 'node:child_process';
5
- import { debugLog, findClaudeCli, findCodexCli, findForgeCli, findGeminiCli, findOpencodeCli } from '../cli-utils.js';
6
- import { getModelParameterDescription, getSupportedModelsDescription } from '../model-catalog.js';
5
+ import { createRequire } from 'node:module';
6
+ import { debugLog, getCliDoctorStatus } from '../cli-utils.js';
7
+ import { getModelParameterDescription, getModelsPayload, getSupportedModelsDescription } from '../model-catalog.js';
7
8
  import { validatePeekPids, validatePeekTimeSec } from '../peek.js';
8
9
  import { ProcessService } from '../process-service.js';
9
- // Server version - update this when releasing new versions
10
- const SERVER_VERSION = "2.2.0";
10
+ const require = createRequire(import.meta.url);
11
+ const SERVER_VERSION = require('../../package.json').version;
11
12
  // Track if this is the first tool use for version printing
12
13
  let isFirstToolUse = true;
13
14
  // Capture server startup time when the module loads
@@ -63,19 +64,18 @@ export class ClaudeCodeServer {
63
64
  opencodeCliPath;
64
65
  processService;
65
66
  sigintHandler;
66
- packageVersion;
67
67
  constructor() {
68
- this.claudeCliPath = findClaudeCli();
69
- this.codexCliPath = findCodexCli();
70
- this.geminiCliPath = findGeminiCli();
71
- this.forgeCliPath = findForgeCli();
72
- this.opencodeCliPath = findOpencodeCli();
68
+ const doctorStatus = getCliDoctorStatus();
69
+ this.claudeCliPath = this.resolveDoctorCliPath(doctorStatus.claude);
70
+ this.codexCliPath = this.resolveDoctorCliPath(doctorStatus.codex);
71
+ this.geminiCliPath = this.resolveDoctorCliPath(doctorStatus.gemini);
72
+ this.forgeCliPath = this.resolveDoctorCliPath(doctorStatus.forge);
73
+ this.opencodeCliPath = this.resolveDoctorCliPath(doctorStatus.opencode);
73
74
  console.error(`[Setup] Using Claude CLI command/path: ${this.claudeCliPath}`);
74
75
  console.error(`[Setup] Using Codex CLI command/path: ${this.codexCliPath}`);
75
76
  console.error(`[Setup] Using Gemini CLI command/path: ${this.geminiCliPath}`);
76
77
  console.error(`[Setup] Using Forge CLI command/path: ${this.forgeCliPath}`);
77
78
  console.error(`[Setup] Using OpenCode CLI command/path: ${this.opencodeCliPath}`);
78
- this.packageVersion = SERVER_VERSION;
79
79
  this.processService = new ProcessService({
80
80
  cliPaths: {
81
81
  claude: this.claudeCliPath,
@@ -101,6 +101,18 @@ export class ClaudeCodeServer {
101
101
  };
102
102
  process.on('SIGINT', this.sigintHandler);
103
103
  }
104
+ resolveDoctorCliPath(status) {
105
+ return status.resolvedPath || status.configuredCommand;
106
+ }
107
+ getCliConfigurationError() {
108
+ const doctorStatus = getCliDoctorStatus();
109
+ for (const name of ['claude', 'codex', 'gemini', 'forge', 'opencode']) {
110
+ if (doctorStatus[name].error) {
111
+ return doctorStatus[name].error;
112
+ }
113
+ }
114
+ return null;
115
+ }
104
116
  setupToolHandlers() {
105
117
  this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
106
118
  tools: [
@@ -150,7 +162,7 @@ ${getSupportedModelsDescription()}
150
162
  },
151
163
  reasoning_effort: {
152
164
  type: 'string',
153
- description: 'Reasoning control for Claude and Codex. Claude uses --effort with "low", "medium", "high". Codex uses model_reasoning_effort with "low", "medium", "high", "xhigh". Gemini, Forge, and OpenCode do not support reasoning_effort in this integration.',
165
+ description: 'Reasoning control for Claude and Codex. Claude uses --effort with "low", "medium", "high", "xhigh", "max". Codex uses model_reasoning_effort with "low", "medium", "high", "xhigh". Gemini, Forge, and OpenCode do not support reasoning_effort in this integration.',
154
166
  },
155
167
  session_id: {
156
168
  type: 'string',
@@ -211,7 +223,7 @@ ${getSupportedModelsDescription()}
211
223
  },
212
224
  {
213
225
  name: 'peek',
214
- description: 'One-shot short observation window for running child agents. Returns only natural-language message events, and optionally normalized tool_call events, observed during this call; not a history API, not gapless streaming, and not stdout/stderr tailing. In v1, message extraction is supported for Codex, Claude, OpenCode, and Gemini; Forge returns status with events: []. Tool calls exclude raw tool output.',
226
+ description: 'One-shot short observation window for running child agents. Returns only natural-language message events, and optionally normalized tool_call events, observed during this call; not a history API, not gapless streaming, and not stdout/stderr tailing. In v1, message extraction is supported for Codex, Claude, OpenCode, Gemini, and best-effort Forge Summary/Completed successfully lines. Forge tool calls are low-precision Execute/Finished markers and never include command output. Tool calls exclude raw tool output.',
215
227
  inputSchema: {
216
228
  type: 'object',
217
229
  properties: {
@@ -253,6 +265,22 @@ ${getSupportedModelsDescription()}
253
265
  type: 'object',
254
266
  properties: {},
255
267
  },
268
+ },
269
+ {
270
+ name: 'doctor',
271
+ description: 'Check supported AI CLI binary availability and path resolution. Does not verify login state or terms acceptance.',
272
+ inputSchema: {
273
+ type: 'object',
274
+ properties: {},
275
+ },
276
+ },
277
+ {
278
+ name: 'models',
279
+ description: 'List supported model names, model aliases, and dynamic backend discovery hints.',
280
+ inputSchema: {
281
+ type: 'object',
282
+ properties: {},
283
+ },
256
284
  }
257
285
  ],
258
286
  }));
@@ -275,6 +303,10 @@ ${getSupportedModelsDescription()}
275
303
  return this.handleKillProcess(toolArguments);
276
304
  case 'cleanup_processes':
277
305
  return this.handleCleanupProcesses();
306
+ case 'doctor':
307
+ return this.handleDoctor();
308
+ case 'models':
309
+ return this.handleModels();
278
310
  default:
279
311
  throw new McpError(ErrorCode.MethodNotFound, `Tool ${toolName} not found`);
280
312
  }
@@ -285,6 +317,10 @@ ${getSupportedModelsDescription()}
285
317
  console.error(`ai_cli_mcp v${SERVER_VERSION} started at ${serverStartupTime}`);
286
318
  isFirstToolUse = false;
287
319
  }
320
+ const cliConfigurationError = this.getCliConfigurationError();
321
+ if (cliConfigurationError) {
322
+ throw new McpError(ErrorCode.InvalidParams, cliConfigurationError);
323
+ }
288
324
  try {
289
325
  const result = this.processService.startProcess({
290
326
  prompt: toolArguments.prompt,
@@ -410,6 +446,22 @@ ${getSupportedModelsDescription()}
410
446
  }]
411
447
  };
412
448
  }
449
+ async handleDoctor() {
450
+ return {
451
+ content: [{
452
+ type: 'text',
453
+ text: JSON.stringify(getCliDoctorStatus(), null, 2)
454
+ }]
455
+ };
456
+ }
457
+ async handleModels() {
458
+ return {
459
+ content: [{
460
+ type: 'text',
461
+ text: JSON.stringify(getModelsPayload(), null, 2)
462
+ }]
463
+ };
464
+ }
413
465
  async run() {
414
466
  const transport = new StdioServerTransport();
415
467
  await this.server.connect(transport);
@@ -1,13 +1,17 @@
1
1
  import { existsSync, readFileSync } from 'node:fs';
2
2
  import { resolve as pathResolve, isAbsolute } from 'node:path';
3
3
  import { MODEL_ALIASES } from './model-catalog.js';
4
- export const ALLOWED_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
5
- const CLAUDE_REASONING_EFFORTS = new Set(['low', 'medium', 'high']);
4
+ export const ALLOWED_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh', 'max']);
5
+ const CLAUDE_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh', 'max']);
6
+ const CODEX_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
6
7
  const OPENCODE_MODEL_ERROR = 'Invalid OpenCode model. Expected exact syntax oc-<provider/model>.';
7
8
  function getStandardAgentForModel(model) {
8
9
  if (model === 'forge') {
9
10
  return 'forge';
10
11
  }
12
+ if (model === 'codex') {
13
+ return 'codex';
14
+ }
11
15
  if (model.startsWith('gpt-')) {
12
16
  return 'codex';
13
17
  }
@@ -76,7 +80,7 @@ export function getReasoningEffort(model, rawValue) {
76
80
  }
77
81
  const normalized = trimmed.toLowerCase();
78
82
  if (!ALLOWED_REASONING_EFFORTS.has(normalized)) {
79
- throw new Error(`Invalid reasoning_effort: ${rawValue}. Allowed values: low, medium, high, xhigh.`);
83
+ throw new Error(`Invalid reasoning_effort: ${rawValue}. Allowed values: low, medium, high, xhigh, max.`);
80
84
  }
81
85
  const agent = getStandardAgentForModel(model);
82
86
  if (agent === 'forge') {
@@ -86,7 +90,10 @@ export function getReasoningEffort(model, rawValue) {
86
90
  throw new Error('reasoning_effort is only supported for Claude and Codex models.');
87
91
  }
88
92
  if (agent === 'claude' && !CLAUDE_REASONING_EFFORTS.has(normalized)) {
89
- throw new Error('Claude reasoning_effort supports only low, medium, high.');
93
+ throw new Error('Claude reasoning_effort supports only low, medium, high, xhigh, max.');
94
+ }
95
+ if (agent === 'codex' && !CODEX_REASONING_EFFORTS.has(normalized)) {
96
+ throw new Error('Codex reasoning_effort supports only low, medium, high, xhigh.');
90
97
  }
91
98
  return normalized;
92
99
  }
@@ -132,7 +139,7 @@ export function buildCliCommand(options) {
132
139
  reasoningEffortArg = 'xhigh';
133
140
  }
134
141
  else if (rawModel === 'claude-ultra') {
135
- reasoningEffortArg = 'high';
142
+ reasoningEffortArg = 'max';
136
143
  }
137
144
  }
138
145
  const reasoningTargetModel = rawModel === 'opencode' || rawModel.startsWith('oc-')
@@ -152,7 +159,7 @@ export function buildCliCommand(options) {
152
159
  if (reasoningEffort) {
153
160
  args.push('-c', `model_reasoning_effort=${reasoningEffort}`);
154
161
  }
155
- if (resolvedModel) {
162
+ if (resolvedModel && resolvedModel !== 'codex') {
156
163
  args.push('--model', resolvedModel);
157
164
  }
158
165
  args.push('--skip-git-repo-check', '--dangerously-bypass-approvals-and-sandbox', '--json', prompt);