npm - ai-cli-mcp - Versions diffs - 2.18.0 → 2.20.0 - Mend

ai-cli-mcp 2.18.0 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101)

package/CHANGELOG.md +26 -0
package/README.ja.md +37 -11
package/README.md +44 -11
package/dist/app/cli.js +2 -1
package/dist/app/mcp.js +65 -13
package/dist/cli-builder.js +13 -6
package/dist/cli-process-service.js +81 -95
package/dist/cli-utils.js +6 -0
package/dist/cli.js +1 -1
package/dist/model-catalog.js +3 -2
package/dist/parsers.js +111 -8
package/dist/process-service.js +5 -4
package/package.json +26 -2
package/server.json +3 -3
package/.gemini/settings.json +0 -11
package/.github/dependabot.yml +0 -28
package/.github/pull_request_template.md +0 -28
package/.github/workflows/ci.yml +0 -34
package/.github/workflows/dependency-review.yml +0 -22
package/.github/workflows/publish.yml +0 -89
package/.github/workflows/test.yml +0 -20
package/.github/workflows/watch-session-prs.yml +0 -276
package/.husky/pre-commit +0 -1
package/.mcp.json +0 -11
package/.releaserc.json +0 -18
package/.vscode/settings.json +0 -3
package/CONTRIBUTING.md +0 -81
package/dist/__tests__/app-cli.test.js +0 -392
package/dist/__tests__/cli-bin-smoke.test.js +0 -101
package/dist/__tests__/cli-builder.test.js +0 -442
package/dist/__tests__/cli-process-service.test.js +0 -655
package/dist/__tests__/cli-utils.test.js +0 -171
package/dist/__tests__/e2e.test.js +0 -256
package/dist/__tests__/edge-cases.test.js +0 -130
package/dist/__tests__/error-cases.test.js +0 -292
package/dist/__tests__/mcp-contract.test.js +0 -636
package/dist/__tests__/mocks.js +0 -32
package/dist/__tests__/model-alias.test.js +0 -36
package/dist/__tests__/parsers.test.js +0 -500
package/dist/__tests__/peek.test.js +0 -36
package/dist/__tests__/process-management.test.js +0 -871
package/dist/__tests__/server.test.js +0 -809
package/dist/__tests__/setup.js +0 -11
package/dist/__tests__/utils/claude-mock.js +0 -80
package/dist/__tests__/utils/mcp-client.js +0 -121
package/dist/__tests__/utils/opencode-mock.js +0 -91
package/dist/__tests__/utils/persistent-mock.js +0 -28
package/dist/__tests__/utils/test-helpers.js +0 -11
package/dist/__tests__/validation.test.js +0 -308
package/dist/__tests__/version-print.test.js +0 -65
package/dist/__tests__/wait.test.js +0 -260
package/docs/RELEASE_CHECKLIST.md +0 -65
package/docs/cli-architecture.md +0 -275
package/docs/concept.md +0 -154
package/docs/development.md +0 -156
package/docs/e2e-testing.md +0 -148
package/docs/prd.md +0 -146
package/docs/session-stacking.md +0 -67
package/src/__tests__/app-cli.test.ts +0 -495
package/src/__tests__/cli-bin-smoke.test.ts +0 -136
package/src/__tests__/cli-builder.test.ts +0 -549
package/src/__tests__/cli-process-service.test.ts +0 -759
package/src/__tests__/cli-utils.test.ts +0 -200
package/src/__tests__/e2e.test.ts +0 -311
package/src/__tests__/edge-cases.test.ts +0 -176
package/src/__tests__/error-cases.test.ts +0 -370
package/src/__tests__/mcp-contract.test.ts +0 -755
package/src/__tests__/mocks.ts +0 -35
package/src/__tests__/model-alias.test.ts +0 -44
package/src/__tests__/parsers.test.ts +0 -564
package/src/__tests__/peek.test.ts +0 -44
package/src/__tests__/process-management.test.ts +0 -1043
package/src/__tests__/server.test.ts +0 -1020
package/src/__tests__/setup.ts +0 -13
package/src/__tests__/utils/claude-mock.ts +0 -87
package/src/__tests__/utils/mcp-client.ts +0 -159
package/src/__tests__/utils/opencode-mock.ts +0 -108
package/src/__tests__/utils/persistent-mock.ts +0 -33
package/src/__tests__/utils/test-helpers.ts +0 -13
package/src/__tests__/validation.test.ts +0 -369
package/src/__tests__/version-print.test.ts +0 -81
package/src/__tests__/wait.test.ts +0 -302
package/src/app/cli.ts +0 -424
package/src/app/mcp.ts +0 -466
package/src/bin/ai-cli-mcp.ts +0 -7
package/src/bin/ai-cli.ts +0 -11
package/src/cli-builder.ts +0 -274
package/src/cli-parse.ts +0 -105
package/src/cli-process-service.ts +0 -708
package/src/cli-utils.ts +0 -258
package/src/cli.ts +0 -124
package/src/model-catalog.ts +0 -87
package/src/parsers.ts +0 -840
package/src/peek.ts +0 -95
package/src/process-result.ts +0 -88
package/src/process-service.ts +0 -367
package/src/server.ts +0 -10
package/tsconfig.json +0 -16
package/vitest.config.e2e.ts +0 -27
package/vitest.config.ts +0 -22
package/vitest.config.unit.ts +0 -28

package/CHANGELOG.md CHANGED

@@ -1,3 +1,29 @@
+# [2.20.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.19.0...v2.20.0) (2026-04-18)
+### Bug Fixes
+* derive mcp server version from package ([d055ae8](https://github.com/mkXultra/ai-cli-mcp/commit/d055ae8d7900e879633711d79b6ae9fff95ce7f1))
+* harden cli termination handling ([8497f22](https://github.com/mkXultra/ai-cli-mcp/commit/8497f2282f53e24d881825fbbe52c61b8f488166))
+* keep mcp doctor reachable with invalid cli env ([aee7783](https://github.com/mkXultra/ai-cli-mcp/commit/aee7783f09413118735e640a2b8745d0140bff31))
+* persist detached cli exit codes ([53cec30](https://github.com/mkXultra/ai-cli-mcp/commit/53cec30c60dd3a957a31403546989ffac25b26f9))
+* restrict npm package contents ([cf1de69](https://github.com/mkXultra/ai-cli-mcp/commit/cf1de6972592645ffdbaa149d23293cb99314066))
+* sync server manifest during release ([9c7059f](https://github.com/mkXultra/ai-cli-mcp/commit/9c7059fe35a114742205f0440308fbbbd7020951))
+### Features
+* expose doctor as mcp tool ([c33e1de](https://github.com/mkXultra/ai-cli-mcp/commit/c33e1de3ed768735501a3308ccb5b496e87e230a))
+* expose models as mcp tool ([4bae139](https://github.com/mkXultra/ai-cli-mcp/commit/4bae1391a9e5413c25cf393ecb186b387e11b5eb))
+* support new Claude effort levels ([ec66dc9](https://github.com/mkXultra/ai-cli-mcp/commit/ec66dc9cbb260eb2f0401185a1c9e83e670d0cd1))
+# [2.19.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.18.0...v2.19.0) (2026-04-15)
+### Features
+* peekコマンドにForgeエージェントのベストエフォートサポートを追加 ([7c01958](https://github.com/mkXultra/ai-cli-mcp/commit/7c01958b0c9a8133da07c556b303481abd511b6b))
 # [2.18.0](https://github.com/mkXultra/ai-cli-mcp/compare/v2.17.0...v2.18.0) (2026-04-12)

package/README.ja.md CHANGED

@@ -24,7 +24,7 @@ Cursorなどのエディタが、複雑な手順を伴う編集や操作に苦
 - OpenCode を非対話 JSON モードで実行（`opencode run --format json --dir <workFolder> <prompt>` を使用）
 - 複数のAIモデルのサポート：
     - Claude (sonnet, sonnet[1m], opus, opusplan, haiku)
-    - Codex (gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, など)
+    - Codex (`codex` は Codex CLI 側の設定済みデフォルトモデル、加えて gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, など)
     - Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview)
     - Forge (`forge`)
     - OpenCode (`opencode` と `oc-<provider/model>` ラッパー。例: `oc-openai/gpt-5.4`)
@@ -192,6 +192,7 @@ macOSでは、これらのツールを初めて実行する際にフォルダへ
 ```bash
 ai-cli doctor
 ai-cli models
+ai-cli run --cwd "$PWD" --model codex --prompt "Codex CLI のデフォルトモデルで実行"
 ai-cli run --cwd "$PWD" --model codex-ultra --prompt "fix failing tests"
 ai-cli run --cwd "$PWD" --model opencode --session-id ses_existing --prompt "この OpenCode セッションを継続して"
 ai-cli run --cwd "$PWD" --model oc-openai/gpt-5.4 --prompt "明示的な OpenCode モデルで実行"
@@ -214,7 +215,9 @@ OpenCode のモデル指定は次の 2 つを受け付けます。
 `ai-cli models` は OpenCode を機械可読に `opencode: ["opencode"]` と `dynamicModelBackends.opencode` で公開します。実際に利用可能なバックエンドネイティブなモデル一覧は `opencode models` で確認してください。
-`doctor` は CLI バイナリの存在確認と path 解決だけを行います。ログイン状態や利用規約同意までは確認しません。
+Codex のモデル指定では、`codex` を使うと Codex CLI 側の設定済みデフォルトモデルを使用します。Codex CLI がアカウント種別によって明示的な `gpt-*` モデル指定を受け付けない場合に有用です。
+`doctor` は CLI バイナリの利用可否と path 解決だけを確認します。JSON 出力には `checks` ブロックが含まれ、ログイン状態と利用規約同意は未確認として示されます。
 ## CLI の状態保存先
@@ -229,13 +232,13 @@ OpenCode のモデル指定は次の 2 つを受け付けます。
 - `meta.json`
 - `stdout.log`
 - `stderr.log`
-- `exit-status.json`（detached な OpenCode 実行用）
+- `exit-status.json`（detached 実行用）
 完了済み・失敗済みの実行は `ai-cli cleanup` で削除できます。`running` のものは保持されます。
-## 既知の制約
+## Exit status の追跡
-detached 実行された `ai-cli` では、OpenCode バックエンドに限り自然終了時の exit status を永続化します。そのため OpenCode の失敗終了は非ゼロ exit code を含めて `failed` として扱われ、結果では生の `stdout` / `stderr` を保持します。一方、他の detached バックエンドでは従来どおり、より広い exit-status 追跡が追加されるまでは自然終了した実行が信頼できる exit code なしで `completed` と見なされる制約が残ります。
+detached 実行された `ai-cli` は、すべての対応バックエンドで自然終了時の exit status を `exit-status.json` に永続化します。非ゼロ終了は記録された `exitCode` 付きの `failed` として扱われ、ゼロ終了は `exitCode: 0` 付きの `completed` として扱われます。`ai-cli kill` は SIGTERM による終了を failed exit として記録し、追跡中プロセスが exit metadata なしで消えた場合も成功とは見なさず `failed` として扱います。
 ## MCPクライアントへの接続
@@ -256,13 +259,13 @@ Claude CLI、Codex CLI、Gemini CLI、Forge CLI、または OpenCode を使用
 - `prompt_file` (string, 任意): プロンプトを含むファイルへのパス。`prompt` または `prompt_file` のいずれかが必須です。絶対パス、または `workFolder` からの相対パスが指定可能です。
 - `workFolder` (string, 必須): CLIを実行する作業ディレクトリ。絶対パスである必要があります。
 - **モデル (Models):**
-    - **Ultra エイリアス:** `claude-ultra` (自動的に high effort に設定), `codex-ultra` (自動的に xhigh reasoning に設定), `gemini-ultra`
+    - **Ultra エイリアス:** `claude-ultra` (自動的に max effort に設定), `codex-ultra` (自動的に xhigh reasoning に設定), `gemini-ultra`
     - Claude: `sonnet`, `sonnet[1m]`, `opus`, `opusplan`, `haiku`
-    - Codex: `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
+    - Codex: `codex`（Codex CLI 側の設定済みデフォルトモデル）および `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
     - Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro-preview`, `gemini-3-pro-preview`, `gemini-3-flash-preview`
     - Forge: `forge`
     - OpenCode: `opencode`（設定済みのデフォルトモデル）および `oc-openai/gpt-5.4` のような明示ラッパー
-- `reasoning_effort` (string, 任意): Claude と Codex の推論制御。Claude では `--effort` を使います（許容値: "low", "medium", "high"）。Codex では `model_reasoning_effort` を使います（許容値: "low", "medium", "high", "xhigh"）。Gemini、Forge、OpenCode では `reasoning_effort` はサポートしません。
+- `reasoning_effort` (string, 任意): Claude と Codex の推論制御。Claude では `--effort` を使います（許容値: "low", "medium", "high", "xhigh", "max"）。Codex では `model_reasoning_effort` を使います（許容値: "low", "medium", "high", "xhigh"）。Gemini、Forge、OpenCode では `reasoning_effort` はサポートしません。
 - `session_id` (string, 任意): 以前のセッションを再開するためのセッションID。Claude、Codex、Gemini、Forge、OpenCode でサポートされます。OpenCode は `--session` による in-place resume で再開し、`oc-<provider/model>` の明示指定と併用できます。
 ### `wait`
@@ -297,9 +300,9 @@ ai-cli peek 123 --time 10 --include-tool-calls
 - `peek_started_at` と `events[].ts` は、ai-cli-mcp サーバー側の UTC RFC3339 タイムスタンプです。`peek_started_at` は検証とリスナー登録後に観測ウィンドウが始まった時刻、`events[].ts` は ai-cli-mcp がイベントを観測して受理した時刻です。
 - 観測ウィンドウは `peek_time_sec` が経過するか、対象プロセスがすべて終端状態になった時点で終了します。
 - 観測開始前のイベントは返しません。同じPIDへの同時 `peek` は可能で、それぞれ独立した観測ウィンドウを持つため、イベントが重複して返ることがあります。
-- メッセージイベントは、Codex の `agent_message` text、Claude assistant の text content、OpenCode の `type: "text"` かつ `part.type` が `"text"` のイベント、Gemini stream-json の `role` が `"assistant"` の `message` イベントから認識します。
-- tool call を含める場合、Codex の command/MCP call、Claude の tool use/result、Gemini の tool use/result、OpenCode の完了済み tool use event を正規化した `tool_call` イベントとして返します。tool summary は tool 名と入力メタデータだけから作る短い1行文字列です。raw `stdout` / `stderr`、raw JSONL、tool result output、コマンド出力、`result.response`、stats、token usage、verbose メタデータは除外します。
-- 未知のイベント形状はデフォルトで拒否します。Forge など、まだ明示対応されていない管理対象エージェントは、実際のプロセス状態を返しつつ、`events: []`、`truncated: false`、`error: null` にします。
+- メッセージイベントは、Codex の `agent_message` text、Claude assistant の text content、OpenCode の `type: "text"` かつ `part.type` が `"text"` のイベント、Gemini stream-json の `role` が `"assistant"` の `message` イベント、Forge の `Summary:` または `Completed successfully:` で始まる plain-text 行から best-effort に認識します。
+- tool call を含める場合、Codex の command/MCP call、Claude の tool use/result、Gemini の tool use/result、OpenCode の完了済み tool use event、Forge の低精度な `Execute` / `Finished` marker を正規化した `tool_call` イベントとして返します。tool summary は tool 名と入力メタデータだけから作る短い1行文字列です。Forge のコマンド出力自体は tail せず、公開しません。raw `stdout` / `stderr`、raw JSONL、tool result output、コマンド出力、`result.response`、stats、token usage、verbose メタデータは除外します。
+- 未知のイベント形状はデフォルトで拒否します。まだ明示対応されていない管理対象エージェントは、実際のプロセス状態を返しつつ、`events: []`、`truncated: false`、`error: null` にします。
 - 各PIDごとに、観測ウィンドウ内で最初に観測された50件までを保持します。それ以降のイベントを捨てた場合は `truncated` が `true` になります。
 - `status` は `running`、`completed`、`failed`、`not_found` のいずれかで、観測ウィンドウ終了時点の状態を表します。
 - `agent` は `claude`、`codex`、`gemini`、`forge`、`opencode`、将来追加される追跡済みエージェント文字列、または `null` です。`null` はプロセスが見つからない、またはエージェント種別を判断できない場合を表します。
@@ -338,6 +341,14 @@ ai-cli peek 123 --time 10 --include-tool-calls
 実行中および完了したすべてのAIエージェントプロセスを、ステータス、PID、基本情報とともにリストアップします。
+### `doctor`
+MCP クライアントから、対応する AI CLI バイナリの利用可否と path 解決を確認します。`ai-cli doctor` と同じく `checks` ブロックを返し、ログイン状態や利用規約同意は確認しません。
+### `models`
+MCP クライアントから、対応モデル名、エイリアス、動的バックエンドの discovery hint を確認します。`ai-cli models` と同じ構造化 payload を返します。
 ### `get_result`
 PIDを指定して、AIエージェントプロセスの現在の出力とステータスを取得します。
@@ -380,8 +391,23 @@ npm run test:unit
 # E2Eテストの実行（モック使用）
 npm run test:e2e
+# 公開 npm package の contents smoke test
+npm run test:package
+# GitHub Actions と同じ deterministic PR/release gate
+# これだけでは実際の外部 CLI 実行は有効になりません
+npm run test:release
+# リリース前の live E2E（実際にインストール済み AI CLI を叩く）
+ACM_LIVE_E2E=1 ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
+# ai-cli と MCP server surface の両方を叩く live E2E
+ACM_LIVE_E2E=1 ACM_LIVE_E2E_SURFACE=all ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
 ```
+live E2E は opt-in です。インストール済みかつ認証済みの外部 CLI、ネットワーク、provider 側の可用性、コスト予算に依存するため、通常の `npm test` には含めていません。`ACM_LIVE_E2E_SURFACE` は既定で `cli` です。MCP server surface も含める場合は `mcp` または `all` を指定します。
 ## 高度な設定（オプション）
 通常の利用では設定不要ですが、CLIツールのパスをカスタマイズしたい場合やデバッグが必要な場合に使用できる環境変数です。

package/README.md CHANGED

@@ -24,7 +24,7 @@ This MCP server provides tools that can be used by LLMs to interact with AI CLI
 - Execute Gemini CLI with automatic approval mode (using `-y`)
 - Execute Forge CLI in non-interactive mode (using `forge -C <workFolder> -p <prompt>`)
 - Execute OpenCode in non-interactive JSON mode (using `opencode run --format json --dir <workFolder> <prompt>`)
-- Support multiple AI models: Claude (sonnet, sonnet[1m], opus, opusplan, haiku), Codex (gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5-codex, gpt-5-codex-mini, gpt-5), Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview), Forge (`forge`), and OpenCode (`opencode` plus explicit `oc-<provider/model>` wrappers such as `oc-openai/gpt-5.4`)
+- Support multiple AI models: Claude (sonnet, sonnet[1m], opus, opusplan, haiku), Codex (`codex` for the CLI's configured default model, plus gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max, gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5-codex, gpt-5-codex-mini, gpt-5), Gemini (gemini-2.5-pro, gemini-2.5-flash, gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview), Forge (`forge`), and OpenCode (`opencode` plus explicit `oc-<provider/model>` wrappers such as `oc-openai/gpt-5.4`)
 - Manage background processes with PID tracking
 - Parse and return structured outputs from both tools
@@ -189,6 +189,7 @@ Example flow:
 ```bash
 ai-cli doctor
 ai-cli models
+ai-cli run --cwd "$PWD" --model codex --prompt "use the Codex CLI default model"
 ai-cli run --cwd "$PWD" --model codex-ultra --prompt "fix failing tests"
 ai-cli run --cwd "$PWD" --model opencode --session-id ses_existing --prompt "continue this OpenCode session"
 ai-cli run --cwd "$PWD" --model oc-openai/gpt-5.4 --prompt "run with an explicit OpenCode backend model"
@@ -211,7 +212,9 @@ OpenCode model selection accepts either:
 `ai-cli models` exposes OpenCode machine-readably via `opencode: ["opencode"]` plus `dynamicModelBackends.opencode`, which points users to `opencode models` for backend-native discovery.
-`doctor` checks only binary existence and path resolution. It does not verify login state or terms acceptance.
+Codex model selection accepts `codex` to use the Codex CLI's configured default model. This is useful for account types where explicit `gpt-*` model overrides are not accepted by the Codex CLI.
+`doctor` checks only binary availability and path resolution. Its JSON output includes a `checks` block that marks login state and terms acceptance as unchecked.
 ## CLI State Storage
@@ -226,13 +229,13 @@ Each PID directory contains:
 - `meta.json`
 - `stdout.log`
 - `stderr.log`
-- `exit-status.json` for detached OpenCode runs
+- `exit-status.json` for detached runs
 Use `ai-cli cleanup` to remove completed and failed runs. Running processes are preserved.
-## Known Limitation
+## Exit Status Tracking
-Detached `ai-cli` runs persist natural exit status for OpenCode-backed runs, including failed exit codes used to preserve raw OpenCode stdout/stderr in result output. Other detached backends still keep the pre-existing limitation: naturally finished runs may be surfaced as completed without a reliable persisted exit code until broader exit tracking is added.
+Detached `ai-cli` runs persist natural exit status for all supported backends through `exit-status.json`. Non-zero exits are surfaced as `failed` with the recorded `exitCode`; zero exits are surfaced as `completed` with `exitCode: 0`. `ai-cli kill` records SIGTERM termination as a failed exit, and a tracked process that disappears without exit metadata is treated as `failed` rather than assumed successful.
 ## Connecting to Your MCP Client
@@ -253,13 +256,13 @@ Executes a prompt using Claude CLI, Codex CLI, Gemini CLI, Forge CLI, or OpenCod
 - `prompt_file` (string, optional): Path to a file containing the prompt. Either `prompt` or `prompt_file` is required. Can be absolute path or relative to `workFolder`.
 - `workFolder` (string, required): The working directory for the CLI execution. Must be an absolute path.
 **Models:**
-- **Ultra Aliases:** `claude-ultra` (defaults to high effort), `codex-ultra` (defaults to xhigh reasoning), `gemini-ultra`
+- **Ultra Aliases:** `claude-ultra` (defaults to max effort), `codex-ultra` (defaults to xhigh reasoning), `gemini-ultra`
 - Claude: `sonnet`, `sonnet[1m]`, `opus`, `opusplan`, `haiku`
-- Codex: `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
+- Codex: `codex` for the CLI's configured default model, plus `gpt-5.4`, `gpt-5.3-codex`, `gpt-5.2-codex`, `gpt-5.1-codex-mini`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1`, `gpt-5`
 - Gemini: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-3.1-pro-preview`, `gemini-3-pro-preview`, `gemini-3-flash-preview`
 - Forge: `forge`
 - OpenCode: `opencode` for the configured default backend model, plus explicit wrappers like `oc-openai/gpt-5.4`
-- `reasoning_effort` (string, optional): Reasoning control for Claude and Codex. Claude uses `--effort` (allowed: "low", "medium", "high"). Codex uses `model_reasoning_effort` (allowed: "low", "medium", "high", "xhigh"). Gemini, Forge, and OpenCode do not support `reasoning_effort`.
+- `reasoning_effort` (string, optional): Reasoning control for Claude and Codex. Claude uses `--effort` (allowed: "low", "medium", "high", "xhigh", "max"). Codex uses `model_reasoning_effort` (allowed: "low", "medium", "high", "xhigh"). Gemini, Forge, and OpenCode do not support `reasoning_effort`.
 - `session_id` (string, optional): Optional session ID to resume a previous session. Supported for Claude, Codex, Gemini, Forge, and OpenCode. OpenCode resumes in place via `--session` and may also be combined with an explicit `oc-<provider/model>` selection.
 ### `wait`
@@ -294,9 +297,9 @@ ai-cli peek 123 --time 10 --include-tool-calls
 - `peek_started_at` and `events[].ts` are ai-cli-mcp server-side UTC RFC3339 timestamps. `peek_started_at` is when the observation window starts after validation and listener registration; `events[].ts` is when ai-cli-mcp observed and accepted the event.
 - The window ends when `peek_time_sec` elapses or all target processes reach a terminal state, whichever comes first.
 - Events emitted before the window starts are not returned. Concurrent `peek` calls for the same PID are allowed; each has an independent window and may return overlapping events.
-- Message events are recognized from Codex `agent_message` text, Claude assistant text content, OpenCode `type: "text"` events where `part.type` is `"text"`, and Gemini stream-json `message` events where `role` is `"assistant"`.
-- When tool calls are included, `tool_call` events are normalized for Codex command/MCP calls, Claude tool use/results, Gemini tool use/results, and OpenCode completed tool use events. Tool summaries are bounded one-line strings derived from tool names and input metadata only. Raw stdout/stderr, raw JSONL, tool result output, command output, `result.response`, stats, token usage, and verbose metadata are excluded.
-- Unknown event shapes are denied by default. Managed agents without supported extraction, such as Forge until explicitly supported, return their real process status with `events: []`, `truncated: false`, and `error: null`.
+- Message events are recognized from Codex `agent_message` text, Claude assistant text content, OpenCode `type: "text"` events where `part.type` is `"text"`, Gemini stream-json `message` events where `role` is `"assistant"`, and best-effort Forge plain-text lines beginning with `Summary:` or `Completed successfully:`.
+- When tool calls are included, `tool_call` events are normalized for Codex command/MCP calls, Claude tool use/results, Gemini tool use/results, OpenCode completed tool use events, and low-precision Forge `Execute`/`Finished` markers. Tool summaries are bounded one-line strings derived from tool names and input metadata only. Forge command output itself is not tailed or exposed. Raw stdout/stderr, raw JSONL, tool result output, command output, `result.response`, stats, token usage, and verbose metadata are excluded.
+- Unknown event shapes are denied by default. Managed agents without supported extraction return their real process status with `events: []`, `truncated: false`, and `error: null`.
 - Each PID keeps the first 50 events observed in the window. If later events are dropped, `truncated` is `true`.
 - `status` is one of `running`, `completed`, `failed`, or `not_found`, and reflects state when the observation window closes.
 - `agent` is `claude`, `codex`, `gemini`, `forge`, `opencode`, a future tracked string value, or `null` when the process is not found or the agent cannot be determined.
@@ -335,6 +338,14 @@ Example response:
 Lists all running and completed AI agent processes with their status, PID, and basic info.
+### `doctor`
+Checks supported AI CLI binary availability and path resolution from MCP clients. Like `ai-cli doctor`, it returns a `checks` block and does not verify login state or terms acceptance.
+### `models`
+Lists supported model names, aliases, and dynamic backend discovery hints from MCP clients. This returns the same structured payload as `ai-cli models`.
 ### `get_result`
 Gets the current output and status of an AI agent process by PID.
@@ -365,6 +376,28 @@ Terminates a running AI agent process by PID.
 For development setup, testing, and contribution guidelines, see the [Development Guide](./docs/development.md).
+## Testing
+```bash
+# Deterministic unit, parser, contract, and mocked e2e tests
+npm test
+# Published npm package contents smoke test
+npm run test:package
+# Deterministic PR/release gate used by GitHub Actions.
+# This does not enable real external CLI runs by itself.
+npm run test:release
+# Release-time live E2E against real installed AI CLIs
+ACM_LIVE_E2E=1 ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
+# Release-time live E2E for both ai-cli and MCP server surfaces
+ACM_LIVE_E2E=1 ACM_LIVE_E2E_SURFACE=all ACM_LIVE_E2E_AGENTS=claude,codex npm run test:live
+```
+Live E2E is opt-in because it depends on installed and authenticated external CLIs, network access, provider availability, and cost budget. `ACM_LIVE_E2E_SURFACE` defaults to `cli`; use `mcp` or `all` to include the MCP server surface.
 ## Advanced Configuration (Optional)
 Normally not required, but useful for customizing CLI paths or debugging.

package/dist/app/cli.js CHANGED

@@ -58,7 +58,7 @@ Options:
 export const PEEK_HELP_TEXT = `Usage: ai-cli peek <pid...> [options]
 Observe new natural-language agent messages, and optionally tool calls, for a short one-shot window.
-In v1, message extraction is supported for Codex, Claude, OpenCode, and Gemini; Forge returns status with events: [].
+In v1, message extraction is supported for Codex, Claude, OpenCode, Gemini, and best-effort Forge Summary/Completed successfully lines. Forge tool calls are low-precision Execute/Finished markers and never include command output.
 This is not a history API, gapless streaming, or stdout/stderr tailing. No --follow mode is available in v1.
 Options:
@@ -97,6 +97,7 @@ Options:
 export const DOCTOR_HELP_TEXT = `Usage: ai-cli doctor
 Check whether supported AI CLI binaries are available, including OpenCode.
+This checks binary availability and path resolution only; it does not verify login state or terms acceptance.
 Options:
   --help, -h                   Show this help message

package/dist/app/mcp.js CHANGED

@@ -2,12 +2,13 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError, } from '@modelcontextprotocol/sdk/types.js';
 import { spawn } from 'node:child_process';
-import { debugLog, findClaudeCli, findCodexCli, findForgeCli, findGeminiCli, findOpencodeCli } from '../cli-utils.js';
-import { getModelParameterDescription, getSupportedModelsDescription } from '../model-catalog.js';
+import { createRequire } from 'node:module';
+import { debugLog, getCliDoctorStatus } from '../cli-utils.js';
+import { getModelParameterDescription, getModelsPayload, getSupportedModelsDescription } from '../model-catalog.js';
 import { validatePeekPids, validatePeekTimeSec } from '../peek.js';
 import { ProcessService } from '../process-service.js';
-// Server version - update this when releasing new versions
-const SERVER_VERSION = "2.2.0";
+const require = createRequire(import.meta.url);
+const SERVER_VERSION = require('../../package.json').version;
 // Track if this is the first tool use for version printing
 let isFirstToolUse = true;
 // Capture server startup time when the module loads
@@ -63,19 +64,18 @@ export class ClaudeCodeServer {
     opencodeCliPath;
     processService;
     sigintHandler;
-    packageVersion;
     constructor() {
-        this.claudeCliPath = findClaudeCli();
-        this.codexCliPath = findCodexCli();
-        this.geminiCliPath = findGeminiCli();
-        this.forgeCliPath = findForgeCli();
-        this.opencodeCliPath = findOpencodeCli();
+        const doctorStatus = getCliDoctorStatus();
+        this.claudeCliPath = this.resolveDoctorCliPath(doctorStatus.claude);
+        this.codexCliPath = this.resolveDoctorCliPath(doctorStatus.codex);
+        this.geminiCliPath = this.resolveDoctorCliPath(doctorStatus.gemini);
+        this.forgeCliPath = this.resolveDoctorCliPath(doctorStatus.forge);
+        this.opencodeCliPath = this.resolveDoctorCliPath(doctorStatus.opencode);
         console.error(`[Setup] Using Claude CLI command/path: ${this.claudeCliPath}`);
         console.error(`[Setup] Using Codex CLI command/path: ${this.codexCliPath}`);
         console.error(`[Setup] Using Gemini CLI command/path: ${this.geminiCliPath}`);
         console.error(`[Setup] Using Forge CLI command/path: ${this.forgeCliPath}`);
         console.error(`[Setup] Using OpenCode CLI command/path: ${this.opencodeCliPath}`);
-        this.packageVersion = SERVER_VERSION;
         this.processService = new ProcessService({
             cliPaths: {
                 claude: this.claudeCliPath,
@@ -101,6 +101,18 @@ export class ClaudeCodeServer {
         };
         process.on('SIGINT', this.sigintHandler);
     }
+    resolveDoctorCliPath(status) {
+        return status.resolvedPath || status.configuredCommand;
+    }
+    getCliConfigurationError() {
+        const doctorStatus = getCliDoctorStatus();
+        for (const name of ['claude', 'codex', 'gemini', 'forge', 'opencode']) {
+            if (doctorStatus[name].error) {
+                return doctorStatus[name].error;
+            }
+        }
+        return null;
+    }
     setupToolHandlers() {
         this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
             tools: [
@@ -150,7 +162,7 @@ ${getSupportedModelsDescription()}
                             },
                             reasoning_effort: {
                                 type: 'string',
-                                description: 'Reasoning control for Claude and Codex. Claude uses --effort with "low", "medium", "high". Codex uses model_reasoning_effort with "low", "medium", "high", "xhigh". Gemini, Forge, and OpenCode do not support reasoning_effort in this integration.',
+                                description: 'Reasoning control for Claude and Codex. Claude uses --effort with "low", "medium", "high", "xhigh", "max". Codex uses model_reasoning_effort with "low", "medium", "high", "xhigh". Gemini, Forge, and OpenCode do not support reasoning_effort in this integration.',
                             },
                             session_id: {
                                 type: 'string',
@@ -211,7 +223,7 @@ ${getSupportedModelsDescription()}
                 },
                 {
                     name: 'peek',
-                    description: 'One-shot short observation window for running child agents. Returns only natural-language message events, and optionally normalized tool_call events, observed during this call; not a history API, not gapless streaming, and not stdout/stderr tailing. In v1, message extraction is supported for Codex, Claude, OpenCode, and Gemini; Forge returns status with events: []. Tool calls exclude raw tool output.',
+                    description: 'One-shot short observation window for running child agents. Returns only natural-language message events, and optionally normalized tool_call events, observed during this call; not a history API, not gapless streaming, and not stdout/stderr tailing. In v1, message extraction is supported for Codex, Claude, OpenCode, Gemini, and best-effort Forge Summary/Completed successfully lines. Forge tool calls are low-precision Execute/Finished markers and never include command output. Tool calls exclude raw tool output.',
                     inputSchema: {
                         type: 'object',
                         properties: {
@@ -253,6 +265,22 @@ ${getSupportedModelsDescription()}
                         type: 'object',
                         properties: {},
                     },
+                },
+                {
+                    name: 'doctor',
+                    description: 'Check supported AI CLI binary availability and path resolution. Does not verify login state or terms acceptance.',
+                    inputSchema: {
+                        type: 'object',
+                        properties: {},
+                    },
+                },
+                {
+                    name: 'models',
+                    description: 'List supported model names, model aliases, and dynamic backend discovery hints.',
+                    inputSchema: {
+                        type: 'object',
+                        properties: {},
+                    },
                 }
             ],
         }));
@@ -275,6 +303,10 @@ ${getSupportedModelsDescription()}
                     return this.handleKillProcess(toolArguments);
                 case 'cleanup_processes':
                     return this.handleCleanupProcesses();
+                case 'doctor':
+                    return this.handleDoctor();
+                case 'models':
+                    return this.handleModels();
                 default:
                     throw new McpError(ErrorCode.MethodNotFound, `Tool ${toolName} not found`);
             }
@@ -285,6 +317,10 @@ ${getSupportedModelsDescription()}
             console.error(`ai_cli_mcp v${SERVER_VERSION} started at ${serverStartupTime}`);
             isFirstToolUse = false;
         }
+        const cliConfigurationError = this.getCliConfigurationError();
+        if (cliConfigurationError) {
+            throw new McpError(ErrorCode.InvalidParams, cliConfigurationError);
+        }
         try {
             const result = this.processService.startProcess({
                 prompt: toolArguments.prompt,
@@ -410,6 +446,22 @@ ${getSupportedModelsDescription()}
                 }]
         };
     }
+    async handleDoctor() { // Handler that the 'doctor' tool call is dispatched to; takes no arguments.
+        return { // Tool result: a content array holding one text item.
+            content: [{
+                    type: 'text',
+                    text: JSON.stringify(getCliDoctorStatus(), null, 2) // Whatever getCliDoctorStatus() returns, serialized as JSON with 2-space indentation.
+                }]
+        };
+    }
+    async handleModels() { // Handler that the 'models' tool call is dispatched to; takes no arguments.
+        return { // Tool result: a content array holding one text item.
+            content: [{
+                    type: 'text',
+                    text: JSON.stringify(getModelsPayload(), null, 2) // Whatever getModelsPayload() returns, serialized as JSON with 2-space indentation.
+                }]
+        };
+    }
     async run() {
         const transport = new StdioServerTransport();
         await this.server.connect(transport);

package/dist/cli-builder.js CHANGED Viewed

@@ -1,13 +1,17 @@
 import { existsSync, readFileSync } from 'node:fs';
 import { resolve as pathResolve, isAbsolute } from 'node:path';
 import { MODEL_ALIASES } from './model-catalog.js';
-export const ALLOWED_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
-const CLAUDE_REASONING_EFFORTS = new Set(['low', 'medium', 'high']);
+export const ALLOWED_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh', 'max']);
+const CLAUDE_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh', 'max']);
+const CODEX_REASONING_EFFORTS = new Set(['low', 'medium', 'high', 'xhigh']);
 const OPENCODE_MODEL_ERROR = 'Invalid OpenCode model. Expected exact syntax oc-<provider/model>.';
 function getStandardAgentForModel(model) {
     if (model === 'forge') {
         return 'forge';
     }
+    if (model === 'codex') {
+        return 'codex';
+    }
     if (model.startsWith('gpt-')) {
         return 'codex';
     }
@@ -76,7 +80,7 @@ export function getReasoningEffort(model, rawValue) {
     }
     const normalized = trimmed.toLowerCase();
     if (!ALLOWED_REASONING_EFFORTS.has(normalized)) {
-        throw new Error(`Invalid reasoning_effort: ${rawValue}. Allowed values: low, medium, high, xhigh.`);
+        throw new Error(`Invalid reasoning_effort: ${rawValue}. Allowed values: low, medium, high, xhigh, max.`);
     }
     const agent = getStandardAgentForModel(model);
     if (agent === 'forge') {
@@ -86,7 +90,10 @@ export function getReasoningEffort(model, rawValue) {
         throw new Error('reasoning_effort is only supported for Claude and Codex models.');
     }
     if (agent === 'claude' && !CLAUDE_REASONING_EFFORTS.has(normalized)) {
-        throw new Error('Claude reasoning_effort supports only low, medium, high.');
+        throw new Error('Claude reasoning_effort supports only low, medium, high, xhigh, max.');
+    }
+    if (agent === 'codex' && !CODEX_REASONING_EFFORTS.has(normalized)) {
+        throw new Error('Codex reasoning_effort supports only low, medium, high, xhigh.');
     }
     return normalized;
 }
@@ -132,7 +139,7 @@ export function buildCliCommand(options) {
             reasoningEffortArg = 'xhigh';
         }
         else if (rawModel === 'claude-ultra') {
-            reasoningEffortArg = 'high';
+            reasoningEffortArg = 'max';
         }
     }
     const reasoningTargetModel = rawModel === 'opencode' || rawModel.startsWith('oc-')
@@ -152,7 +159,7 @@ export function buildCliCommand(options) {
         if (reasoningEffort) {
             args.push('-c', `model_reasoning_effort=${reasoningEffort}`);
         }
-        if (resolvedModel) {
+        if (resolvedModel && resolvedModel !== 'codex') {
             args.push('--model', resolvedModel);
         }
         args.push('--skip-git-repo-check', '--dangerously-bypass-approvals-and-sandbox', '--json', prompt);