PyPI - cc-plugin-codex - Versions diffs - 0.1.4__py3-none-any.whl - Mend

cc-plugin-codex 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

cc_plugin_codex/__init__.py +5 -0
cc_plugin_codex/claude.py +284 -0
cc_plugin_codex/cli_contract.py +122 -0
cc_plugin_codex/config.py +172 -0
cc_plugin_codex/context.py +210 -0
cc_plugin_codex/jobs.py +561 -0
cc_plugin_codex/normalize.py +243 -0
cc_plugin_codex/preflight.py +94 -0
cc_plugin_codex/py.typed +0 -0
cc_plugin_codex/schemas.py +344 -0
cc_plugin_codex/server.py +1656 -0
cc_plugin_codex-0.1.4.dist-info/METADATA +223 -0
cc_plugin_codex-0.1.4.dist-info/RECORD +16 -0
cc_plugin_codex-0.1.4.dist-info/WHEEL +4 -0
cc_plugin_codex-0.1.4.dist-info/entry_points.txt +2 -0
cc_plugin_codex-0.1.4.dist-info/licenses/LICENSE +21 -0

cc_plugin_codex/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""cc-plugin-codex: call Claude Code from Codex for bounded, read-only critique."""
+from importlib.metadata import version
+# Package version, looked up from the metadata of the installed
+# "cc-plugin-codex" distribution rather than hard-coded here.
+# NOTE(review): importlib.metadata.version() raises when the distribution is
+# not installed, so importing from a bare source checkout presumably fails.
+__version__ = version("cc-plugin-codex")

cc_plugin_codex/claude.py ADDED Viewed

@@ -0,0 +1,284 @@
+"""Build and run the `claude` CLI invocation; classify failures."""
+from __future__ import annotations
+import contextlib
+import json
+import os
+import signal
+import subprocess
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+import anyio
+from anyio.to_thread import run_sync
+from cc_plugin_codex import cli_contract, preflight
+from cc_plugin_codex.config import (
+    INDEPENDENT_CRITIC_PROMPT,
+    access_flags,
+    config_mode_flags,
+)
+from cc_plugin_codex.schemas import ErrorInfo
+# Repair hint shared by both budget_exceeded branches of classify_failure(), so
+# the structured-envelope path and the free-text path give identical advice.
+_BUDGET_REPAIR = (
+    "Raise max_budget_usd or reduce context. For small prompts, try at least "
+    "$0.10-$0.20; lower best-effort budgets can spend and still stop before a "
+    "useful answer."
+)
+if TYPE_CHECKING:
+    from cc_plugin_codex.preflight import FlagSupport
+@dataclass
+class ClaudeRun:
+    """Outcome of one `claude` subprocess invocation.
+    Besides real process results, run_claude_async() also builds synthetic
+    instances for a failed launch (stderr "claude_not_found", exit code 127)
+    and for a timeout (stderr "timeout", exit code -9, timed_out True)."""
+    stdout: str  # captured standard output; "" in the synthetic results
+    stderr: str  # captured standard error, or a sentinel ("claude_not_found" / "timeout")
+    exit_code: int  # process return code, or the synthetic 127 / -9
+    elapsed_ms: int  # wall-clock duration in milliseconds (monotonic clock)
+    timed_out: bool  # True only when the run was killed for exceeding its timeout
+def _gate_optional(tokens: list[str], fs: FlagSupport) -> tuple[list[str], list[str]]:
+    """Drop any HELP_GATED flag (and its value, if it takes one) the installed
+    `claude` does not advertise in --help. Returns (kept_tokens, dropped_flags).
+    ALWAYS_SEND flags are never in HELP_GATED_FLAGS, so they always survive."""
+    kept: list[str] = []
+    dropped: list[str] = []
+    i = 0
+    while i < len(tokens):
+        token = tokens[i]
+        takes_value = cli_contract.HELP_GATED_FLAGS.get(token)
+        if takes_value is not None and not preflight.is_supported(token, fs):
+            dropped.append(token)
+            i += 2 if takes_value else 1
+            continue
+        kept.append(token)
+        i += 1
+    return kept, dropped
+def build_command(
+    prompt: str,
+    config_mode: str,
+    access: str,
+    model: str | None,
+    max_budget_usd: float,
+    effort: str | None = None,
+    flag_support: FlagSupport | None = None,
+) -> tuple[list[str], list[str]]:
+    """Build the `claude` invocation. Returns (cmd, dropped_optional_flags).
+    Guarantee-bearing flags are sent unconditionally; HELP_GATED (depth/cosmetic)
+    flags are dropped when the installed CLI does not list them, so a minor
+    upstream change degrades instead of aborting a paid run. dropped_optional_flags
+    feeds Meta.compat_warnings."""
+    fs = flag_support if flag_support is not None else preflight.flag_support()
+    # --no-chrome disables the "Claude in Chrome" integration, which could
+    # otherwise open an interactive picker that hangs an unattended run until the
+    # timeout (burning the whole timeout and the spend) instead of answering.
+    tokens = [cli_contract.CLAUDE_BIN, *cli_contract.CORE_INVOCATION, "--no-chrome"]
+    tokens += config_mode_flags(config_mode)
+    tokens += access_flags(access)
+    tokens += ["--append-system-prompt", INDEPENDENT_CRITIC_PROMPT]
+    tokens += ["--max-budget-usd", f"{max_budget_usd}"]
+    if effort and effort in cli_contract.VALID_EFFORTS:
+        tokens += ["--effort", effort]
+    if model:
+        tokens += ["--model", model]
+    cmd, dropped = _gate_optional(tokens, fs)
+    # Gate BEFORE appending the prompt so a prompt that contains "--effort" etc.
+    # can never be mistaken for a flag.
+    cmd += [cli_contract.END_OF_OPTIONS, prompt]
+    return cmd, dropped
+def auth_status(timeout_seconds: int = 10) -> tuple[bool | None, str | None]:
+    """Probe `claude auth status` without making a paid call.
+    Returns (logged_in, detail). logged_in is None when the probe could not run
+    (claude missing, timeout) so callers can report 'unknown' rather than a
+    misleading False. detail is a NON-identifying phrase, never the raw CLI output:
+    `claude auth status` prints the account email and organization, which would leak
+    into shared logs/transcripts. The boolean already carries the machine-readable
+    truth, so we deliberately drop the raw text."""
+    try:
+        proc = subprocess.run(
+            [cli_contract.CLAUDE_BIN, *cli_contract.AUTH_STATUS_ARGS],
+            capture_output=True,
+            text=True,
+            timeout=timeout_seconds,
+            check=False,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None, None
+    logged_in = proc.returncode == 0
+    detail = (
+        "Claude CLI reports an authenticated session."
+        if logged_in
+        else "Claude CLI reports no authenticated session; run `claude /login`."
+    )
+    return logged_in, detail
+def _kill_process_tree(proc: subprocess.Popen) -> None:
+    """Best-effort terminate the process and its children. POSIX: kill the
+    process group (the child is its own session leader). Falls back to killing
+    just the process where process groups are unavailable (e.g. Windows)."""
+    if proc.poll() is not None:
+        return
+    try:
+        if hasattr(os, "killpg"):
+            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+        else:  # pragma: no cover - non-POSIX fallback
+            proc.kill()
+    except (ProcessLookupError, PermissionError):
+        with contextlib.suppress(ProcessLookupError):
+            proc.kill()
+async def run_claude_async(cmd: list[str], cwd: str, timeout_seconds: int) -> ClaudeRun:
+    """Run `claude` as a subprocess, returning a ClaudeRun.
+    The subprocess is started in its own session (process group) so that, on a
+    timeout OR an MCP request cancellation, we can terminate the whole tree
+    rather than orphaning a paid Claude run.
+    Args:
+        cmd: full argv, including the executable.
+        cwd: working directory for the child process.
+        timeout_seconds: seconds to wait before the process tree is killed.
+    Returns:
+        The real stdout/stderr/exit code on completion; a synthetic
+        ("", "claude_not_found", 127) result when the launch fails; a synthetic
+        ("", "timeout", -9, timed_out=True) result on timeout. elapsed_ms is
+        measured with a monotonic clock in every case.
+    Raises:
+        The backend's cancellation exception, re-raised after the process tree
+        has been killed."""
+    start = time.monotonic()
+    try:
+        proc = subprocess.Popen(
+            cmd,
+            cwd=cwd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            # Own session => own process group, which _kill_process_tree signals.
+            start_new_session=True,
+        )
+    except OSError:
+        # NOTE(review): every OSError is reported as claude_not_found. Popen
+        # presumably also raises OSError subclasses for other causes (e.g. a
+        # nonexistent cwd or a permission error) - confirm that is intended.
+        elapsed = int((time.monotonic() - start) * 1000)
+        return ClaudeRun("", "claude_not_found", 127, elapsed, False)
+    def _wait() -> tuple[str, str, bool]:
+        # Blocking wait, executed in a worker thread via run_sync below.
+        # Returns (stdout, stderr, timed_out).
+        try:
+            out, err = proc.communicate(timeout=timeout_seconds)
+            return out, err, False
+        except subprocess.TimeoutExpired:
+            _kill_process_tree(proc)
+            # Second communicate() reaps the killed child and drains its pipes.
+            out, err = proc.communicate()
+            return out, err, True
+    try:
+        # abandon_on_cancel=True lets a cancellation interrupt this await even
+        # though the worker thread is still blocked inside communicate().
+        out, err, timed_out = await run_sync(_wait, abandon_on_cancel=True)
+    except anyio.get_cancelled_exc_class():
+        # Request cancelled: kill the tree so the paid run does not keep going.
+        _kill_process_tree(proc)
+        raise
+    elapsed = int((time.monotonic() - start) * 1000)
+    if timed_out:
+        # Partial output is discarded on purpose; "timeout" and -9 are sentinels.
+        return ClaudeRun("", "timeout", -9, elapsed, True)
+    return ClaudeRun(out, err, proc.returncode, elapsed, False)
+def classify_failure(run: ClaudeRun) -> ErrorInfo:
+    """Map a failed ClaudeRun to a user-facing ErrorInfo.
+    Checks run in a fixed order and the first match wins:
+    1. the "claude_not_found" stderr sentinel;
+    2. the timed_out flag;
+    3. keyword checks on the JSON envelope (subtype + result), only when stdout
+       parses to a dict with a truthy "is_error";
+    4. keyword checks on the combined envelope/stdout/stderr text;
+    5. contract-drift signatures (unknown flag / invalid value);
+    6. a generic nonzero_exit carrying the first 200 characters of stderr.
+    An is_error envelope that matches no keyword in step 3 falls through to
+    steps 4-6."""
+    env = None
+    # stdout may be empty or not JSON at all; env then simply stays None.
+    with contextlib.suppress(json.JSONDecodeError, ValueError, TypeError):
+        env = json.loads(run.stdout)
+    if run.stderr == "claude_not_found":
+        return ErrorInfo(
+            code="claude_not_found",
+            message="The `claude` CLI was not found on PATH.",
+            repair="Install Claude Code and ensure `claude` is on PATH.",
+        )
+    if run.timed_out:
+        return ErrorInfo(
+            code="timeout",
+            message="claude exceeded the timeout.",
+            repair="Narrow the scope/focus or raise timeout_seconds.",
+            retryable=True,
+        )
+    if isinstance(env, dict) and env.get("is_error"):
+        subtype = str(env.get("subtype") or "").lower()
+        result = str(env.get("result") or "")
+        # Lower-cased subtype + result; every keyword below is a plain substring test.
+        structured_blob = f"{subtype}\n{result}".lower()
+        # NOTE(review): these are substring matches, so short keywords also hit
+        # inside longer words ("rate" in "generate", "auth" in "author"). Confirm
+        # the looseness is acceptable before relying on the resulting codes.
+        if "api_key" in structured_blob or "invalid api key" in structured_blob:
+            return ErrorInfo(
+                code="api_key_invalid",
+                message="ANTHROPIC_API_KEY is invalid.",
+                repair="Set a valid ANTHROPIC_API_KEY, or use config_mode "
+                "inherit/scoped to use your existing login.",
+            )
+        if "auth" in structured_blob or "login" in structured_blob:
+            return ErrorInfo(
+                code="claude_auth_required",
+                message="claude is not authenticated.",
+                repair="Run `claude /login`.",
+            )
+        if "budget" in structured_blob:
+            return ErrorInfo(
+                code="budget_exceeded",
+                message="claude reached the max-budget stop threshold "
+                "(a best-effort limit, not a hard cap).",
+                repair=_BUDGET_REPAIR,
+                retryable=True,
+            )
+        if "permission" in structured_blob or "denied" in structured_blob:
+            return ErrorInfo(
+                code="claude_permission_error",
+                message="claude was denied a requested permission.",
+                repair="Use access=toolless, or allow the needed read-only tools.",
+            )
+        if "rate" in structured_blob or "overloaded" in structured_blob:
+            # Reported under the generic nonzero_exit code, but marked retryable.
+            return ErrorInfo(
+                code="nonzero_exit",
+                message=f"claude reported a retryable error: {result[:200]}",
+                repair="Retry later, or reduce request size.",
+                retryable=True,
+            )
+    # Free-text fallback: search the envelope fields (if any) plus raw
+    # stdout/stderr, using stricter phrases than the structured branch above.
+    extra = ""
+    if isinstance(env, dict):
+        extra = f"{env.get('subtype', '')} {env.get('result', '')}"
+    blob = f"{extra}\n{run.stdout}\n{run.stderr}".lower()
+    if "invalid api key" in blob:
+        return ErrorInfo(
+            code="api_key_invalid",
+            message="ANTHROPIC_API_KEY is invalid.",
+            repair="Set a valid ANTHROPIC_API_KEY, or use config_mode "
+            "inherit/scoped to use your existing login.",
+        )
+    if "not logged in" in blob or "/login" in blob:
+        return ErrorInfo(
+            code="claude_auth_required",
+            message="claude is not authenticated.",
+            repair="Run `claude /login`.",
+        )
+    if "budget" in blob:
+        return ErrorInfo(
+            code="budget_exceeded",
+            message="claude reached the max-budget stop threshold "
+            "(a best-effort limit, not a hard cap).",
+            repair=_BUDGET_REPAIR,
+            retryable=True,
+        )
+    # An unknown flag / invalid value means the CLI contract drifted from what this
+    # plugin sends. Check last so an auth/budget message is never misread as drift.
+    if cli_contract.is_contract_drift(run.stderr, run.stdout):
+        return contract_changed_error()
+    # Nothing recognised: surface the exit code and a truncated stderr excerpt.
+    return ErrorInfo(
+        code="nonzero_exit",
+        message=f"claude exited {run.exit_code}: {run.stderr.strip()[:200]}",
+        repair="Inspect the error; retry with a smaller request.",
+    )
+def contract_changed_error() -> ErrorInfo:
+    """Shared cli_contract_changed error, reused across every failure path so a
+    drift is reported identically whether it surfaces on the sync, envelope, or
+    async-job path."""
+    return ErrorInfo(
+        code="cli_contract_changed",
+        message="claude rejected a flag or value this plugin sent — its CLI "
+        "contract likely changed for your installed version.",
+        repair="Update cc-plugin-codex (or pin claude to a supported version); "
+        "run claude_status to check the version.",
+    )

cc_plugin_codex/cli_contract.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""Single source of truth for the external `claude` CLI contract.
+Every assumption this server makes about the `claude` CLI — its flags,
+subcommands, JSON-envelope keys, accepted effort levels, supported major
+versions, and the stderr phrasings that mean the contract drifted — lives here so
+an upstream breaking change is a one-file, greppable, testable edit. See
+COMPATIBILITY.md for the assumption -> upstream-source map.
+"""
+from __future__ import annotations
+# Executable name placed first in every argv this package builds; it is not an
+# absolute path, so the operating system resolves it when the process starts.
+CLAUDE_BIN = "claude"
+# Core invocation that CANNOT be dropped: -p (print mode) + JSON output. If these
+# disappear upstream the server cannot function, so a run must fail loudly rather
+# than silently degrade.
+CORE_INVOCATION = ("-p", "--output-format", "json")
+# Separator inserted between the flags and the prompt, so a prompt that looks
+# like a flag is still passed as a positional argument.
+END_OF_OPTIONS = "--"
+# Subcommands / probes (free; no paid call).
+VERSION_ARGS = ("--version",)
+AUTH_STATUS_ARGS = ("auth", "status", "--text")
+HELP_ARGS = ("--help",)
+# --- Flag classes (see Item 5 of the resilience plan / COMPATIBILITY.md) --------
+# ALWAYS_SEND: guarantee-bearing flags, sent unconditionally and NEVER gated on
+# `--help` parsing. If upstream removes/renames one, `claude` rejects it at
+# arg-parse BEFORE any model call (zero spend) and classify_failure() labels it
+# cli_contract_changed. Gating these on the (inherently fuzzy) --help parse could
+# silently drop a security/cost/behavioral guarantee, so we never do. All are long
+# flags (the diagnostic in claude_status checks them against parsed --help).
+ALWAYS_SEND_FLAGS = frozenset(
+    {
+        "--output-format",  # core JSON output
+        "--no-chrome",  # no interactive picker hanging an unattended run
+        "--append-system-prompt",  # the independent-critic guardrails
+        "--max-budget-usd",  # best-effort spend stop threshold
+        "--tools",  # read-only / no-tool guarantee
+        "--strict-mcp-config",
+        "--mcp-config",  # strip the user's MCP fleet (security boundary)
+        "--setting-sources",  # scoped-mode isolation
+        "--bare",  # bare-mode isolation
+    }
+)
+# HELP_GATED: dropping one only reduces depth or relies on a still-present primary
+# guard — never a safety/cost regression. The value is whether the flag takes an
+# argument (so the gate skips the value token too). These are the ONLY flags gated
+# on `claude --help`; a false negative here merely drops a harmless flag.
+HELP_GATED_FLAGS = {
+    "--effort": True,  # reasoning depth only
+    "--model": True,  # falls back to the configured default model
+    "--disallowed-tools": True,  # defense-in-depth; --tools is the primary allowlist
+    "--no-session-persistence": False,  # without it a session merely persists to disk
+}
+# Cache TTL for the `claude --help` probe, so a long-lived server re-probes after
+# an in-place CLI upgrade instead of trusting a stale snapshot forever.
+HELP_CACHE_TTL_SECONDS = 300
+# --- Reasoning effort -----------------------------------------------------------
+# Effort levels forwarded to `--effort`; any other value is never sent.
+VALID_EFFORTS = ("low", "medium", "high", "xhigh", "max")
+# Level used when no valid effort is configured.
+DEFAULT_EFFORT = "xhigh"
+# --- Supported `claude` major version(s) ----------------------------------------
+# A set (not a single int) so a future major can be added without a code change,
+# and overridable via env so a user can opt into an untested major themselves.
+SUPPORTED_MAJORS = frozenset({2})
+SUPPORTED_MAJORS_ENV = "CC_PLUGIN_CODEX_SUPPORTED_MAJORS"
+# --- JSON envelope keys read from `claude -p --output-format json` ---------------
+# normalize.py / apply_cost_usage parse these tolerantly with .get(); listing them
+# here keeps the consumed surface greppable and gives the golden-envelope test a
+# canonical reference.
+SUCCESS_SUBTYPES = (None, "success")
+ENVELOPE_KEYS = frozenset(
+    {
+        "is_error",
+        "subtype",
+        "result",
+        "total_cost_usd",
+        "usage",
+        "session_id",
+        "modelUsage",
+        "permission_denials",
+    }
+)
+USAGE_KEYS = frozenset(
+    {
+        "input_tokens",
+        "output_tokens",
+        "cache_read_input_tokens",
+        "cache_creation_input_tokens",
+    }
+)
+# --- Contract-drift stderr signatures -------------------------------------------
+# Phrasings a CLI prints when it rejects a flag or value we sent. Matching any
+# (case-insensitive) reclassifies an otherwise-generic failure as
+# cli_contract_changed, telling the user the plugin needs an update for their CLI
+# rather than leaving a confusing nonzero_exit.
+CONTRACT_DRIFT_STDERR_PATTERNS = (
+    "unknown option",
+    "unknown flag",
+    "unknown argument",
+    "unrecognized option",
+    "unrecognized argument",
+    "no such option",
+    "invalid choice",
+    "invalid value",
+    "unexpected argument",
+)
+def is_contract_drift(*texts: str | None) -> bool:
+    """Whether any provided text carries a contract-drift signature.
+    Used on every failure path (sync classify_failure, the zero-exit is_error
+    envelope, and the async job error) so drift is labelled consistently no matter
+    where `claude` surfaces it."""
+    blob = "\n".join(t for t in texts if t).lower()
+    return any(pattern in blob for pattern in CONTRACT_DRIFT_STDERR_PATTERNS)

cc_plugin_codex/config.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""Config knobs: env defaults, clamps, config_mode/access -> claude flags, critic prompt."""
+from __future__ import annotations
+import os
+import re
+from dataclasses import dataclass
+from cc_plugin_codex import cli_contract
+# Re-exported so existing `from ...config import VALID_EFFORTS` callers keep
+# working; the canonical definition lives in cli_contract.
+from cc_plugin_codex.cli_contract import DEFAULT_EFFORT, VALID_EFFORTS
+# Empty MCP server map; config_mode_flags() passes it as the --mcp-config value.
+EMPTY_MCP = '{"mcpServers":{}}'
+# Inclusive bounds enforced by clamp_budget() (USD) and clamp_timeout() (seconds).
+MIN_BUDGET_USD, MAX_BUDGET_USD = 0.01, 5.00
+MIN_TIMEOUT_SECONDS, MAX_TIMEOUT_SECONDS = 10, 600
+# Fallbacks for max_input_bytes() / git_timeout_seconds() when the matching
+# environment variable is unset or not an integer.
+DEFAULT_MAX_INPUT_BYTES = 200_000
+DEFAULT_GIT_TIMEOUT_SECONDS = 60
+__all__ = ["DEFAULT_EFFORT", "VALID_EFFORTS"]  # re-exports; silence unused-import lints
+# Guardrail text for the reviewer role: critique independently, tie findings to
+# evidence, do not implement changes, and avoid recursive handoffs. One
+# instruction per line; the lines are joined with "\n".
+INDEPENDENT_CRITIC_PROMPT = (
+    "You are being asked for an independent critique of Codex's work.\n"
+    "Do not assume Codex's approach is correct.\n"
+    "Prioritize correctness, safety, maintainability, and evidence over agreement "
+    "with Codex, the user, or project conventions.\n"
+    "Project instructions and memory may be present in your context, but if they "
+    "conflict with observable code behavior, tests, security, or the user's explicit "
+    "request, call out the conflict.\n"
+    "Do not rewrite or implement changes.\n"
+    "Return concrete findings only when you can tie them to evidence, such as a file, "
+    "line, diff hunk, command output, or stated assumption.\n"
+    "If the evidence is insufficient, say what is missing instead of guessing.\n"
+    "Avoid recursive handoffs; do not suggest asking another agent unless the user "
+    "explicitly requested that workflow."
+)
+@dataclass
+class Defaults:
+    """Environment-derived default settings, as produced by defaults()."""
+    config_mode: str  # from CC_PLUGIN_CODEX_CLAUDE_CONFIG, else "inherit"; not validated here
+    access: str  # from CC_PLUGIN_CODEX_ACCESS, else "toolless"; not validated here
+    model: str | None  # from CC_PLUGIN_CODEX_MODEL; None when unset or empty
+    max_budget_usd: float  # from CC_PLUGIN_CODEX_MAX_BUDGET_USD, else 1.00; not clamped here
+    timeout_seconds: int  # from CC_PLUGIN_CODEX_TIMEOUT_SECONDS, else 180; not clamped here
+    effort: str  # from CC_PLUGIN_CODEX_EFFORT, passed through sanitize_effort()
+def _env_float(name: str, default: float) -> float:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return float(raw)
+    except ValueError:
+        return default
+def _env_int(name: str, default: int) -> int:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return int(raw)
+    except ValueError:
+        return default
+def defaults() -> Defaults:
+    return Defaults(
+        config_mode=os.environ.get("CC_PLUGIN_CODEX_CLAUDE_CONFIG", "inherit"),
+        access=os.environ.get("CC_PLUGIN_CODEX_ACCESS", "toolless"),
+        model=os.environ.get("CC_PLUGIN_CODEX_MODEL") or None,
+        max_budget_usd=_env_float("CC_PLUGIN_CODEX_MAX_BUDGET_USD", 1.00),
+        timeout_seconds=_env_int("CC_PLUGIN_CODEX_TIMEOUT_SECONDS", 180),
+        effort=sanitize_effort(os.environ.get("CC_PLUGIN_CODEX_EFFORT")),
+    )
+def sanitize_effort(value: str | None) -> str:
+    """Normalize an effort value to a CLI-accepted level, falling back to the
+    default. An invalid env value must not break a paid call, so it degrades
+    rather than raising."""
+    return value if value in VALID_EFFORTS else DEFAULT_EFFORT
+def supported_majors() -> frozenset[int]:
+    """The `claude` CLI major versions this server is built against.
+    Defaults to cli_contract.SUPPORTED_MAJORS; overridable via
+    CC_PLUGIN_CODEX_SUPPORTED_MAJORS (comma-separated ints) so a user can opt into
+    an untested major. Any parse error falls back to the built-in set rather than
+    raising."""
+    raw = os.environ.get(cli_contract.SUPPORTED_MAJORS_ENV)
+    if not raw:
+        return cli_contract.SUPPORTED_MAJORS
+    try:
+        parsed = frozenset(int(part) for part in raw.split(",") if part.strip())
+    except ValueError:
+        return cli_contract.SUPPORTED_MAJORS
+    return parsed or cli_contract.SUPPORTED_MAJORS
+def version_supported(version: str | None) -> bool | None:
+    """Whether the installed `claude --version` major is in supported_majors().
+    Returns None when the version is unknown/unparseable (so callers can report
+    'unknown' rather than a false 'unsupported'). Advisory only: claude_status
+    surfaces a mismatch as a warning and never blocks paid calls on it."""
+    if not version:
+        return None
+    match = re.search(r"(\d+)\.\d+\.\d+", version)
+    if not match:
+        return None
+    return int(match.group(1)) in supported_majors()
+def clamp_budget(value: float) -> float:
+    return max(MIN_BUDGET_USD, min(MAX_BUDGET_USD, value))
+def clamp_timeout(value: int) -> int:
+    return max(MIN_TIMEOUT_SECONDS, min(MAX_TIMEOUT_SECONDS, value))
+def max_input_bytes() -> int:
+    return max(1_000, _env_int("CC_PLUGIN_CODEX_MAX_INPUT_BYTES", DEFAULT_MAX_INPUT_BYTES))
+def git_timeout_seconds() -> int:
+    return max(1, _env_int("CC_PLUGIN_CODEX_GIT_TIMEOUT_SECONDS", DEFAULT_GIT_TIMEOUT_SECONDS))
+def bare_available() -> bool:
+    return bool(os.environ.get("ANTHROPIC_API_KEY"))
+def config_mode_flags(mode: str) -> list[str]:
+    # All modes drop the user's MCP fleet (a reviewer never needs it, and it is a
+    # side-effect vector). inherit/scoped keep the user's login; bare needs an API key.
+    if mode == "inherit":
+        return ["--no-session-persistence", "--strict-mcp-config", "--mcp-config", EMPTY_MCP]
+    if mode == "scoped":
+        return [
+            "--setting-sources",
+            "project",
+            "--strict-mcp-config",
+            "--mcp-config",
+            EMPTY_MCP,
+            "--no-session-persistence",
+        ]
+    if mode == "bare":
+        return [
+            "--bare",
+            "--no-session-persistence",
+            "--strict-mcp-config",
+            "--mcp-config",
+            EMPTY_MCP,
+        ]
+    raise ValueError(f"unsupported config_mode: {mode}")
+def access_flags(access: str) -> list[str]:
+    if access == "toolless":
+        return ["--tools", ""]
+    if access == "readonly":
+        # --tools is the PRIMARY allowlist (read-only guarantee); --disallowed-tools is
+        # defense-in-depth only. Never widen --tools to include write/Bash tools.
+        return ["--tools", "Read,Grep,Glob", "--disallowed-tools", "Edit,Write,NotebookEdit,Bash"]
+    raise ValueError(f"unsupported access: {access}")