ccproxy-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/codex/__init__.py +11 -0
- ccproxy/adapters/openai/models.py +1 -1
- ccproxy/adapters/openai/response_adapter.py +355 -0
- ccproxy/adapters/openai/response_models.py +178 -0
- ccproxy/api/app.py +31 -3
- ccproxy/api/dependencies.py +1 -8
- ccproxy/api/middleware/errors.py +15 -7
- ccproxy/api/routes/codex.py +1251 -0
- ccproxy/api/routes/health.py +228 -3
- ccproxy/auth/openai/__init__.py +13 -0
- ccproxy/auth/openai/credentials.py +166 -0
- ccproxy/auth/openai/oauth_client.py +334 -0
- ccproxy/auth/openai/storage.py +184 -0
- ccproxy/claude_sdk/options.py +1 -1
- ccproxy/cli/commands/auth.py +398 -1
- ccproxy/cli/commands/serve.py +3 -1
- ccproxy/config/claude.py +1 -1
- ccproxy/config/codex.py +100 -0
- ccproxy/config/scheduler.py +8 -8
- ccproxy/config/settings.py +19 -0
- ccproxy/core/codex_transformers.py +389 -0
- ccproxy/core/http_transformers.py +153 -2
- ccproxy/data/claude_headers_fallback.json +37 -0
- ccproxy/data/codex_headers_fallback.json +14 -0
- ccproxy/models/detection.py +82 -0
- ccproxy/models/requests.py +22 -0
- ccproxy/models/responses.py +16 -0
- ccproxy/scheduler/manager.py +2 -2
- ccproxy/scheduler/tasks.py +105 -65
- ccproxy/services/claude_detection_service.py +7 -33
- ccproxy/services/codex_detection_service.py +252 -0
- ccproxy/services/proxy_service.py +530 -0
- ccproxy/utils/model_mapping.py +7 -5
- ccproxy/utils/startup_helpers.py +205 -12
- ccproxy/utils/version_checker.py +6 -0
- ccproxy_api-0.1.7.dist-info/METADATA +615 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/RECORD +41 -28
- ccproxy_api-0.1.5.dist-info/METADATA +0 -396
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.5.dist-info → ccproxy_api-0.1.7.dist-info}/licenses/LICENSE +0 -0
ccproxy/_version.py
CHANGED
ccproxy/adapters/openai/models.py
CHANGED

@@ -286,7 +286,7 @@ class OpenAIChatCompletionResponse(BaseModel):
     created: int
     model: str
     choices: list[OpenAIChoice]
-    usage: OpenAIUsage
+    usage: OpenAIUsage | None = None
     system_fingerprint: str | None = None

     model_config = ConfigDict(extra="forbid")
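The relaxed usage field matters for Codex-backed responses, where token usage can be absent. A minimal illustrative check (hypothetical script, not part of the package; the constructor arguments mirror the ones response_adapter.py itself passes below) is that a response without a usage block now validates instead of raising:

from ccproxy.adapters.openai.models import (
    OpenAIChatCompletionResponse,
    OpenAIChoice,
    OpenAIResponseMessage,
)

# A completion with no usage block should now validate rather than raise.
resp = OpenAIChatCompletionResponse(
    id="chatcmpl_example",
    object="chat.completion",
    created=1700000000,
    model="gpt-5",
    choices=[
        OpenAIChoice(
            index=0,
            message=OpenAIResponseMessage(role="assistant", content="hi"),
            finish_reason="stop",
        )
    ],
)
print(resp.usage)  # None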
ccproxy/adapters/openai/response_adapter.py
ADDED

@@ -0,0 +1,355 @@
+"""Adapter for converting between OpenAI Chat Completions and Response API formats.
+
+This adapter handles bidirectional conversion between:
+- OpenAI Chat Completions API (used by most OpenAI clients)
+- OpenAI Response API (used by Codex/ChatGPT backend)
+"""
+
+from __future__ import annotations
+
+import json
+import time
+import uuid
+from collections.abc import AsyncIterator
+from typing import Any
+
+import structlog
+
+from ccproxy.adapters.openai.models import (
+    OpenAIChatCompletionRequest,
+    OpenAIChatCompletionResponse,
+    OpenAIChoice,
+    OpenAIResponseMessage,
+    OpenAIUsage,
+)
+from ccproxy.adapters.openai.response_models import (
+    ResponseCompleted,
+    ResponseMessage,
+    ResponseMessageContent,
+    ResponseReasoning,
+    ResponseRequest,
+)
+
+
+logger = structlog.get_logger(__name__)
+
+
+class ResponseAdapter:
+    """Adapter for OpenAI Response API format conversion."""
+
+    def chat_to_response_request(
+        self, chat_request: dict[str, Any] | OpenAIChatCompletionRequest
+    ) -> ResponseRequest:
+        """Convert Chat Completions request to Response API format.
+
+        Args:
+            chat_request: OpenAI Chat Completions request
+
+        Returns:
+            Response API formatted request
+        """
+        if isinstance(chat_request, OpenAIChatCompletionRequest):
+            chat_dict = chat_request.model_dump()
+        else:
+            chat_dict = chat_request
+
+        # Extract messages and convert to Response API format
+        messages = chat_dict.get("messages", [])
+        response_input = []
+        instructions = None
+
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+
+            # System messages become instructions
+            if role == "system":
+                instructions = content
+                continue
+
+            # Convert user/assistant messages to Response API format
+            response_msg = ResponseMessage(
+                type="message",
+                id=None,
+                role=role if role in ["user", "assistant"] else "user",
+                content=[
+                    ResponseMessageContent(
+                        type="input_text" if role == "user" else "output_text",
+                        text=content if isinstance(content, str) else str(content),
+                    )
+                ],
+            )
+            response_input.append(response_msg)
+
+        # Leave instructions field unset to let codex_transformers inject them
+        # The backend validates instructions and needs the full Codex ones
+        instructions = None
+        # Actually, we need to not include the field at all if it's None
+        # Otherwise the backend complains "Instructions are required"
+
+        # Map model (Codex uses gpt-5)
+        model = chat_dict.get("model", "gpt-4")
+        # For Codex, we typically use gpt-5
+        response_model = (
+            "gpt-5" if "codex" in model.lower() or "gpt-5" in model.lower() else model
+        )
+
+        # Build Response API request
+        # Note: Response API always requires stream=true and store=false
+        # Also, Response API doesn't support temperature and other OpenAI-specific parameters
+        request = ResponseRequest(
+            model=response_model,
+            instructions=instructions,
+            input=response_input,
+            stream=True,  # Always use streaming for Response API
+            tool_choice="auto",
+            parallel_tool_calls=chat_dict.get("parallel_tool_calls", False),
+            reasoning=ResponseReasoning(effort="medium", summary="auto"),
+            store=False,  # Must be false for Response API
+            # The following parameters are not supported by Response API:
+            # temperature, max_output_tokens, top_p, frequency_penalty, presence_penalty
+        )
+
+        return request
+
+    def response_to_chat_completion(
+        self, response_data: dict[str, Any] | ResponseCompleted
+    ) -> OpenAIChatCompletionResponse:
+        """Convert Response API response to Chat Completions format.
+
+        Args:
+            response_data: Response API response
+
+        Returns:
+            Chat Completions formatted response
+        """
+        # Extract the actual response data
+        response_dict: dict[str, Any]
+        if isinstance(response_data, ResponseCompleted):
+            # Convert Pydantic model to dict
+            response_dict = response_data.response.model_dump()
+        else:  # isinstance(response_data, dict)
+            if "response" in response_data:
+                response_dict = response_data["response"]
+            else:
+                response_dict = response_data
+
+        # Extract content from Response API output
+        content = ""
+        output = response_dict.get("output", [])
+        # Look for message type output (skip reasoning)
+        for output_item in output:
+            if output_item.get("type") == "message":
+                output_content = output_item.get("content", [])
+                for content_block in output_content:
+                    if content_block.get("type") in ["output_text", "text"]:
+                        content += content_block.get("text", "")
+
+        # Build Chat Completions response
+        usage_data = response_dict.get("usage")
+        converted_usage = self._convert_usage(usage_data) if usage_data else None
+
+        return OpenAIChatCompletionResponse(
+            id=response_dict.get("id", f"resp_{uuid.uuid4().hex}"),
+            object="chat.completion",
+            created=response_dict.get("created_at", int(time.time())),
+            model=response_dict.get("model", "gpt-5"),
+            choices=[
+                OpenAIChoice(
+                    index=0,
+                    message=OpenAIResponseMessage(
+                        role="assistant", content=content or None
+                    ),
+                    finish_reason="stop",
+                )
+            ],
+            usage=converted_usage,
+            system_fingerprint=response_dict.get("safety_identifier"),
+        )
+
+    async def stream_response_to_chat(
+        self, response_stream: AsyncIterator[bytes]
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Convert Response API SSE stream to Chat Completions format.
+
+        Args:
+            response_stream: Async iterator of SSE bytes from Response API
+
+        Yields:
+            Chat Completions formatted streaming chunks
+        """
+        stream_id = f"chatcmpl_{uuid.uuid4().hex[:29]}"
+        created = int(time.time())
+        accumulated_content = ""
+        buffer = ""
+
+        logger.debug("response_adapter_stream_started", stream_id=stream_id)
+        raw_chunk_count = 0
+        event_count = 0
+
+        async for chunk in response_stream:
+            raw_chunk_count += 1
+            chunk_size = len(chunk)
+            logger.debug(
+                "response_adapter_raw_chunk_received",
+                chunk_number=raw_chunk_count,
+                chunk_size=chunk_size,
+                buffer_size_before=len(buffer),
+            )
+
+            # Add chunk to buffer
+            buffer += chunk.decode("utf-8")
+
+            # Process complete SSE events (separated by double newlines)
+            while "\n\n" in buffer:
+                event_str, buffer = buffer.split("\n\n", 1)
+                event_count += 1
+
+                # Parse the SSE event
+                event_type = None
+                event_data = None
+
+                for line in event_str.strip().split("\n"):
+                    if not line:
+                        continue
+
+                    if line.startswith("event:"):
+                        event_type = line[6:].strip()
+                    elif line.startswith("data:"):
+                        data_str = line[5:].strip()
+                        if data_str == "[DONE]":
+                            logger.debug(
+                                "response_adapter_done_marker_found",
+                                event_number=event_count,
+                            )
+                            continue
+                        try:
+                            event_data = json.loads(data_str)
+                        except json.JSONDecodeError:
+                            logger.debug(
+                                "response_adapter_sse_parse_failed",
+                                data_preview=data_str[:100],
+                                event_number=event_count,
+                            )
+                            continue
+
+                # Process complete events
+                if event_type and event_data:
+                    logger.debug(
+                        "response_adapter_sse_event_parsed",
+                        event_type=event_type,
+                        event_number=event_count,
+                        has_output="output" in str(event_data),
+                    )
+                    if event_type in [
+                        "response.output.delta",
+                        "response.output_text.delta",
+                    ]:
+                        # Extract delta content
+                        delta_content = ""
+
+                        # Handle different event structures
+                        if event_type == "response.output_text.delta":
+                            # Direct text delta event
+                            delta_content = event_data.get("delta", "")
+                        else:
+                            # Standard output delta with nested structure
+                            output = event_data.get("output", [])
+                            if output:
+                                for output_item in output:
+                                    if output_item.get("type") == "message":
+                                        content_blocks = output_item.get("content", [])
+                                        for block in content_blocks:
+                                            if block.get("type") in [
+                                                "output_text",
+                                                "text",
+                                            ]:
+                                                delta_content += block.get("text", "")
+
+                        if delta_content:
+                            accumulated_content += delta_content
+
+                            logger.debug(
+                                "response_adapter_yielding_content",
+                                content_length=len(delta_content),
+                                accumulated_length=len(accumulated_content),
+                            )
+
+                            # Create Chat Completions streaming chunk
+                            yield {
+                                "id": stream_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": event_data.get("model", "gpt-5"),
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {"content": delta_content},
+                                        "finish_reason": None,
+                                    }
+                                ],
+                            }
+
+                    elif event_type == "response.completed":
+                        # Final chunk with usage info
+                        response = event_data.get("response", {})
+                        usage = response.get("usage")
+
+                        logger.debug(
+                            "response_adapter_stream_completed",
+                            total_content_length=len(accumulated_content),
+                            has_usage=usage is not None,
+                        )
+
+                        chunk_data = {
+                            "id": stream_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": response.get("model", "gpt-5"),
+                            "choices": [
+                                {"index": 0, "delta": {}, "finish_reason": "stop"}
+                            ],
+                        }
+
+                        # Add usage if available
+                        converted_usage = self._convert_usage(usage) if usage else None
+                        if converted_usage:
+                            chunk_data["usage"] = converted_usage.model_dump()
+
+                        yield chunk_data
+
+        logger.debug(
+            "response_adapter_stream_finished",
+            stream_id=stream_id,
+            total_raw_chunks=raw_chunk_count,
+            total_events=event_count,
+            final_buffer_size=len(buffer),
+        )
+
+    def _convert_usage(
+        self, response_usage: dict[str, Any] | None
+    ) -> OpenAIUsage | None:
+        """Convert Response API usage to Chat Completions format."""
+        if not response_usage:
+            return None
+
+        return OpenAIUsage(
+            prompt_tokens=response_usage.get("input_tokens", 0),
+            completion_tokens=response_usage.get("output_tokens", 0),
+            total_tokens=response_usage.get("total_tokens", 0),
+        )
+
+    def _get_default_codex_instructions(self) -> str:
+        """Get default Codex CLI instructions."""
+        return (
+            "You are a coding agent running in the Codex CLI, a terminal-based coding assistant. "
+            "Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful.\n\n"
+            "Your capabilities:\n"
+            "- Receive user prompts and other context provided by the harness, such as files in the workspace.\n"
+            "- Communicate with the user by streaming thinking & responses, and by making & updating plans.\n"
+            "- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, "
+            "you can request that these function calls be escalated to the user for approval before running. "
+            'More on this in the "Sandbox and approvals" section.\n\n'
+            "Within this context, Codex refers to the open-source agentic coding interface "
+            "(not the old Codex language model built by OpenAI)."
+        )
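A rough driver for the adapter above (hypothetical code, not shipped with the package; the SSE payloads are assumed example shapes rather than captured backend traffic):

import asyncio
from collections.abc import AsyncIterator

from ccproxy.adapters.openai.response_adapter import ResponseAdapter


async def main() -> None:
    adapter = ResponseAdapter()

    # Chat Completions request -> Response API request: the system message is
    # dropped (instructions are injected later by codex_transformers) and the
    # user message becomes an input_text block.
    response_request = adapter.chat_to_response_request(
        {
            "model": "gpt-5",
            "messages": [
                {"role": "system", "content": "You are terse."},
                {"role": "user", "content": "Say hello."},
            ],
        }
    )
    print(response_request.model, len(response_request.input))  # gpt-5 1

    # Response API SSE bytes -> Chat Completions streaming chunks.
    async def fake_sse() -> AsyncIterator[bytes]:
        yield b'event: response.output_text.delta\ndata: {"delta": "Hello"}\n\n'
        yield (
            b"event: response.completed\n"
            b'data: {"response": {"usage": {"input_tokens": 3, '
            b'"output_tokens": 1, "total_tokens": 4}}}\n\n'
        )

    async for chunk in adapter.stream_response_to_chat(fake_sse()):
        print(chunk["choices"][0])


asyncio.run(main())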
ccproxy/adapters/openai/response_models.py
ADDED

@@ -0,0 +1,178 @@
+"""OpenAI Response API models.
+
+This module contains data models for OpenAI's Response API format
+used by Codex/ChatGPT backend.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel
+
+
+# Request Models
+
+
+class ResponseMessageContent(BaseModel):
+    """Content block in a Response API message."""
+
+    type: Literal["input_text", "output_text"]
+    text: str
+
+
+class ResponseMessage(BaseModel):
+    """Message in Response API format."""
+
+    type: Literal["message"]
+    id: str | None = None
+    role: Literal["user", "assistant", "system"]
+    content: list[ResponseMessageContent]
+
+
+class ResponseReasoning(BaseModel):
+    """Reasoning configuration for Response API."""
+
+    effort: Literal["low", "medium", "high"] = "medium"
+    summary: Literal["auto", "none"] | None = "auto"
+
+
+class ResponseRequest(BaseModel):
+    """OpenAI Response API request format."""
+
+    model: str
+    instructions: str | None = None
+    input: list[ResponseMessage]
+    stream: bool = True
+    tool_choice: Literal["auto", "none", "required"] | str = "auto"
+    parallel_tool_calls: bool = False
+    reasoning: ResponseReasoning | None = None
+    store: bool = False
+    include: list[str] | None = None
+    prompt_cache_key: str | None = None
+    # Note: The following OpenAI parameters are not supported by Response API (Codex backend):
+    # temperature, max_output_tokens, top_p, frequency_penalty, presence_penalty, metadata
+    # If included, they'll cause "Unsupported parameter" errors
+
+
+# Response Models
+
+
+class ResponseOutput(BaseModel):
+    """Output content in Response API."""
+
+    id: str
+    type: Literal["message"]
+    status: Literal["completed", "in_progress"]
+    content: list[ResponseMessageContent]
+    role: Literal["assistant"]
+
+
+class ResponseUsage(BaseModel):
+    """Usage statistics in Response API."""
+
+    input_tokens: int
+    output_tokens: int
+    total_tokens: int
+    input_tokens_details: dict[str, Any] | None = None
+    output_tokens_details: dict[str, Any] | None = None
+
+
+class ResponseReasoningContent(BaseModel):
+    """Reasoning content in response."""
+
+    effort: Literal["low", "medium", "high"]
+    summary: str | None = None
+    encrypted_content: str | None = None
+
+
+class ResponseData(BaseModel):
+    """Complete response data structure."""
+
+    id: str
+    object: Literal["response"]
+    created_at: int
+    status: Literal["completed", "failed", "cancelled"]
+    background: bool = False
+    error: dict[str, Any] | None = None
+    incomplete_details: dict[str, Any] | None = None
+    instructions: str | None = None
+    max_output_tokens: int | None = None
+    model: str
+    output: list[ResponseOutput]
+    parallel_tool_calls: bool = False
+    previous_response_id: str | None = None
+    prompt_cache_key: str | None = None
+    reasoning: ResponseReasoningContent | None = None
+    safety_identifier: str | None = None
+    service_tier: str | None = None
+    store: bool = False
+    temperature: float | None = None
+    text: dict[str, Any] | None = None
+    tool_choice: str | None = None
+    tools: list[dict[str, Any]] | None = None
+    top_logprobs: int | None = None
+    top_p: float | None = None
+    truncation: str | None = None
+    usage: ResponseUsage | None = None
+    user: str | None = None
+    metadata: dict[str, Any] | None = None
+
+
+class ResponseCompleted(BaseModel):
+    """Complete response from Response API."""
+
+    type: Literal["response.completed"]
+    sequence_number: int
+    response: ResponseData
+
+
+# Streaming Models
+
+
+class StreamingDelta(BaseModel):
+    """Delta content in streaming response."""
+
+    content: str | None = None
+    role: Literal["assistant"] | None = None
+    reasoning_content: str | None = None
+    output: list[dict[str, Any]] | None = None
+
+
+class StreamingChoice(BaseModel):
+    """Choice in streaming response."""
+
+    index: int
+    delta: StreamingDelta
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"] | None = (
+        None
+    )
+
+
+class StreamingChunk(BaseModel):
+    """Streaming chunk from Response API."""
+
+    id: str
+    object: Literal["response.chunk", "chat.completion.chunk"]
+    created: int
+    model: str
+    choices: list[StreamingChoice]
+    usage: ResponseUsage | None = None
+    system_fingerprint: str | None = None
+
+
+class StreamingEvent(BaseModel):
+    """Server-sent event wrapper for streaming."""
+
+    event: (
+        Literal[
+            "response.created",
+            "response.output.started",
+            "response.output.delta",
+            "response.output.completed",
+            "response.completed",
+            "response.failed",
+        ]
+        | None
+    ) = None
+    data: dict[str, Any] | str
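As a quick sanity check of the models above, a hypothetical snippet (not part of the package; the response.completed payload is an assumed example) might look like:

from ccproxy.adapters.openai.response_models import (
    ResponseCompleted,
    ResponseMessage,
    ResponseMessageContent,
    ResponseRequest,
)

# Request side: only model and input are required; stream/store defaults match
# what the Codex backend expects (stream=True, store=False).
request = ResponseRequest(
    model="gpt-5",
    input=[
        ResponseMessage(
            type="message",
            role="user",
            content=[ResponseMessageContent(type="input_text", text="hi")],
        )
    ],
)
print(request.stream, request.store)  # True False

# Response side: validate an assumed response.completed event.
completed = ResponseCompleted.model_validate(
    {
        "type": "response.completed",
        "sequence_number": 1,
        "response": {
            "id": "resp_example",
            "object": "response",
            "created_at": 1700000000,
            "status": "completed",
            "model": "gpt-5",
            "output": [
                {
                    "id": "msg_1",
                    "type": "message",
                    "status": "completed",
                    "role": "assistant",
                    "content": [{"type": "output_text", "text": "hi there"}],
                }
            ],
        },
    }
)
print(completed.response.output[0].content[0].text)  # hi there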
ccproxy/api/app.py
CHANGED
@@ -18,6 +18,7 @@ from ccproxy.api.middleware.request_content_logging import (
 from ccproxy.api.middleware.request_id import RequestIDMiddleware
 from ccproxy.api.middleware.server_header import ServerHeaderMiddleware
 from ccproxy.api.routes.claude import router as claude_router
+from ccproxy.api.routes.codex import router as codex_router
 from ccproxy.api.routes.health import router as health_router
 from ccproxy.api.routes.mcp import setup_mcp
 from ccproxy.api.routes.metrics import (

@@ -33,9 +34,12 @@ from ccproxy.core.logging import setup_logging
 from ccproxy.utils.models_provider import get_models_list
 from ccproxy.utils.startup_helpers import (
     check_claude_cli_startup,
+    check_codex_cli_startup,
+    check_version_updates_startup,
     flush_streaming_batches_shutdown,
     initialize_claude_detection_startup,
     initialize_claude_sdk_startup,
+    initialize_codex_detection_startup,
     initialize_log_storage_shutdown,
     initialize_log_storage_startup,
     initialize_permission_service_startup,

@@ -43,7 +47,8 @@ from ccproxy.utils.startup_helpers import (
     setup_scheduler_shutdown,
     setup_scheduler_startup,
     setup_session_manager_shutdown,
-
+    validate_claude_authentication_startup,
+    validate_codex_authentication_startup,
 )


@@ -69,20 +74,40 @@ class ShutdownComponent(TypedDict):
 # Define lifecycle components for startup/shutdown organization
 LIFECYCLE_COMPONENTS: list[LifecycleComponent] = [
     {
-        "name": "Authentication",
-        "startup":
+        "name": "Claude Authentication",
+        "startup": validate_claude_authentication_startup,
         "shutdown": None,  # One-time validation, no cleanup needed
     },
+    {
+        "name": "Codex Authentication",
+        "startup": validate_codex_authentication_startup,
+        "shutdown": None,  # One-time validation, no cleanup needed
+    },
+    {
+        "name": "Version Check",
+        "startup": check_version_updates_startup,
+        "shutdown": None,  # One-time check, no cleanup needed
+    },
     {
         "name": "Claude CLI",
         "startup": check_claude_cli_startup,
         "shutdown": None,  # Detection only, no cleanup needed
     },
+    {
+        "name": "Codex CLI",
+        "startup": check_codex_cli_startup,
+        "shutdown": None,  # Detection only, no cleanup needed
+    },
     {
         "name": "Claude Detection",
         "startup": initialize_claude_detection_startup,
         "shutdown": None,  # No cleanup needed
     },
+    {
+        "name": "Codex Detection",
+        "startup": initialize_codex_detection_startup,
+        "shutdown": None,  # No cleanup needed
+    },
     {
         "name": "Claude SDK",
         "startup": initialize_claude_sdk_startup,

@@ -282,6 +307,9 @@ def create_app(settings: Settings | None = None) -> FastAPI:

     app.include_router(oauth_router, prefix="/oauth", tags=["oauth"])

+    # Codex routes for OpenAI integration
+    app.include_router(codex_router, tags=["codex"])
+
     # New /sdk/ routes for Claude SDK endpoints
     app.include_router(claude_router, prefix="/sdk", tags=["claude-sdk"])

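The new entries slot into the existing startup/shutdown table; a rough sketch of how such a table is typically driven from a lifespan hook is shown here (illustrative only; the hook names and call signatures used by ccproxy.api.app may differ):

from contextlib import asynccontextmanager

from fastapi import FastAPI

from ccproxy.api.app import LIFECYCLE_COMPONENTS


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Run startup hooks in declaration order: Claude auth, Codex auth,
    # version check, CLI detection, Codex detection, SDK init, ...
    started = []
    for component in LIFECYCLE_COMPONENTS:
        if component["startup"] is not None:
            await component["startup"](app)  # assumed call signature
        started.append(component)
    try:
        yield
    finally:
        # Shut down in reverse order; detection-only components declare
        # "shutdown": None and are skipped.
        for component in reversed(started):
            if component["shutdown"] is not None:
                await component["shutdown"](app)  # assumed call signature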
ccproxy/api/dependencies.py
CHANGED
@@ -7,7 +7,6 @@ from typing import Annotated
 from fastapi import Depends, Request
 from structlog import get_logger

-from ccproxy.auth.dependencies import AuthManagerDep
 from ccproxy.config.settings import Settings, get_settings
 from ccproxy.core.http import BaseProxyClient
 from ccproxy.observability import PrometheusMetrics, get_metrics

@@ -70,11 +69,8 @@ def get_cached_claude_service(request: Request) -> ClaudeSDKService:
         )
         # Get dependencies manually for fallback
         settings = get_cached_settings(request)
-        # Create a simple auth manager for fallback
-        from ccproxy.auth.credentials_adapter import CredentialsAuthManager

-
-        claude_service = get_claude_service(settings, auth_manager)
+        claude_service = get_claude_service(settings)
         return claude_service


@@ -84,13 +80,11 @@ SettingsDep = Annotated[Settings, Depends(get_cached_settings)]

 def get_claude_service(
     settings: SettingsDep,
-    auth_manager: AuthManagerDep,
 ) -> ClaudeSDKService:
     """Get Claude SDK service instance.

     Args:
         settings: Application settings dependency
-        auth_manager: Authentication manager dependency

     Returns:
         Claude SDK service instance

@@ -114,7 +108,6 @@ def get_claude_service(
     # This dependency function should not create stateful resources

     return ClaudeSDKService(
-        auth_manager=auth_manager,
         metrics=metrics,
         settings=settings,
         session_manager=session_manager,