ccproxy-api 0.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. ccproxy/__init__.py +4 -0
  2. ccproxy/__main__.py +7 -0
  3. ccproxy/_version.py +21 -0
  4. ccproxy/adapters/__init__.py +11 -0
  5. ccproxy/adapters/base.py +80 -0
  6. ccproxy/adapters/openai/__init__.py +43 -0
  7. ccproxy/adapters/openai/adapter.py +915 -0
  8. ccproxy/adapters/openai/models.py +412 -0
  9. ccproxy/adapters/openai/streaming.py +449 -0
  10. ccproxy/api/__init__.py +28 -0
  11. ccproxy/api/app.py +225 -0
  12. ccproxy/api/dependencies.py +140 -0
  13. ccproxy/api/middleware/__init__.py +11 -0
  14. ccproxy/api/middleware/auth.py +0 -0
  15. ccproxy/api/middleware/cors.py +55 -0
  16. ccproxy/api/middleware/errors.py +703 -0
  17. ccproxy/api/middleware/headers.py +51 -0
  18. ccproxy/api/middleware/logging.py +175 -0
  19. ccproxy/api/middleware/request_id.py +69 -0
  20. ccproxy/api/middleware/server_header.py +62 -0
  21. ccproxy/api/responses.py +84 -0
  22. ccproxy/api/routes/__init__.py +16 -0
  23. ccproxy/api/routes/claude.py +181 -0
  24. ccproxy/api/routes/health.py +489 -0
  25. ccproxy/api/routes/metrics.py +1033 -0
  26. ccproxy/api/routes/proxy.py +238 -0
  27. ccproxy/auth/__init__.py +75 -0
  28. ccproxy/auth/bearer.py +68 -0
  29. ccproxy/auth/credentials_adapter.py +93 -0
  30. ccproxy/auth/dependencies.py +229 -0
  31. ccproxy/auth/exceptions.py +79 -0
  32. ccproxy/auth/manager.py +102 -0
  33. ccproxy/auth/models.py +118 -0
  34. ccproxy/auth/oauth/__init__.py +26 -0
  35. ccproxy/auth/oauth/models.py +49 -0
  36. ccproxy/auth/oauth/routes.py +396 -0
  37. ccproxy/auth/oauth/storage.py +0 -0
  38. ccproxy/auth/storage/__init__.py +12 -0
  39. ccproxy/auth/storage/base.py +57 -0
  40. ccproxy/auth/storage/json_file.py +159 -0
  41. ccproxy/auth/storage/keyring.py +192 -0
  42. ccproxy/claude_sdk/__init__.py +20 -0
  43. ccproxy/claude_sdk/client.py +169 -0
  44. ccproxy/claude_sdk/converter.py +331 -0
  45. ccproxy/claude_sdk/options.py +120 -0
  46. ccproxy/cli/__init__.py +14 -0
  47. ccproxy/cli/commands/__init__.py +8 -0
  48. ccproxy/cli/commands/auth.py +553 -0
  49. ccproxy/cli/commands/config/__init__.py +14 -0
  50. ccproxy/cli/commands/config/commands.py +766 -0
  51. ccproxy/cli/commands/config/schema_commands.py +119 -0
  52. ccproxy/cli/commands/serve.py +630 -0
  53. ccproxy/cli/docker/__init__.py +34 -0
  54. ccproxy/cli/docker/adapter_factory.py +157 -0
  55. ccproxy/cli/docker/params.py +278 -0
  56. ccproxy/cli/helpers.py +144 -0
  57. ccproxy/cli/main.py +193 -0
  58. ccproxy/cli/options/__init__.py +14 -0
  59. ccproxy/cli/options/claude_options.py +216 -0
  60. ccproxy/cli/options/core_options.py +40 -0
  61. ccproxy/cli/options/security_options.py +48 -0
  62. ccproxy/cli/options/server_options.py +117 -0
  63. ccproxy/config/__init__.py +40 -0
  64. ccproxy/config/auth.py +154 -0
  65. ccproxy/config/claude.py +124 -0
  66. ccproxy/config/cors.py +79 -0
  67. ccproxy/config/discovery.py +87 -0
  68. ccproxy/config/docker_settings.py +265 -0
  69. ccproxy/config/loader.py +108 -0
  70. ccproxy/config/observability.py +158 -0
  71. ccproxy/config/pricing.py +88 -0
  72. ccproxy/config/reverse_proxy.py +31 -0
  73. ccproxy/config/scheduler.py +89 -0
  74. ccproxy/config/security.py +14 -0
  75. ccproxy/config/server.py +81 -0
  76. ccproxy/config/settings.py +534 -0
  77. ccproxy/config/validators.py +231 -0
  78. ccproxy/core/__init__.py +274 -0
  79. ccproxy/core/async_utils.py +675 -0
  80. ccproxy/core/constants.py +97 -0
  81. ccproxy/core/errors.py +256 -0
  82. ccproxy/core/http.py +328 -0
  83. ccproxy/core/http_transformers.py +428 -0
  84. ccproxy/core/interfaces.py +247 -0
  85. ccproxy/core/logging.py +189 -0
  86. ccproxy/core/middleware.py +114 -0
  87. ccproxy/core/proxy.py +143 -0
  88. ccproxy/core/system.py +38 -0
  89. ccproxy/core/transformers.py +259 -0
  90. ccproxy/core/types.py +129 -0
  91. ccproxy/core/validators.py +288 -0
  92. ccproxy/docker/__init__.py +67 -0
  93. ccproxy/docker/adapter.py +588 -0
  94. ccproxy/docker/docker_path.py +207 -0
  95. ccproxy/docker/middleware.py +103 -0
  96. ccproxy/docker/models.py +228 -0
  97. ccproxy/docker/protocol.py +192 -0
  98. ccproxy/docker/stream_process.py +264 -0
  99. ccproxy/docker/validators.py +173 -0
  100. ccproxy/models/__init__.py +123 -0
  101. ccproxy/models/errors.py +42 -0
  102. ccproxy/models/messages.py +243 -0
  103. ccproxy/models/requests.py +85 -0
  104. ccproxy/models/responses.py +227 -0
  105. ccproxy/models/types.py +102 -0
  106. ccproxy/observability/__init__.py +51 -0
  107. ccproxy/observability/access_logger.py +400 -0
  108. ccproxy/observability/context.py +447 -0
  109. ccproxy/observability/metrics.py +539 -0
  110. ccproxy/observability/pushgateway.py +366 -0
  111. ccproxy/observability/sse_events.py +303 -0
  112. ccproxy/observability/stats_printer.py +755 -0
  113. ccproxy/observability/storage/__init__.py +1 -0
  114. ccproxy/observability/storage/duckdb_simple.py +665 -0
  115. ccproxy/observability/storage/models.py +55 -0
  116. ccproxy/pricing/__init__.py +19 -0
  117. ccproxy/pricing/cache.py +212 -0
  118. ccproxy/pricing/loader.py +267 -0
  119. ccproxy/pricing/models.py +106 -0
  120. ccproxy/pricing/updater.py +309 -0
  121. ccproxy/scheduler/__init__.py +39 -0
  122. ccproxy/scheduler/core.py +335 -0
  123. ccproxy/scheduler/exceptions.py +34 -0
  124. ccproxy/scheduler/manager.py +186 -0
  125. ccproxy/scheduler/registry.py +150 -0
  126. ccproxy/scheduler/tasks.py +484 -0
  127. ccproxy/services/__init__.py +10 -0
  128. ccproxy/services/claude_sdk_service.py +614 -0
  129. ccproxy/services/credentials/__init__.py +55 -0
  130. ccproxy/services/credentials/config.py +105 -0
  131. ccproxy/services/credentials/manager.py +562 -0
  132. ccproxy/services/credentials/oauth_client.py +482 -0
  133. ccproxy/services/proxy_service.py +1536 -0
  134. ccproxy/static/.keep +0 -0
  135. ccproxy/testing/__init__.py +34 -0
  136. ccproxy/testing/config.py +148 -0
  137. ccproxy/testing/content_generation.py +197 -0
  138. ccproxy/testing/mock_responses.py +262 -0
  139. ccproxy/testing/response_handlers.py +161 -0
  140. ccproxy/testing/scenarios.py +241 -0
  141. ccproxy/utils/__init__.py +6 -0
  142. ccproxy/utils/cost_calculator.py +210 -0
  143. ccproxy/utils/streaming_metrics.py +199 -0
  144. ccproxy_api-0.1.0.dist-info/METADATA +253 -0
  145. ccproxy_api-0.1.0.dist-info/RECORD +148 -0
  146. ccproxy_api-0.1.0.dist-info/WHEEL +4 -0
  147. ccproxy_api-0.1.0.dist-info/entry_points.txt +2 -0
  148. ccproxy_api-0.1.0.dist-info/licenses/LICENSE +21 -0
ccproxy/static/.keep ADDED
File without changes
ccproxy/testing/__init__.py ADDED
@@ -0,0 +1,34 @@
+ """Testing utilities and mock response generation for CCProxy.
+
+ This package provides comprehensive testing utilities including:
+ - Mock response generation for bypass mode
+ - Request payload builders for dual-format testing
+ - Response processing and metrics collection
+ - Traffic pattern generation and scenario management
+ """
+
+ from ccproxy.testing.config import (
+     MockResponseConfig,
+     RequestScenario,
+     TrafficConfig,
+     TrafficMetrics,
+ )
+ from ccproxy.testing.content_generation import MessageContentGenerator, PayloadBuilder
+ from ccproxy.testing.mock_responses import RealisticMockResponseGenerator
+ from ccproxy.testing.response_handlers import MetricsExtractor, ResponseHandler
+ from ccproxy.testing.scenarios import ScenarioGenerator, TrafficPatternAnalyzer
+
+
+ __all__ = [
+     "MockResponseConfig",
+     "RequestScenario",
+     "TrafficConfig",
+     "TrafficMetrics",
+     "MessageContentGenerator",
+     "PayloadBuilder",
+     "RealisticMockResponseGenerator",
+     "MetricsExtractor",
+     "ResponseHandler",
+     "ScenarioGenerator",
+     "TrafficPatternAnalyzer",
+ ]
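Editor's note: as a quick orientation before the individual modules below, here is a minimal sketch of how the package's public exports are meant to compose, using only names visible in this diff (the wiring itself is illustrative, not taken from the package):

from ccproxy.testing import MockResponseConfig, PayloadBuilder, RealisticMockResponseGenerator

# PayloadBuilder fabricates request bodies; RealisticMockResponseGenerator
# fabricates matching responses, driven by a MockResponseConfig.
builder = PayloadBuilder()
generator = RealisticMockResponseGenerator(MockResponseConfig())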
ccproxy/testing/config.py ADDED
@@ -0,0 +1,148 @@
+ """Configuration models for testing utilities."""
+
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import Any, Literal
+
+ from pydantic import BaseModel
+
+
+ # Type aliases for traffic patterns and response types
+ TrafficPattern = Literal["constant", "burst", "ramping", "realistic"]
+ ResponseType = Literal["success", "error", "mixed", "unavailable"]
+ AuthType = Literal["none", "bearer", "configured", "credentials"]
+
+
+ class MockResponseConfig(BaseModel):
+     """Configuration for realistic mock responses."""
+
+     # Token range configurations
+     input_token_range: tuple[int, int] = (10, 500)  # Min/max input tokens
+     output_token_range: tuple[int, int] = (5, 1000)  # Min/max output tokens
+     cache_token_probability: float = 0.3  # Chance of cache tokens
+     cache_read_range: tuple[int, int] = (50, 200)  # Cache read token range
+     cache_write_range: tuple[int, int] = (20, 100)  # Cache write token range
+
+     # Latency simulation
+     base_latency_ms: tuple[int, int] = (100, 2000)  # Base response latency
+     streaming_chunk_delay_ms: tuple[int, int] = (10, 100)  # Per-chunk delay
+
+     # Content variation
+     response_length_variety: bool = True  # Vary response length
+     short_response_range: tuple[int, int] = (1, 3)  # Short response sentences
+     long_response_range: tuple[int, int] = (5, 15)  # Long response sentences
+
+     # Error simulation
+     simulate_errors: bool = True  # Include error scenarios
+     error_probability: float = 0.05  # Chance of error response
+
+     # Realistic timing
+     token_generation_rate: float = 50.0  # Tokens per second for streaming
+
+
+ class TrafficConfig(BaseModel):
+     """Configuration for traffic generation scenarios."""
+
+     # Basic settings
+     duration_seconds: int = 60
+     requests_per_second: float = 1.0
+     pattern: TrafficPattern = "constant"
+
+     # Target Configuration
+     target_url: str = "http://localhost:8000"  # Proxy server URL
+     api_formats: list[str] = ["anthropic", "openai"]  # Which formats to test
+     format_distribution: dict[str, float] = {  # % distribution of formats
+         "anthropic": 0.7,
+         "openai": 0.3,
+     }
+
+     # Request configuration
+     models: list[str] = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]
+     message_types: list[str] = ["short", "long", "tool_use"]
+     streaming_probability: float = 0.3
+
+     # Advanced Request Types
+     advanced_scenarios: bool = False  # Enable complex scenarios
+     tool_use_probability: float = 0.2  # Specific probability for tool use
+
+     # Response configuration
+     response_type: ResponseType = "mixed"
+     error_probability: float = 0.1
+     latency_ms_min: int = 100
+     latency_ms_max: int = 2000
+
+     # Authentication and Testing
+     bypass_mode: bool = True  # Use bypass headers (test mode)
+     real_api_keys: dict[str, str] = {}  # Real API keys when bypass_mode=False
+
+     # Timeframe simulation
+     simulate_historical: bool = False
+     start_timestamp: datetime | None = None
+     end_timestamp: datetime | None = None
+
+     # Output configuration
+     output_file: Path | None = None
+     log_requests: bool = True
+     log_responses: bool = False
+     log_format_conversions: bool = True  # Log API format transformations
+
+
+ class RequestScenario(BaseModel):
+     """Individual request scenario configuration."""
+
+     model: str
+     message_type: str
+     streaming: bool
+     response_type: ResponseType
+     timestamp: datetime
+
+     # API Format and Endpoint Control
+     api_format: Literal["anthropic", "openai"] = "anthropic"
+     endpoint_path: str = (
+         "/api/v1/messages"  # "/api/v1/messages" or "/api/v1/chat/completions"
+     )
+
+     # Request Control
+     bypass_upstream: bool = True  # Add bypass header to prevent real API calls
+     use_real_auth: bool = False  # Use real API keys vs test mode
+
+     # Enhanced Headers
+     headers: dict[str, str] = {}  # All request headers
+
+     # Target URL
+     target_url: str = "http://localhost:8000"  # Full base URL for request
+
+     # Payload Customization
+     custom_payload: dict[str, Any] | None = None  # Override default payload generation
+
+
+ class TrafficMetrics(BaseModel):
+     """Enhanced metrics for dual-format testing."""
+
+     total_requests: int = 0
+     successful_requests: int = 0
+     failed_requests: int = 0
+     error_requests: int = 0
+     average_latency_ms: float = 0.0
+     requests_per_second: float = 0.0
+     start_time: datetime
+     end_time: datetime | None = None
+
+     # Format-specific metrics
+     anthropic_requests: int = 0
+     openai_requests: int = 0
+
+     # Streaming vs non-streaming
+     streaming_requests: int = 0
+     standard_requests: int = 0
+
+     # Format validation
+     format_validation_errors: int = 0
+
+     # Response time by format
+     anthropic_avg_latency_ms: float = 0.0
+     openai_avg_latency_ms: float = 0.0
+
+     # Token usage
+     total_input_tokens: int = 0
+     total_output_tokens: int = 0
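Editor's note: these models are plain Pydantic classes, so a traffic run is configured by instantiating them with keyword overrides. A minimal sketch, assuming defaults are acceptable elsewhere (the concrete values below are illustrative, not package defaults):

from datetime import UTC, datetime

from ccproxy.testing.config import RequestScenario, TrafficConfig

# A one-minute burst test, biased toward Anthropic-format requests.
config = TrafficConfig(
    duration_seconds=60,
    requests_per_second=5.0,
    pattern="burst",
    format_distribution={"anthropic": 0.8, "openai": 0.2},
)

# One concrete request drawn from that run, targeting the OpenAI-style endpoint.
scenario = RequestScenario(
    model="claude-3-5-haiku-20241022",
    message_type="short",
    streaming=False,
    response_type="success",
    timestamp=datetime.now(UTC),
    api_format="openai",
    endpoint_path="/api/v1/chat/completions",
)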
ccproxy/testing/content_generation.py ADDED
@@ -0,0 +1,197 @@
+ """Content generation utilities for testing requests and responses."""
+
+ import random
+ from typing import Any
+
+ from ccproxy.testing.config import RequestScenario
+
+
+ class MessageContentGenerator:
+     """Generate realistic message content for testing."""
+
+     def __init__(self) -> None:
+         self.response_templates = self._load_response_templates()
+         self.request_templates = self._load_request_templates()
+
+     def _load_response_templates(self) -> dict[str, list[str]]:
+         """Load variety of response templates."""
+         return {
+             "short": [
+                 "Hello! How can I help you today?",
+                 "I'm happy to assist you.",
+                 "What would you like to know?",
+                 "I'm here to help!",
+                 "How may I assist you?",
+             ],
+             "medium": [
+                 "I'd be happy to help you with that. Let me provide you with some information that should be useful for your question.",
+                 "That's an interesting question. Here's what I can tell you about this topic based on my knowledge.",
+                 "I understand what you're asking about. Let me break this down into a clear explanation for you.",
+             ],
+             "long": [
+                 "This is a comprehensive topic that requires a detailed explanation. Let me walk you through the key concepts step by step. First, it's important to understand the foundational principles. Then we can explore the more advanced aspects. Finally, I'll provide some practical examples to illustrate the concepts.",
+                 "That's an excellent question that touches on several important areas. To give you a complete answer, I need to cover multiple aspects. Let me start with the basic framework, then dive into the specifics, and conclude with some recommendations based on best practices in this field.",
+             ],
+             "tool_use": [
+                 "I'll help you with that calculation.",
+                 "Let me solve that mathematical problem for you.",
+                 "I can compute that result using the calculator tool.",
+             ],
+         }
+
+     def _load_request_templates(self) -> dict[str, list[str]]:
+         """Load variety of request message templates."""
+         return {
+             "short": [
+                 "Hello!",
+                 "How are you?",
+                 "What's the weather like?",
+                 "Tell me a joke.",
+                 "What time is it?",
+             ],
+             "long": [
+                 "I need help writing a detailed technical document about API design patterns. Can you provide a comprehensive guide covering REST principles, authentication methods, error handling, and best practices for scalable API development?",
+                 "Please explain the differences between various machine learning algorithms including supervised learning, unsupervised learning, and reinforcement learning. Include examples of when to use each approach and their respective advantages and disadvantages.",
+                 "I'm planning a complex software architecture for a distributed system. Can you help me understand microservices patterns, database sharding strategies, caching layers, and how to handle eventual consistency in distributed transactions?",
+             ],
+             "tool_use": [
+                 "Calculate 23 * 45 + 67 for me",
+                 "What's the result of (150 / 3) * 2.5?",
+                 "Help me calculate the compound interest on $1000 at 5% for 3 years",
+             ],
+         }
+
+     def get_request_message_content(self, message_type: str) -> str:
+         """Get request message content based on type."""
+         if message_type in self.request_templates:
+             return random.choice(self.request_templates[message_type])
+         else:
+             # Fallback to short message for unknown types
+             return random.choice(self.request_templates["short"])
+
+     def get_response_content(
+         self, message_type: str, model: str
+     ) -> tuple[str, int, int]:
+         """Generate response content with realistic token counts."""
+         # Select base template
+         if message_type == "tool_use":
+             base_content = random.choice(self.response_templates["tool_use"])
+             # Add calculation result
+             result = random.randint(1, 1000)
+             content = f"{base_content} The result is {result}."
+         elif message_type in self.response_templates:
+             content = random.choice(self.response_templates[message_type])
+         else:
+             # Mix of different lengths for unknown types
+             template_type = random.choice(["short", "medium", "long"])
+             content = random.choice(self.response_templates[template_type])
+
+         # Calculate realistic token counts based on content
+         # Rough estimate: ~4 characters per token
+         estimated_output_tokens = max(1, len(content) // 4)
+
+         # Add some randomness but keep it realistic
+         output_tokens = random.randint(
+             max(1, estimated_output_tokens - 10), estimated_output_tokens + 20
+         )
+
+         # Input tokens based on typical request sizes (10-500 range)
+         input_tokens = random.randint(10, 500)
+
+         return content, input_tokens, output_tokens
+
+
+ class PayloadBuilder:
+     """Build request payloads for different API formats."""
+
+     def __init__(self) -> None:
+         self.content_generator = MessageContentGenerator()
+
+     def build_anthropic_payload(self, scenario: RequestScenario) -> dict[str, Any]:
+         """Build Anthropic format payload."""
+         payload = {
+             "model": scenario.model,
+             "messages": [
+                 {
+                     "role": "user",
+                     "content": self.content_generator.get_request_message_content(
+                         scenario.message_type
+                     ),
+                 }
+             ],
+             "stream": scenario.streaming,
+             "max_tokens": random.randint(100, 4000),  # Realistic token limits
+         }
+
+         if scenario.message_type == "tool_use":
+             payload["tools"] = [
+                 {
+                     "name": "calculator",
+                     "description": "Perform basic calculations",
+                     "input_schema": {
+                         "type": "object",
+                         "properties": {
+                             "expression": {
+                                 "type": "string",
+                                 "description": "Math expression to evaluate",
+                             }
+                         },
+                         "required": ["expression"],
+                     },
+                 }
+             ]
+
+         return payload
+
+     def build_openai_payload(self, scenario: RequestScenario) -> dict[str, Any]:
+         """Build OpenAI format payload."""
+         messages = [
+             {
+                 "role": "user",
+                 "content": self.content_generator.get_request_message_content(
+                     scenario.message_type
+                 ),
+             }
+         ]
+
+         payload = {
+             "model": scenario.model,
+             "messages": messages,
+             "stream": scenario.streaming,
+             "max_tokens": random.randint(100, 4000),  # Realistic token limits
+         }
+
+         if scenario.message_type == "tool_use":
+             payload["tools"] = [
+                 {
+                     "type": "function",
+                     "function": {
+                         "name": "calculator",
+                         "description": "Perform basic calculations",
+                         "parameters": {
+                             "type": "object",
+                             "properties": {
+                                 "expression": {
+                                     "type": "string",
+                                     "description": "Math expression to evaluate",
+                                 }
+                             },
+                             "required": ["expression"],
+                         },
+                     },
+                 }
+             ]
+
+         return payload
+
+     def build_payload(self, scenario: RequestScenario) -> dict[str, Any]:
+         """Build request payload based on scenario format."""
+         # Use custom payload if provided
+         if scenario.custom_payload:
+             return scenario.custom_payload
+
+         # Build format-specific payload
+         if scenario.api_format == "openai":
+             return self.build_openai_payload(scenario)
+         else:
+             return self.build_anthropic_payload(scenario)
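Editor's note: build_payload dispatches on scenario.api_format, so one scenario description yields either wire format, and "tool_use" scenarios pick up the calculator tool definition shown above. A hedged usage sketch, grounded only in the signatures in this file:

from datetime import UTC, datetime

from ccproxy.testing.config import RequestScenario
from ccproxy.testing.content_generation import PayloadBuilder

builder = PayloadBuilder()
scenario = RequestScenario(
    model="claude-3-5-sonnet-20241022",
    message_type="tool_use",
    streaming=True,
    response_type="success",
    timestamp=datetime.now(UTC),
    api_format="anthropic",  # switch to "openai" for the chat-completions shape
)
payload = builder.build_payload(scenario)
assert "tools" in payload  # tool_use scenarios carry the calculator schema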
ccproxy/testing/mock_responses.py ADDED
@@ -0,0 +1,262 @@
+ """Mock response generation for realistic testing."""
+
+ import json
+ import random
+ import time
+ from typing import Any
+
+ from ccproxy.testing.config import MockResponseConfig
+ from ccproxy.testing.content_generation import MessageContentGenerator
+
+
+ class RealisticMockResponseGenerator:
+     """Generate realistic mock responses with proper randomization."""
+
+     def __init__(self, config: MockResponseConfig | None = None):
+         self.config = config or MockResponseConfig()
+         self.content_generator: MessageContentGenerator = MessageContentGenerator()
+
+     def generate_response_content(
+         self, message_type: str, model: str
+     ) -> tuple[str, int, int]:
+         """Generate response content with realistic token counts."""
+         return self.content_generator.get_response_content(message_type, model)
+
+     def generate_cache_tokens(self) -> tuple[int, int]:
+         """Generate realistic cache token counts."""
+         if random.random() < self.config.cache_token_probability:
+             cache_read = random.randint(*self.config.cache_read_range)
+             cache_write = random.randint(*self.config.cache_write_range)
+             return cache_read, cache_write
+         return 0, 0
+
+     def should_simulate_error(self) -> bool:
+         """Determine if this response should be an error."""
+         return (
+             self.config.simulate_errors
+             and random.random() < self.config.error_probability
+         )
+
+     def generate_error_response(self, api_format: str) -> tuple[dict[str, Any], int]:
+         """Generate realistic error response."""
+         error_types = [
+             {
+                 "type": "rate_limit_error",
+                 "message": "Rate limit exceeded. Please try again later.",
+                 "status_code": 429,
+             },
+             {
+                 "type": "invalid_request_error",
+                 "message": "Invalid request format.",
+                 "status_code": 400,
+             },
+             {
+                 "type": "overloaded_error",
+                 "message": "Service temporarily overloaded.",
+                 "status_code": 503,
+             },
+         ]
+
+         error = random.choice(error_types)
+         status_code: int = error["status_code"]  # type: ignore[assignment]
+
+         if api_format == "openai":
+             return {
+                 "error": {
+                     "message": error["message"],
+                     "type": error["type"],
+                     "code": error["type"],
+                 }
+             }, status_code
+         else:
+             return {
+                 "type": "error",
+                 "error": {"type": error["type"], "message": error["message"]},
+             }, status_code
+
+     def generate_realistic_anthropic_stream(
+         self,
+         request_id: str,
+         model: str,
+         content: str,
+         input_tokens: int,
+         output_tokens: int,
+         cache_read_tokens: int,
+         cache_write_tokens: int,
+     ) -> list[dict[str, Any]]:
+         """Generate realistic Anthropic streaming chunks."""
+
+         chunks = []
+
+         # Message start
+         chunks.append(
+             {
+                 "type": "message_start",
+                 "message": {
+                     "id": request_id,
+                     "type": "message",
+                     "role": "assistant",
+                     "content": [],
+                     "model": model,
+                     "stop_reason": None,
+                     "stop_sequence": None,
+                     "usage": {"input_tokens": input_tokens, "output_tokens": 0},
+                 },
+             }
+         )
+
+         # Content block start
+         chunk_start: dict[str, Any] = {
+             "type": "content_block_start",
+             "index": 0,
+             "content_block": {"type": "text", "text": ""},
+         }
+         chunks.append(chunk_start)
+
+         # Split content into realistic chunks (by words)
+         words = content.split()
+         chunk_sizes = []
+
+         # Generate realistic chunk sizes
+         i = 0
+         while i < len(words):
+             # Random chunk size between 1-5 words
+             chunk_size = random.randint(1, min(5, len(words) - i))
+             chunk_sizes.append(chunk_size)
+             i += chunk_size
+
+         # Generate content deltas
+         word_index = 0
+         for chunk_size in chunk_sizes:
+             chunk_words = words[word_index : word_index + chunk_size]
+             chunk_text = (
+                 " " + " ".join(chunk_words) if word_index > 0 else " ".join(chunk_words)
+             )
+
+             chunk_delta: dict[str, Any] = {
+                 "type": "content_block_delta",
+                 "index": 0,
+                 "delta": {"type": "text_delta", "text": chunk_text},
+             }
+             chunks.append(chunk_delta)
+             word_index += chunk_size
+
+         # Content block stop
+         chunk_stop: dict[str, Any] = {"type": "content_block_stop", "index": 0}
+         chunks.append(chunk_stop)
+
+         # Message delta with final usage
+         chunks.append(
+             {
+                 "type": "message_delta",
+                 "delta": {"stop_reason": "end_turn", "stop_sequence": None},
+                 "usage": {
+                     "output_tokens": output_tokens,
+                     "cache_creation_input_tokens": cache_write_tokens,
+                     "cache_read_input_tokens": cache_read_tokens,
+                 },
+             }
+         )
+
+         # Message stop
+         chunks.append({"type": "message_stop"})
+
+         return chunks
+
+     def generate_realistic_openai_stream(
+         self,
+         request_id: str,
+         model: str,
+         content: str,
+         input_tokens: int,
+         output_tokens: int,
+     ) -> list[dict[str, Any]]:
+         """Generate realistic OpenAI streaming chunks by converting Anthropic format."""
+
+         # Generate Anthropic chunks first
+         anthropic_chunks = self.generate_realistic_anthropic_stream(
+             request_id, model, content, input_tokens, output_tokens, 0, 0
+         )
+
+         # Convert to OpenAI format
+         openai_chunks = []
+         for chunk in anthropic_chunks:
+             # Use simplified conversion logic
+             if chunk.get("type") == "message_start":
+                 openai_chunks.append(
+                     {
+                         "id": f"chatcmpl-{request_id}",
+                         "object": "chat.completion.chunk",
+                         "created": int(time.time()),
+                         "model": model,
+                         "choices": [
+                             {
+                                 "index": 0,
+                                 "delta": {"role": "assistant", "content": ""},
+                                 "finish_reason": None,
+                             }
+                         ],
+                     }
+                 )
+             elif chunk.get("type") == "content_block_delta":
+                 delta_text = chunk.get("delta", {}).get("text", "")
+                 openai_chunks.append(
+                     {
+                         "id": f"chatcmpl-{request_id}",
+                         "object": "chat.completion.chunk",
+                         "created": int(time.time()),
+                         "model": model,
+                         "choices": [
+                             {
+                                 "index": 0,
+                                 "delta": {"content": delta_text},
+                                 "finish_reason": None,
+                             }
+                         ],
+                     }
+                 )
+             elif chunk.get("type") == "message_stop":
+                 openai_chunks.append(
+                     {
+                         "id": f"chatcmpl-{request_id}",
+                         "object": "chat.completion.chunk",
+                         "created": int(time.time()),
+                         "model": model,
+                         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                     }
+                 )
+
+         return openai_chunks
+
+     def calculate_realistic_cost(
+         self,
+         input_tokens: int,
+         output_tokens: int,
+         model: str,
+         cache_read_tokens: int,
+         cache_write_tokens: int,
+     ) -> float:
+         """Calculate realistic cost based on current Claude pricing."""
+
+         # Simplified pricing (should use actual cost calculator)
+         if "sonnet" in model.lower():
+             input_cost_per_token = 0.000003  # $3 per million tokens
+             output_cost_per_token = 0.000015  # $15 per million tokens
+         elif "haiku" in model.lower():
+             input_cost_per_token = 0.00000025  # $0.25 per million tokens
+             output_cost_per_token = 0.00000125  # $1.25 per million tokens
+         else:
+             input_cost_per_token = 0.000003
+             output_cost_per_token = 0.000015
+
+         base_cost = (
+             input_tokens * input_cost_per_token + output_tokens * output_cost_per_token
+         )
+
+         # Cache costs (typically lower)
+         cache_cost = (
+             cache_read_tokens * input_cost_per_token * 0.1  # 10% of input cost
+             + cache_write_tokens * input_cost_per_token * 0.5  # 50% of input cost
+         )
+
+         return round(base_cost + cache_cost, 6)
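Editor's note: putting the generator's pieces together yields content, a chunked Anthropic-style stream, and a cost estimate. A minimal sketch using only the methods defined above (the request id "msg_mock_001" is made up for illustration):

from ccproxy.testing.config import MockResponseConfig
from ccproxy.testing.mock_responses import RealisticMockResponseGenerator

gen = RealisticMockResponseGenerator(MockResponseConfig(simulate_errors=False))
model = "claude-3-5-sonnet-20241022"

# Fabricate a response body plus realistic token counts.
content, input_tokens, output_tokens = gen.generate_response_content("medium", model)
cache_read, cache_write = gen.generate_cache_tokens()

# Render it as Anthropic SSE-style chunks and price the whole exchange.
chunks = gen.generate_realistic_anthropic_stream(
    "msg_mock_001", model, content,
    input_tokens, output_tokens, cache_read, cache_write,
)
cost_usd = gen.calculate_realistic_cost(
    input_tokens, output_tokens, model, cache_read, cache_write
)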