ccproxy-api 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/__init__.py +4 -0
- ccproxy/__main__.py +7 -0
- ccproxy/_version.py +21 -0
- ccproxy/adapters/__init__.py +11 -0
- ccproxy/adapters/base.py +80 -0
- ccproxy/adapters/openai/__init__.py +43 -0
- ccproxy/adapters/openai/adapter.py +915 -0
- ccproxy/adapters/openai/models.py +412 -0
- ccproxy/adapters/openai/streaming.py +449 -0
- ccproxy/api/__init__.py +28 -0
- ccproxy/api/app.py +225 -0
- ccproxy/api/dependencies.py +140 -0
- ccproxy/api/middleware/__init__.py +11 -0
- ccproxy/api/middleware/auth.py +0 -0
- ccproxy/api/middleware/cors.py +55 -0
- ccproxy/api/middleware/errors.py +703 -0
- ccproxy/api/middleware/headers.py +51 -0
- ccproxy/api/middleware/logging.py +175 -0
- ccproxy/api/middleware/request_id.py +69 -0
- ccproxy/api/middleware/server_header.py +62 -0
- ccproxy/api/responses.py +84 -0
- ccproxy/api/routes/__init__.py +16 -0
- ccproxy/api/routes/claude.py +181 -0
- ccproxy/api/routes/health.py +489 -0
- ccproxy/api/routes/metrics.py +1033 -0
- ccproxy/api/routes/proxy.py +238 -0
- ccproxy/auth/__init__.py +75 -0
- ccproxy/auth/bearer.py +68 -0
- ccproxy/auth/credentials_adapter.py +93 -0
- ccproxy/auth/dependencies.py +229 -0
- ccproxy/auth/exceptions.py +79 -0
- ccproxy/auth/manager.py +102 -0
- ccproxy/auth/models.py +118 -0
- ccproxy/auth/oauth/__init__.py +26 -0
- ccproxy/auth/oauth/models.py +49 -0
- ccproxy/auth/oauth/routes.py +396 -0
- ccproxy/auth/oauth/storage.py +0 -0
- ccproxy/auth/storage/__init__.py +12 -0
- ccproxy/auth/storage/base.py +57 -0
- ccproxy/auth/storage/json_file.py +159 -0
- ccproxy/auth/storage/keyring.py +192 -0
- ccproxy/claude_sdk/__init__.py +20 -0
- ccproxy/claude_sdk/client.py +169 -0
- ccproxy/claude_sdk/converter.py +331 -0
- ccproxy/claude_sdk/options.py +120 -0
- ccproxy/cli/__init__.py +14 -0
- ccproxy/cli/commands/__init__.py +8 -0
- ccproxy/cli/commands/auth.py +553 -0
- ccproxy/cli/commands/config/__init__.py +14 -0
- ccproxy/cli/commands/config/commands.py +766 -0
- ccproxy/cli/commands/config/schema_commands.py +119 -0
- ccproxy/cli/commands/serve.py +630 -0
- ccproxy/cli/docker/__init__.py +34 -0
- ccproxy/cli/docker/adapter_factory.py +157 -0
- ccproxy/cli/docker/params.py +278 -0
- ccproxy/cli/helpers.py +144 -0
- ccproxy/cli/main.py +193 -0
- ccproxy/cli/options/__init__.py +14 -0
- ccproxy/cli/options/claude_options.py +216 -0
- ccproxy/cli/options/core_options.py +40 -0
- ccproxy/cli/options/security_options.py +48 -0
- ccproxy/cli/options/server_options.py +117 -0
- ccproxy/config/__init__.py +40 -0
- ccproxy/config/auth.py +154 -0
- ccproxy/config/claude.py +124 -0
- ccproxy/config/cors.py +79 -0
- ccproxy/config/discovery.py +87 -0
- ccproxy/config/docker_settings.py +265 -0
- ccproxy/config/loader.py +108 -0
- ccproxy/config/observability.py +158 -0
- ccproxy/config/pricing.py +88 -0
- ccproxy/config/reverse_proxy.py +31 -0
- ccproxy/config/scheduler.py +89 -0
- ccproxy/config/security.py +14 -0
- ccproxy/config/server.py +81 -0
- ccproxy/config/settings.py +534 -0
- ccproxy/config/validators.py +231 -0
- ccproxy/core/__init__.py +274 -0
- ccproxy/core/async_utils.py +675 -0
- ccproxy/core/constants.py +97 -0
- ccproxy/core/errors.py +256 -0
- ccproxy/core/http.py +328 -0
- ccproxy/core/http_transformers.py +428 -0
- ccproxy/core/interfaces.py +247 -0
- ccproxy/core/logging.py +189 -0
- ccproxy/core/middleware.py +114 -0
- ccproxy/core/proxy.py +143 -0
- ccproxy/core/system.py +38 -0
- ccproxy/core/transformers.py +259 -0
- ccproxy/core/types.py +129 -0
- ccproxy/core/validators.py +288 -0
- ccproxy/docker/__init__.py +67 -0
- ccproxy/docker/adapter.py +588 -0
- ccproxy/docker/docker_path.py +207 -0
- ccproxy/docker/middleware.py +103 -0
- ccproxy/docker/models.py +228 -0
- ccproxy/docker/protocol.py +192 -0
- ccproxy/docker/stream_process.py +264 -0
- ccproxy/docker/validators.py +173 -0
- ccproxy/models/__init__.py +123 -0
- ccproxy/models/errors.py +42 -0
- ccproxy/models/messages.py +243 -0
- ccproxy/models/requests.py +85 -0
- ccproxy/models/responses.py +227 -0
- ccproxy/models/types.py +102 -0
- ccproxy/observability/__init__.py +51 -0
- ccproxy/observability/access_logger.py +400 -0
- ccproxy/observability/context.py +447 -0
- ccproxy/observability/metrics.py +539 -0
- ccproxy/observability/pushgateway.py +366 -0
- ccproxy/observability/sse_events.py +303 -0
- ccproxy/observability/stats_printer.py +755 -0
- ccproxy/observability/storage/__init__.py +1 -0
- ccproxy/observability/storage/duckdb_simple.py +665 -0
- ccproxy/observability/storage/models.py +55 -0
- ccproxy/pricing/__init__.py +19 -0
- ccproxy/pricing/cache.py +212 -0
- ccproxy/pricing/loader.py +267 -0
- ccproxy/pricing/models.py +106 -0
- ccproxy/pricing/updater.py +309 -0
- ccproxy/scheduler/__init__.py +39 -0
- ccproxy/scheduler/core.py +335 -0
- ccproxy/scheduler/exceptions.py +34 -0
- ccproxy/scheduler/manager.py +186 -0
- ccproxy/scheduler/registry.py +150 -0
- ccproxy/scheduler/tasks.py +484 -0
- ccproxy/services/__init__.py +10 -0
- ccproxy/services/claude_sdk_service.py +614 -0
- ccproxy/services/credentials/__init__.py +55 -0
- ccproxy/services/credentials/config.py +105 -0
- ccproxy/services/credentials/manager.py +562 -0
- ccproxy/services/credentials/oauth_client.py +482 -0
- ccproxy/services/proxy_service.py +1536 -0
- ccproxy/static/.keep +0 -0
- ccproxy/testing/__init__.py +34 -0
- ccproxy/testing/config.py +148 -0
- ccproxy/testing/content_generation.py +197 -0
- ccproxy/testing/mock_responses.py +262 -0
- ccproxy/testing/response_handlers.py +161 -0
- ccproxy/testing/scenarios.py +241 -0
- ccproxy/utils/__init__.py +6 -0
- ccproxy/utils/cost_calculator.py +210 -0
- ccproxy/utils/streaming_metrics.py +199 -0
- ccproxy_api-0.1.0.dist-info/METADATA +253 -0
- ccproxy_api-0.1.0.dist-info/RECORD +148 -0
- ccproxy_api-0.1.0.dist-info/WHEEL +4 -0
- ccproxy_api-0.1.0.dist-info/entry_points.txt +2 -0
- ccproxy_api-0.1.0.dist-info/licenses/LICENSE +21 -0

ccproxy/testing/response_handlers.py
@@ -0,0 +1,161 @@
"""Response processing utilities for testing."""

import json
from typing import Any

import httpx

from ccproxy.testing.config import RequestScenario


class ResponseHandler:
    """Handle responses from both Anthropic and OpenAI formats."""

    def process_response(
        self, response: httpx.Response, scenario: RequestScenario
    ) -> dict[str, Any]:
        """Process response based on format and streaming."""

        if scenario.streaming:
            return self._process_streaming_response(response, scenario)
        else:
            return self._process_standard_response(response, scenario)

    def _process_standard_response(
        self, response: httpx.Response, scenario: RequestScenario
    ) -> dict[str, Any]:
        """Process non-streaming response."""

        try:
            response_data = response.json()

            # Extract metrics based on format
            if scenario.api_format == "openai":
                tokens_input = response_data.get("usage", {}).get("prompt_tokens")
                tokens_output = response_data.get("usage", {}).get("completion_tokens")
                content = (
                    response_data.get("choices", [{}])[0]
                    .get("message", {})
                    .get("content")
                )
            else:  # anthropic
                usage = response_data.get("usage", {})
                tokens_input = usage.get("input_tokens")
                tokens_output = usage.get("output_tokens")
                content = ""
                for block in response_data.get("content", []):
                    if block.get("type") == "text":
                        content += block.get("text", "")

            return {
                "status_code": response.status_code,
                "headers": dict(response.headers),
                "data": response_data,
                "tokens_input": tokens_input,
                "tokens_output": tokens_output,
                "content_preview": content[:100] if content else None,
                "format": scenario.api_format,
            }

        except Exception as e:
            return {
                "status_code": response.status_code,
                "headers": dict(response.headers),
                "error": f"Failed to parse {scenario.api_format} response: {str(e)}",
                "raw_text": response.text[:500] if hasattr(response, "text") else "",
            }

    def _process_streaming_response(
        self, response: httpx.Response, scenario: RequestScenario
    ) -> dict[str, Any]:
        """Process streaming response."""

        chunks = []
        total_content = ""

        try:
            for line in response.iter_lines():
                if line.startswith("data: "):
                    data_str = line[6:].strip()
                    if data_str and data_str != "[DONE]":
                        try:
                            chunk_data = json.loads(data_str)
                            chunks.append(chunk_data)

                            # Extract content based on format
                            if scenario.api_format == "openai":
                                delta_content = (
                                    chunk_data.get("choices", [{}])[0]
                                    .get("delta", {})
                                    .get("content", "")
                                )
                                total_content += delta_content
                            else:  # anthropic
                                if chunk_data.get("type") == "content_block_delta":
                                    delta_text = chunk_data.get("delta", {}).get(
                                        "text", ""
                                    )
                                    total_content += delta_text
                        except json.JSONDecodeError:
                            continue

            return {
                "status_code": response.status_code,
                "headers": dict(response.headers),
                "chunks": chunks,
                "chunk_count": len(chunks),
                "total_content": total_content,
                "content_preview": total_content[:100] if total_content else None,
                "format": scenario.api_format,
            }

        except Exception as e:
            return {
                "status_code": response.status_code,
                "headers": dict(response.headers),
                "error": f"Failed to process {scenario.api_format} stream: {str(e)}",
            }


class MetricsExtractor:
    """Extract metrics from API responses."""

    @staticmethod
    def extract_token_metrics(
        response_data: dict[str, Any], api_format: str
    ) -> dict[str, int | None]:
        """Extract token usage from response data."""
        if api_format == "openai":
            usage = response_data.get("usage", {})
            return {
                "input_tokens": usage.get("prompt_tokens"),
                "output_tokens": usage.get("completion_tokens"),
                "cache_read_tokens": None,  # OpenAI doesn't expose cache metrics
                "cache_write_tokens": None,
            }
        else:  # anthropic
            usage = response_data.get("usage", {})
            return {
                "input_tokens": usage.get("input_tokens"),
                "output_tokens": usage.get("output_tokens"),
                "cache_read_tokens": usage.get("cache_read_input_tokens"),
                "cache_write_tokens": usage.get("cache_creation_input_tokens"),
            }

    @staticmethod
    def extract_content(response_data: dict[str, Any], api_format: str) -> str:
        """Extract text content from response data."""
        if api_format == "openai":
            content = (
                response_data.get("choices", [{}])[0]
                .get("message", {})
                .get("content", "")
            )
            return content if isinstance(content, str) else ""
        else:  # anthropic
            content = ""
            for block in response_data.get("content", []):
                if block.get("type") == "text":
                    text = block.get("text", "")
                    content += text if isinstance(text, str) else ""
            return content
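As an aside, `ResponseHandler` only touches `scenario.streaming` and `scenario.api_format`, so it can be exercised without a running proxy. A minimal, illustrative sketch (not part of the package; the stand-in scenario object and payload are made up):

# Illustrative sketch only (not shipped in the wheel). ResponseHandler reads just
# .streaming and .api_format from the scenario, so a stand-in object is enough here.
from types import SimpleNamespace

import httpx

from ccproxy.testing.response_handlers import MetricsExtractor, ResponseHandler

payload = {
    "content": [{"type": "text", "text": "Hello!"}],
    "usage": {"input_tokens": 12, "output_tokens": 3},
}
response = httpx.Response(200, json=payload)
scenario = SimpleNamespace(streaming=False, api_format="anthropic")

result = ResponseHandler().process_response(response, scenario)  # type: ignore[arg-type]
print(result["tokens_input"], result["content_preview"])  # 12 Hello!

print(MetricsExtractor.extract_token_metrics(payload, "anthropic"))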

ccproxy/testing/scenarios.py
@@ -0,0 +1,241 @@
"""Scenario generation and traffic pattern utilities."""

import random
from datetime import UTC, datetime, timedelta
from typing import Any, Literal

from ccproxy.testing.config import RequestScenario, ResponseType, TrafficConfig


class ScenarioGenerator:
    """Generate request scenarios based on traffic configuration."""

    def __init__(self, config: TrafficConfig):
        self.config = config

    def generate_scenarios(self) -> list[RequestScenario]:
        """Generate request scenarios based on configuration."""
        total_requests = int(
            self.config.duration_seconds * self.config.requests_per_second
        )
        scenarios = []

        # Calculate timeframe
        start_time = self.config.start_timestamp or datetime.now(UTC)
        time_span = self.config.duration_seconds

        for i in range(total_requests):
            # Determine timing based on pattern
            time_offset = self._calculate_time_offset(i, total_requests, time_span)
            request_time = start_time + time_offset

            # Select random parameters
            model = random.choice(self.config.models)
            message_type = random.choice(self.config.message_types)
            streaming = random.random() < self.config.streaming_probability

            # Determine response type
            response_type = self._determine_response_type()

            # Determine API format based on distribution
            api_format = self._determine_api_format()

            # Set endpoint path based on format
            endpoint_path = (
                "/api/v1/chat/completions"
                if api_format == "openai"
                else "/api/v1/messages"
            )

            # Generate headers with bypass and format-specific headers
            headers = self._generate_headers(api_format, streaming)

            scenarios.append(
                RequestScenario(
                    model=model,
                    message_type=message_type,
                    streaming=streaming,
                    response_type=response_type,
                    timestamp=request_time,
                    api_format=api_format,
                    endpoint_path=endpoint_path,
                    bypass_upstream=self.config.bypass_mode,
                    use_real_auth=not self.config.bypass_mode,
                    headers=headers,
                    target_url=self.config.target_url,
                )
            )

        return scenarios

    def _calculate_time_offset(
        self, request_index: int, total_requests: int, time_span: int
    ) -> timedelta:
        """Calculate time offset for request based on traffic pattern."""
        if self.config.pattern == "constant":
            return timedelta(seconds=request_index / self.config.requests_per_second)
        elif self.config.pattern == "burst":
            # Front-load requests in bursts
            burst_size = max(1, int(total_requests * 0.1))
            if request_index < burst_size:
                return timedelta(seconds=request_index * 0.1)
            else:
                remaining_time = time_span - (burst_size * 0.1)
                remaining_requests = total_requests - burst_size
                return timedelta(
                    seconds=(burst_size * 0.1)
                    + ((request_index - burst_size) / remaining_requests)
                    * remaining_time
                )
        elif self.config.pattern == "ramping":
            # Gradually increase request rate
            normalized_time = request_index / total_requests
            accelerated_time = normalized_time**2
            return timedelta(seconds=accelerated_time * time_span)
        else:  # realistic
            # Add some randomness to simulate real user behavior
            base_time = request_index / self.config.requests_per_second
            jitter = random.uniform(-0.5, 0.5)
            return timedelta(seconds=max(0, base_time + jitter))

    def _determine_response_type(self) -> ResponseType:
        """Determine response type based on configuration."""
        if self.config.response_type == "mixed":
            rand = random.random()
            if rand < self.config.error_probability:
                return "error"
            elif rand < self.config.error_probability * 1.2:
                return "unavailable"
            else:
                return "success"
        else:
            return self.config.response_type

    def _determine_api_format(self) -> Literal["anthropic", "openai"]:
        """Determine API format based on distribution configuration."""
        if len(self.config.api_formats) == 1:
            format_name = self.config.api_formats[0]
            if format_name == "anthropic":
                return "anthropic"
            elif format_name == "openai":
                return "openai"
            return "anthropic"  # Default fallback

        # Use weighted random selection based on format_distribution
        rand = random.random()
        cumulative = 0.0

        for format_name in self.config.api_formats:
            weight = self.config.format_distribution.get(format_name, 0.0)
            cumulative += weight
            if rand <= cumulative:
                if format_name == "anthropic":
                    return "anthropic"
                elif format_name == "openai":
                    return "openai"

        # Fallback to first format if distribution doesn't add up
        format_name = self.config.api_formats[0]
        if format_name == "anthropic":
            return "anthropic"
        elif format_name == "openai":
            return "openai"
        return "anthropic"  # Default fallback

    def _generate_headers(self, api_format: str, streaming: bool) -> dict[str, str]:
        """Generate headers with bypass and format-specific headers."""
        headers = {}

        # Add bypass header if in bypass mode
        if self.config.bypass_mode:
            headers["X-CCProxy-Bypass-Upstream"] = "true"

        # Add real API authentication if not in bypass mode
        if not self.config.bypass_mode and self.config.real_api_keys:
            if api_format == "openai" and "openai" in self.config.real_api_keys:
                headers["Authorization"] = (
                    f"Bearer {self.config.real_api_keys['openai']}"
                )
            elif api_format == "anthropic" and "anthropic" in self.config.real_api_keys:
                headers["Authorization"] = (
                    f"Bearer {self.config.real_api_keys['anthropic']}"
                )

        # Format-specific headers
        if api_format == "openai":
            headers["Content-Type"] = "application/json"
            headers["Accept"] = "application/json"
        else:  # anthropic
            headers["Content-Type"] = "application/json"
            headers["Accept"] = "application/json"
            headers["anthropic-version"] = "2023-06-01"

        # Streaming-specific headers
        if streaming:
            headers["Accept"] = "text/event-stream"
            headers["Cache-Control"] = "no-cache"

        return headers


class TrafficPatternAnalyzer:
    """Analyze and validate traffic patterns."""

    @staticmethod
    def analyze_distribution(scenarios: list[RequestScenario]) -> dict[str, Any]:
        """Analyze the distribution of scenarios."""
        analysis = {
            "total_scenarios": len(scenarios),
            "api_format_distribution": {},
            "model_distribution": {},
            "message_type_distribution": {},
            "streaming_percentage": 0.0,
            "time_span_seconds": 0.0,
        }

        if not scenarios:
            return analysis

        # Count distributions
        api_formats: dict[str, int] = {}
        models: dict[str, int] = {}
        message_types: dict[str, int] = {}
        streaming_count = 0

        for scenario in scenarios:
            # API format distribution
            api_formats[scenario.api_format] = (
                api_formats.get(scenario.api_format, 0) + 1
            )

            # Model distribution
            models[scenario.model] = models.get(scenario.model, 0) + 1

            # Message type distribution
            message_types[scenario.message_type] = (
                message_types.get(scenario.message_type, 0) + 1
            )

            # Streaming count
            if scenario.streaming:
                streaming_count += 1

        # Calculate percentages
        total = len(scenarios)
        analysis["api_format_distribution"] = {
            k: v / total for k, v in api_formats.items()
        }
        analysis["model_distribution"] = {k: v / total for k, v in models.items()}
        analysis["message_type_distribution"] = {
            k: v / total for k, v in message_types.items()
        }
        analysis["streaming_percentage"] = streaming_count / total

        # Calculate time span
        timestamps = [scenario.timestamp for scenario in scenarios]
        if timestamps:
            analysis["time_span_seconds"] = (
                max(timestamps) - min(timestamps)
            ).total_seconds()

        return analysis
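For orientation, a hedged sketch of how the generator and analyzer above fit together; the `TrafficConfig` keyword arguments are assumptions based on the attributes `ScenarioGenerator` reads, not the config model's actual signature:

# Illustrative sketch only (not shipped in the wheel). TrafficConfig fields other
# than the two passed here are assumed to have defaults in ccproxy/testing/config.py.
from ccproxy.testing.config import TrafficConfig
from ccproxy.testing.scenarios import ScenarioGenerator, TrafficPatternAnalyzer

config = TrafficConfig(duration_seconds=10, requests_per_second=2.0)
scenarios = ScenarioGenerator(config).generate_scenarios()  # ~20 scenarios for this config

report = TrafficPatternAnalyzer.analyze_distribution(scenarios)
print(report["total_scenarios"])
print(report["api_format_distribution"], report["streaming_percentage"])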

ccproxy/utils/cost_calculator.py
@@ -0,0 +1,210 @@
"""Cost calculation utilities for token-based pricing.

This module provides shared cost calculation functionality that can be used
across different services to ensure consistent pricing calculations.
"""

import structlog


logger = structlog.get_logger(__name__)


def calculate_token_cost(
    tokens_input: int | None,
    tokens_output: int | None,
    model: str | None,
    cache_read_tokens: int | None = None,
    cache_write_tokens: int | None = None,
) -> float | None:
    """Calculate cost in USD for the given token usage including cache tokens.

    This is a shared utility function that provides consistent cost calculation
    across all services using the pricing data from the pricing system.

    Args:
        tokens_input: Number of input tokens
        tokens_output: Number of output tokens
        model: Model name for pricing lookup
        cache_read_tokens: Number of cache read tokens
        cache_write_tokens: Number of cache write tokens

    Returns:
        Cost in USD or None if calculation not possible
    """
    if not model or (
        not tokens_input
        and not tokens_output
        and not cache_read_tokens
        and not cache_write_tokens
    ):
        return None

    try:
        # Import pricing system components
        from ccproxy.config.pricing import PricingSettings
        from ccproxy.pricing.cache import PricingCache
        from ccproxy.pricing.loader import PricingLoader

        # Get canonical model name
        canonical_model = PricingLoader.get_canonical_model_name(model)

        # Create pricing components with dependency injection
        settings = PricingSettings()
        cache = PricingCache(settings)
        cached_data = cache.load_cached_data()

        # If cache is expired, try to use stale cache as fallback
        if not cached_data:
            try:
                import json

                if cache.cache_file.exists():
                    with cache.cache_file.open(encoding="utf-8") as f:
                        cached_data = json.load(f)
                    logger.debug(
                        "cost_calculation_using_stale_cache",
                        cache_age_hours=cache.get_cache_info().get("age_hours"),
                    )
            except (OSError, json.JSONDecodeError):
                pass

        if not cached_data:
            logger.debug("cost_calculation_skipped", reason="no_pricing_data")
            return None

        # Load pricing data
        pricing_data = PricingLoader.load_pricing_from_data(cached_data, verbose=False)
        if not pricing_data or canonical_model not in pricing_data:
            logger.debug(
                "cost_calculation_skipped",
                model=canonical_model,
                reason="model_not_found",
            )
            return None

        model_pricing = pricing_data[canonical_model]

        # Calculate cost (pricing is per 1M tokens)
        input_cost = ((tokens_input or 0) / 1_000_000) * float(model_pricing.input)
        output_cost = ((tokens_output or 0) / 1_000_000) * float(model_pricing.output)
        cache_read_cost = ((cache_read_tokens or 0) / 1_000_000) * float(
            model_pricing.cache_read
        )
        cache_write_cost = ((cache_write_tokens or 0) / 1_000_000) * float(
            model_pricing.cache_write
        )

        total_cost = input_cost + output_cost + cache_read_cost + cache_write_cost

        logger.debug(
            "cost_calculated",
            model=canonical_model,
            tokens_input=tokens_input,
            tokens_output=tokens_output,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            input_cost=input_cost,
            output_cost=output_cost,
            cache_read_cost=cache_read_cost,
            cache_write_cost=cache_write_cost,
            cost_usd=total_cost,
        )

        return total_cost

    except Exception as e:
        logger.debug("cost_calculation_error", error=str(e), model=model)
        return None


def calculate_cost_breakdown(
    tokens_input: int | None,
    tokens_output: int | None,
    model: str | None,
    cache_read_tokens: int | None = None,
    cache_write_tokens: int | None = None,
) -> dict[str, float | str] | None:
    """Calculate detailed cost breakdown for the given token usage.

    Args:
        tokens_input: Number of input tokens
        tokens_output: Number of output tokens
        model: Model name for pricing lookup
        cache_read_tokens: Number of cache read tokens
        cache_write_tokens: Number of cache write tokens

    Returns:
        Dictionary with cost breakdown or None if calculation not possible
    """
    if not model or (
        not tokens_input
        and not tokens_output
        and not cache_read_tokens
        and not cache_write_tokens
    ):
        return None

    try:
        # Import pricing system components
        from ccproxy.config.pricing import PricingSettings
        from ccproxy.pricing.cache import PricingCache
        from ccproxy.pricing.loader import PricingLoader

        # Get canonical model name
        canonical_model = PricingLoader.get_canonical_model_name(model)

        # Create pricing components with dependency injection
        settings = PricingSettings()
        cache = PricingCache(settings)
        cached_data = cache.load_cached_data()

        # If cache is expired, try to use stale cache as fallback
        if not cached_data:
            try:
                import json

                if cache.cache_file.exists():
                    with cache.cache_file.open(encoding="utf-8") as f:
                        cached_data = json.load(f)
                    logger.debug(
                        "cost_breakdown_using_stale_cache",
                        cache_age_hours=cache.get_cache_info().get("age_hours"),
                    )
            except (OSError, json.JSONDecodeError):
                pass

        if not cached_data:
            return None

        # Load pricing data
        pricing_data = PricingLoader.load_pricing_from_data(cached_data, verbose=False)
        if not pricing_data or canonical_model not in pricing_data:
            return None

        model_pricing = pricing_data[canonical_model]

        # Calculate individual costs (pricing is per 1M tokens)
        input_cost = ((tokens_input or 0) / 1_000_000) * float(model_pricing.input)
        output_cost = ((tokens_output or 0) / 1_000_000) * float(model_pricing.output)
        cache_read_cost = ((cache_read_tokens or 0) / 1_000_000) * float(
            model_pricing.cache_read
        )
        cache_write_cost = ((cache_write_tokens or 0) / 1_000_000) * float(
            model_pricing.cache_write
        )

        total_cost = input_cost + output_cost + cache_read_cost + cache_write_cost

        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "cache_read_cost": cache_read_cost,
            "cache_write_cost": cache_write_cost,
            "total_cost": total_cost,
            "model": canonical_model,
        }

    except Exception as e:
        logger.debug("cost_breakdown_error", error=str(e), model=model)
        return None
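For orientation, a hedged usage sketch of the two helpers above; both read pricing from the local pricing cache and return None when no pricing data is available, and the model name and token counts below are made up:

# Illustrative sketch only (not shipped in the wheel); model name and token
# counts are invented. Both helpers return None until pricing data is cached.
from ccproxy.utils.cost_calculator import calculate_cost_breakdown, calculate_token_cost

cost = calculate_token_cost(
    tokens_input=1_200,
    tokens_output=350,
    model="claude-3-5-sonnet-20241022",
)
print(cost)  # a small USD float, or None without cached pricing data

breakdown = calculate_cost_breakdown(1_200, 350, "claude-3-5-sonnet-20241022")
if breakdown is not None:
    print(breakdown["model"], breakdown["total_cost"])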