ccproxy-api 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/__init__.py +4 -0
- ccproxy/__main__.py +7 -0
- ccproxy/_version.py +21 -0
- ccproxy/adapters/__init__.py +11 -0
- ccproxy/adapters/base.py +80 -0
- ccproxy/adapters/openai/__init__.py +43 -0
- ccproxy/adapters/openai/adapter.py +915 -0
- ccproxy/adapters/openai/models.py +412 -0
- ccproxy/adapters/openai/streaming.py +449 -0
- ccproxy/api/__init__.py +28 -0
- ccproxy/api/app.py +225 -0
- ccproxy/api/dependencies.py +140 -0
- ccproxy/api/middleware/__init__.py +11 -0
- ccproxy/api/middleware/auth.py +0 -0
- ccproxy/api/middleware/cors.py +55 -0
- ccproxy/api/middleware/errors.py +703 -0
- ccproxy/api/middleware/headers.py +51 -0
- ccproxy/api/middleware/logging.py +175 -0
- ccproxy/api/middleware/request_id.py +69 -0
- ccproxy/api/middleware/server_header.py +62 -0
- ccproxy/api/responses.py +84 -0
- ccproxy/api/routes/__init__.py +16 -0
- ccproxy/api/routes/claude.py +181 -0
- ccproxy/api/routes/health.py +489 -0
- ccproxy/api/routes/metrics.py +1033 -0
- ccproxy/api/routes/proxy.py +238 -0
- ccproxy/auth/__init__.py +75 -0
- ccproxy/auth/bearer.py +68 -0
- ccproxy/auth/credentials_adapter.py +93 -0
- ccproxy/auth/dependencies.py +229 -0
- ccproxy/auth/exceptions.py +79 -0
- ccproxy/auth/manager.py +102 -0
- ccproxy/auth/models.py +118 -0
- ccproxy/auth/oauth/__init__.py +26 -0
- ccproxy/auth/oauth/models.py +49 -0
- ccproxy/auth/oauth/routes.py +396 -0
- ccproxy/auth/oauth/storage.py +0 -0
- ccproxy/auth/storage/__init__.py +12 -0
- ccproxy/auth/storage/base.py +57 -0
- ccproxy/auth/storage/json_file.py +159 -0
- ccproxy/auth/storage/keyring.py +192 -0
- ccproxy/claude_sdk/__init__.py +20 -0
- ccproxy/claude_sdk/client.py +169 -0
- ccproxy/claude_sdk/converter.py +331 -0
- ccproxy/claude_sdk/options.py +120 -0
- ccproxy/cli/__init__.py +14 -0
- ccproxy/cli/commands/__init__.py +8 -0
- ccproxy/cli/commands/auth.py +553 -0
- ccproxy/cli/commands/config/__init__.py +14 -0
- ccproxy/cli/commands/config/commands.py +766 -0
- ccproxy/cli/commands/config/schema_commands.py +119 -0
- ccproxy/cli/commands/serve.py +630 -0
- ccproxy/cli/docker/__init__.py +34 -0
- ccproxy/cli/docker/adapter_factory.py +157 -0
- ccproxy/cli/docker/params.py +278 -0
- ccproxy/cli/helpers.py +144 -0
- ccproxy/cli/main.py +193 -0
- ccproxy/cli/options/__init__.py +14 -0
- ccproxy/cli/options/claude_options.py +216 -0
- ccproxy/cli/options/core_options.py +40 -0
- ccproxy/cli/options/security_options.py +48 -0
- ccproxy/cli/options/server_options.py +117 -0
- ccproxy/config/__init__.py +40 -0
- ccproxy/config/auth.py +154 -0
- ccproxy/config/claude.py +124 -0
- ccproxy/config/cors.py +79 -0
- ccproxy/config/discovery.py +87 -0
- ccproxy/config/docker_settings.py +265 -0
- ccproxy/config/loader.py +108 -0
- ccproxy/config/observability.py +158 -0
- ccproxy/config/pricing.py +88 -0
- ccproxy/config/reverse_proxy.py +31 -0
- ccproxy/config/scheduler.py +89 -0
- ccproxy/config/security.py +14 -0
- ccproxy/config/server.py +81 -0
- ccproxy/config/settings.py +534 -0
- ccproxy/config/validators.py +231 -0
- ccproxy/core/__init__.py +274 -0
- ccproxy/core/async_utils.py +675 -0
- ccproxy/core/constants.py +97 -0
- ccproxy/core/errors.py +256 -0
- ccproxy/core/http.py +328 -0
- ccproxy/core/http_transformers.py +428 -0
- ccproxy/core/interfaces.py +247 -0
- ccproxy/core/logging.py +189 -0
- ccproxy/core/middleware.py +114 -0
- ccproxy/core/proxy.py +143 -0
- ccproxy/core/system.py +38 -0
- ccproxy/core/transformers.py +259 -0
- ccproxy/core/types.py +129 -0
- ccproxy/core/validators.py +288 -0
- ccproxy/docker/__init__.py +67 -0
- ccproxy/docker/adapter.py +588 -0
- ccproxy/docker/docker_path.py +207 -0
- ccproxy/docker/middleware.py +103 -0
- ccproxy/docker/models.py +228 -0
- ccproxy/docker/protocol.py +192 -0
- ccproxy/docker/stream_process.py +264 -0
- ccproxy/docker/validators.py +173 -0
- ccproxy/models/__init__.py +123 -0
- ccproxy/models/errors.py +42 -0
- ccproxy/models/messages.py +243 -0
- ccproxy/models/requests.py +85 -0
- ccproxy/models/responses.py +227 -0
- ccproxy/models/types.py +102 -0
- ccproxy/observability/__init__.py +51 -0
- ccproxy/observability/access_logger.py +400 -0
- ccproxy/observability/context.py +447 -0
- ccproxy/observability/metrics.py +539 -0
- ccproxy/observability/pushgateway.py +366 -0
- ccproxy/observability/sse_events.py +303 -0
- ccproxy/observability/stats_printer.py +755 -0
- ccproxy/observability/storage/__init__.py +1 -0
- ccproxy/observability/storage/duckdb_simple.py +665 -0
- ccproxy/observability/storage/models.py +55 -0
- ccproxy/pricing/__init__.py +19 -0
- ccproxy/pricing/cache.py +212 -0
- ccproxy/pricing/loader.py +267 -0
- ccproxy/pricing/models.py +106 -0
- ccproxy/pricing/updater.py +309 -0
- ccproxy/scheduler/__init__.py +39 -0
- ccproxy/scheduler/core.py +335 -0
- ccproxy/scheduler/exceptions.py +34 -0
- ccproxy/scheduler/manager.py +186 -0
- ccproxy/scheduler/registry.py +150 -0
- ccproxy/scheduler/tasks.py +484 -0
- ccproxy/services/__init__.py +10 -0
- ccproxy/services/claude_sdk_service.py +614 -0
- ccproxy/services/credentials/__init__.py +55 -0
- ccproxy/services/credentials/config.py +105 -0
- ccproxy/services/credentials/manager.py +562 -0
- ccproxy/services/credentials/oauth_client.py +482 -0
- ccproxy/services/proxy_service.py +1536 -0
- ccproxy/static/.keep +0 -0
- ccproxy/testing/__init__.py +34 -0
- ccproxy/testing/config.py +148 -0
- ccproxy/testing/content_generation.py +197 -0
- ccproxy/testing/mock_responses.py +262 -0
- ccproxy/testing/response_handlers.py +161 -0
- ccproxy/testing/scenarios.py +241 -0
- ccproxy/utils/__init__.py +6 -0
- ccproxy/utils/cost_calculator.py +210 -0
- ccproxy/utils/streaming_metrics.py +199 -0
- ccproxy_api-0.1.0.dist-info/METADATA +253 -0
- ccproxy_api-0.1.0.dist-info/RECORD +148 -0
- ccproxy_api-0.1.0.dist-info/WHEEL +4 -0
- ccproxy_api-0.1.0.dist-info/entry_points.txt +2 -0
- ccproxy_api-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
"""Claude SDK service orchestration for business logic."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import AsyncIterator
|
|
5
|
+
from dataclasses import asdict, is_dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from claude_code_sdk import (
|
|
10
|
+
AssistantMessage,
|
|
11
|
+
ClaudeCodeOptions,
|
|
12
|
+
ResultMessage,
|
|
13
|
+
SystemMessage,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
from ccproxy.adapters.openai import adapter
|
|
17
|
+
from ccproxy.auth.manager import AuthManager
|
|
18
|
+
from ccproxy.claude_sdk.client import ClaudeSDKClient
|
|
19
|
+
from ccproxy.claude_sdk.converter import MessageConverter
|
|
20
|
+
from ccproxy.claude_sdk.options import OptionsHandler
|
|
21
|
+
from ccproxy.core.errors import (
|
|
22
|
+
ClaudeProxyError,
|
|
23
|
+
ServiceUnavailableError,
|
|
24
|
+
)
|
|
25
|
+
from ccproxy.observability.access_logger import log_request_access
|
|
26
|
+
from ccproxy.observability.context import RequestContext, request_context
|
|
27
|
+
from ccproxy.observability.metrics import PrometheusMetrics
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
logger = structlog.get_logger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ClaudeSDKService:
|
|
34
|
+
"""
|
|
35
|
+
Service layer for Claude SDK operations orchestration.
|
|
36
|
+
|
|
37
|
+
This class handles business logic coordination between the pure SDK client,
|
|
38
|
+
authentication, metrics, and format conversion while maintaining clean
|
|
39
|
+
separation of concerns.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(
|
|
43
|
+
self,
|
|
44
|
+
sdk_client: ClaudeSDKClient | None = None,
|
|
45
|
+
auth_manager: AuthManager | None = None,
|
|
46
|
+
metrics: PrometheusMetrics | None = None,
|
|
47
|
+
) -> None:
|
|
48
|
+
"""
|
|
49
|
+
Initialize Claude SDK service.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
sdk_client: Claude SDK client instance
|
|
53
|
+
auth_manager: Authentication manager (optional)
|
|
54
|
+
metrics: Prometheus metrics instance (optional)
|
|
55
|
+
"""
|
|
56
|
+
self.sdk_client = sdk_client or ClaudeSDKClient()
|
|
57
|
+
self.auth_manager = auth_manager
|
|
58
|
+
self.metrics = metrics
|
|
59
|
+
self.message_converter = MessageConverter()
|
|
60
|
+
self.options_handler = OptionsHandler()
|
|
61
|
+
|
|
62
|
+
    async def create_completion(
        self,
        messages: list[dict[str, Any]],
        model: str,
        temperature: float | None = None,
        max_tokens: int | None = None,
        stream: bool = False,
        user_id: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any] | AsyncIterator[dict[str, Any]]:
        """
        Create a completion using Claude SDK with business logic orchestration.

        Orchestrates: optional user auth validation, OpenAI->Claude model
        mapping, option construction, message-to-prompt conversion, and
        observability context setup, then delegates to the streaming or
        non-streaming helper.

        Args:
            messages: List of messages in Anthropic format
            model: The model to use (OpenAI names are mapped to Claude models)
            temperature: Temperature for response generation
            max_tokens: Maximum tokens in response
            stream: Whether to stream responses
            user_id: User identifier for auth/metrics
            **kwargs: Additional arguments forwarded to option creation

        Returns:
            Response dict or async iterator of response chunks if streaming

        Raises:
            ClaudeProxyError: If request fails
            ServiceUnavailableError: If service is unavailable
        """
        # Validate authentication if auth manager is configured; failures are
        # logged with full context and re-raised unchanged.
        if self.auth_manager and user_id:
            try:
                await self._validate_user_auth(user_id)
            except Exception as e:
                logger.error(
                    "authentication_failed",
                    user_id=user_id,
                    error=str(e),
                    error_type=type(e).__name__,
                    exc_info=True,
                )
                raise

        # Extract system message (handled separately from the prompt) and
        # create SDK options.
        system_message = self.options_handler.extract_system_message(messages)

        # Map OpenAI-style model names to their Claude equivalents.
        model = adapter.map_openai_model_to_claude(model)

        options = self.options_handler.create_options(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            system_message=system_message,
            **kwargs,
        )

        # Convert messages to the flat prompt format the SDK expects.
        prompt = self.message_converter.format_messages_to_prompt(messages)

        # Generate request ID for correlation across logs and metrics.
        from uuid import uuid4

        request_id = str(uuid4())

        # Use request context for observability
        endpoint = "messages"  # Claude SDK uses messages endpoint
        async with request_context(
            method="POST",
            path=f"/sdk/v1/{endpoint}",
            endpoint=endpoint,
            model=model,
            streaming=stream,
            service_type="claude_sdk_service",
            metrics=self.metrics,  # Pass metrics for active request tracking
        ) as ctx:
            try:
                if stream:
                    # For streaming, return the async iterator directly.
                    # NOTE(review): the generator is returned from inside
                    # `async with request_context(...)`, so the context exits
                    # before the caller consumes the stream — confirm
                    # request_context tolerates being closed early.
                    return self._stream_completion(
                        prompt, options, model, request_id, ctx
                    )
                else:
                    result = await self._complete_non_streaming(
                        prompt, options, model, request_id, ctx
                    )
                    return result

            except Exception as e:
                # Log error via access logger (includes metrics), then let the
                # exception propagate to produce a proper HTTP error response.
                await log_request_access(
                    context=ctx,
                    method="POST",
                    error_message=str(e),
                    metrics=self.metrics,
                    error_type=type(e).__name__,
                )
                raise
|
|
161
|
+
|
|
162
|
+
async def _complete_non_streaming(
|
|
163
|
+
self,
|
|
164
|
+
prompt: str,
|
|
165
|
+
options: ClaudeCodeOptions,
|
|
166
|
+
model: str,
|
|
167
|
+
request_id: str | None = None,
|
|
168
|
+
ctx: RequestContext | None = None,
|
|
169
|
+
) -> dict[str, Any]:
|
|
170
|
+
"""
|
|
171
|
+
Complete a non-streaming request with business logic.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
prompt: The formatted prompt
|
|
175
|
+
options: Claude SDK options
|
|
176
|
+
model: The model being used
|
|
177
|
+
request_id: The request ID for metrics correlation
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
Response in Anthropic format
|
|
181
|
+
|
|
182
|
+
Raises:
|
|
183
|
+
ClaudeProxyError: If completion fails
|
|
184
|
+
"""
|
|
185
|
+
messages = []
|
|
186
|
+
result_message = None
|
|
187
|
+
assistant_message = None
|
|
188
|
+
|
|
189
|
+
async for message in self.sdk_client.query_completion(
|
|
190
|
+
prompt, options, request_id
|
|
191
|
+
):
|
|
192
|
+
messages.append(message)
|
|
193
|
+
if isinstance(message, AssistantMessage):
|
|
194
|
+
assistant_message = message
|
|
195
|
+
elif isinstance(message, ResultMessage):
|
|
196
|
+
result_message = message
|
|
197
|
+
|
|
198
|
+
# Get Claude API call timing
|
|
199
|
+
claude_api_call_ms = self.sdk_client.get_last_api_call_time_ms()
|
|
200
|
+
|
|
201
|
+
if result_message is None:
|
|
202
|
+
raise ClaudeProxyError(
|
|
203
|
+
message="No result message received from Claude SDK",
|
|
204
|
+
error_type="internal_server_error",
|
|
205
|
+
status_code=500,
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
if assistant_message is None:
|
|
209
|
+
raise ClaudeProxyError(
|
|
210
|
+
message="No assistant response received from Claude SDK",
|
|
211
|
+
error_type="internal_server_error",
|
|
212
|
+
status_code=500,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
logger.debug("claude_sdk_completion_received")
|
|
216
|
+
# Convert to Anthropic format
|
|
217
|
+
response = self.message_converter.convert_to_anthropic_response(
|
|
218
|
+
assistant_message, result_message, model
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Extract token usage and cost from result message using direct access
|
|
222
|
+
cost_usd = result_message.total_cost_usd
|
|
223
|
+
if result_message.usage:
|
|
224
|
+
tokens_input = result_message.usage.get("input_tokens")
|
|
225
|
+
tokens_output = result_message.usage.get("output_tokens")
|
|
226
|
+
cache_read_tokens = result_message.usage.get("cache_read_input_tokens")
|
|
227
|
+
cache_write_tokens = result_message.usage.get("cache_creation_input_tokens")
|
|
228
|
+
else:
|
|
229
|
+
tokens_input = tokens_output = cache_read_tokens = cache_write_tokens = None
|
|
230
|
+
|
|
231
|
+
# Add cost to response usage section if available
|
|
232
|
+
if cost_usd is not None and "usage" in response:
|
|
233
|
+
response["usage"]["cost_usd"] = cost_usd
|
|
234
|
+
|
|
235
|
+
# Log metrics for observability
|
|
236
|
+
logger.debug(
|
|
237
|
+
"claude_sdk_completion_completed",
|
|
238
|
+
model=model,
|
|
239
|
+
tokens_input=tokens_input,
|
|
240
|
+
tokens_output=tokens_output,
|
|
241
|
+
cache_read_tokens=cache_read_tokens,
|
|
242
|
+
cache_write_tokens=cache_write_tokens,
|
|
243
|
+
cost_usd=cost_usd,
|
|
244
|
+
request_id=request_id,
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
# Update context with metrics if available
|
|
248
|
+
if ctx:
|
|
249
|
+
ctx.add_metadata(
|
|
250
|
+
status_code=200,
|
|
251
|
+
tokens_input=tokens_input,
|
|
252
|
+
tokens_output=tokens_output,
|
|
253
|
+
cache_read_tokens=cache_read_tokens,
|
|
254
|
+
cache_write_tokens=cache_write_tokens,
|
|
255
|
+
cost_usd=cost_usd,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
# Log comprehensive access log (includes Prometheus metrics)
|
|
259
|
+
await log_request_access(
|
|
260
|
+
context=ctx,
|
|
261
|
+
status_code=200,
|
|
262
|
+
method="POST",
|
|
263
|
+
metrics=self.metrics,
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
return response
|
|
267
|
+
|
|
268
|
+
    async def _stream_completion(
        self,
        prompt: str,
        options: ClaudeCodeOptions,
        model: str,
        request_id: str | None = None,
        ctx: RequestContext | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """
        Stream completion responses with business logic.

        Translates the SDK message stream into Anthropic-style streaming
        chunks: a start chunk on the first message, delta chunks for system
        and assistant content, and a final chunk (with usage/cost) when the
        result message arrives, at which point iteration stops.

        Args:
            prompt: The formatted prompt
            options: Claude SDK options
            model: The model being used
            request_id: The request ID for log/metrics correlation
            ctx: Optional observability context to enrich with usage metadata

        Yields:
            Response chunks in Anthropic format
        """
        import asyncio

        first_chunk = True
        message_count = 0
        # NOTE(review): collected but never read after the loop — candidate
        # for removal.
        assistant_messages = []

        try:
            async for message in self.sdk_client.query_completion(
                prompt, options, request_id
            ):
                message_count += 1
                logger.debug(
                    "streaming_message_received",
                    message_count=message_count,
                    message_type=type(message).__name__,
                    request_id=request_id,
                )

                if first_chunk:
                    # Emit the stream-start chunk exactly once, keyed off the
                    # first SDK message's object identity.
                    yield self.message_converter.create_streaming_start_chunk(
                        f"msg_{id(message)}", model
                    )
                    first_chunk = False

                # TODO: instead of creating one message we should create a list
                # of messages that the adapter serializes back into a single
                # message. To do that the different message types must first
                # exist in the Anthropic models.
                if isinstance(message, SystemMessage):
                    # Serialize the dataclass to JSON, wrapped in a tag so the
                    # client can distinguish system content.
                    text_content = f"<system>{json.dumps(asdict(message))}</system>"
                    yield self.message_converter.create_streaming_delta_chunk(
                        text_content
                    )
                elif isinstance(message, AssistantMessage):
                    assistant_messages.append(message)

                    # Send content delta
                    text_content = self.message_converter.extract_contents(
                        message.content
                    )

                    if text_content:
                        text_content = f"<assistant>{text_content}</assistant>"
                        yield self.message_converter.create_streaming_delta_chunk(
                            text_content
                        )

                elif isinstance(message, ResultMessage):
                    # Get Claude API call timing.
                    # NOTE(review): value is never used below — confirm the
                    # getter has no side effects before removing.
                    claude_api_call_ms = self.sdk_client.get_last_api_call_time_ms()

                    # Extract cost and tokens from result message; missing
                    # usage keys yield None.
                    cost_usd = message.total_cost_usd
                    if message.usage:
                        tokens_input = message.usage.get("input_tokens")
                        tokens_output = message.usage.get("output_tokens")
                        cache_read_tokens = message.usage.get("cache_read_input_tokens")
                        cache_write_tokens = message.usage.get(
                            "cache_creation_input_tokens"
                        )
                    else:
                        tokens_input = tokens_output = cache_read_tokens = (
                            cache_write_tokens
                        ) = None

                    # Log streaming completion metrics
                    logger.debug(
                        "streaming_completion_completed",
                        model=model,
                        tokens_input=tokens_input,
                        tokens_output=tokens_output,
                        cache_read_tokens=cache_read_tokens,
                        cache_write_tokens=cache_write_tokens,
                        cost_usd=cost_usd,
                        message_count=message_count,
                        request_id=request_id,
                    )

                    # Update context with metrics if available
                    if ctx:
                        ctx.add_metadata(
                            status_code=200,
                            tokens_input=tokens_input,
                            tokens_output=tokens_output,
                            cache_read_tokens=cache_read_tokens,
                            cache_write_tokens=cache_write_tokens,
                            cost_usd=cost_usd,
                        )

                    # Log comprehensive access log for streaming completion
                    await log_request_access(
                        context=ctx,
                        status_code=200,
                        method="POST",
                        metrics=self.metrics,
                        event_type="streaming_complete",
                    )

                    # Send final chunk with usage and cost information
                    final_chunk = self.message_converter.create_streaming_end_chunk()

                    # NOTE(review): truthiness tests skip zero token counts
                    # (0 is falsy) — confirm zeros should be omitted.
                    if tokens_input or tokens_output or cost_usd:
                        usage_info = {}
                        if tokens_input:
                            usage_info["input_tokens"] = tokens_input
                        if tokens_output:
                            usage_info["output_tokens"] = tokens_output
                        if cost_usd is not None:
                            usage_info["cost_usd"] = cost_usd

                        # Merge into the final chunk; assumes the end chunk
                        # always contains a "usage" dict — TODO confirm against
                        # create_streaming_end_chunk().
                        final_chunk["usage"].update(usage_info)

                    yield final_chunk

                    # Result message terminates the stream.
                    break

        except asyncio.CancelledError:
            # Client disconnected / task cancelled: log quietly and re-raise
            # so cancellation propagates.
            logger.debug("streaming_completion_cancelled", request_id=request_id)
            raise
        except Exception as e:
            logger.error(
                "streaming_completion_failed",
                error=str(e),
                error_type=type(e).__name__,
                request_id=request_id,
                exc_info=True,
            )
            # Don't yield an error chunk — let the exception propagate so the
            # caller produces a proper HTTP error response.
            raise
|
|
420
|
+
|
|
421
|
+
async def _validate_user_auth(self, user_id: str) -> None:
|
|
422
|
+
"""
|
|
423
|
+
Validate user authentication.
|
|
424
|
+
|
|
425
|
+
Args:
|
|
426
|
+
user_id: User identifier
|
|
427
|
+
|
|
428
|
+
Raises:
|
|
429
|
+
AuthenticationError: If authentication fails
|
|
430
|
+
"""
|
|
431
|
+
if not self.auth_manager:
|
|
432
|
+
return
|
|
433
|
+
|
|
434
|
+
# Implement authentication validation logic
|
|
435
|
+
# This is a placeholder for future auth integration
|
|
436
|
+
logger.debug("user_auth_validation_start", user_id=user_id)
|
|
437
|
+
|
|
438
|
+
def _calculate_cost(
|
|
439
|
+
self,
|
|
440
|
+
tokens_input: int | None,
|
|
441
|
+
tokens_output: int | None,
|
|
442
|
+
model: str | None,
|
|
443
|
+
cache_read_tokens: int | None = None,
|
|
444
|
+
cache_write_tokens: int | None = None,
|
|
445
|
+
) -> float | None:
|
|
446
|
+
"""
|
|
447
|
+
Calculate cost in USD for the given token usage including cache tokens.
|
|
448
|
+
|
|
449
|
+
Note: This method is provided for consistency, but the Claude SDK already
|
|
450
|
+
provides accurate cost calculation in ResultMessage.total_cost_usd which
|
|
451
|
+
should be preferred when available.
|
|
452
|
+
|
|
453
|
+
Args:
|
|
454
|
+
tokens_input: Number of input tokens
|
|
455
|
+
tokens_output: Number of output tokens
|
|
456
|
+
model: Model name for pricing lookup
|
|
457
|
+
cache_read_tokens: Number of cache read tokens
|
|
458
|
+
cache_write_tokens: Number of cache write tokens
|
|
459
|
+
|
|
460
|
+
Returns:
|
|
461
|
+
Cost in USD or None if calculation not possible
|
|
462
|
+
"""
|
|
463
|
+
from ccproxy.utils.cost_calculator import calculate_token_cost
|
|
464
|
+
|
|
465
|
+
return calculate_token_cost(
|
|
466
|
+
tokens_input, tokens_output, model, cache_read_tokens, cache_write_tokens
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
async def list_models(self) -> dict[str, Any]:
|
|
470
|
+
"""
|
|
471
|
+
List available Claude models and recent OpenAI models.
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
Dictionary with combined list of models in mixed format
|
|
475
|
+
"""
|
|
476
|
+
# Get Claude models
|
|
477
|
+
supported_models = self.options_handler.get_supported_models()
|
|
478
|
+
|
|
479
|
+
# Create Anthropic-style model entries
|
|
480
|
+
anthropic_models = []
|
|
481
|
+
for model_id in supported_models:
|
|
482
|
+
anthropic_models.append(
|
|
483
|
+
{
|
|
484
|
+
"type": "model",
|
|
485
|
+
"id": model_id,
|
|
486
|
+
"display_name": self._get_display_name(model_id),
|
|
487
|
+
"created_at": self._get_created_timestamp(model_id),
|
|
488
|
+
}
|
|
489
|
+
)
|
|
490
|
+
|
|
491
|
+
# Add recent OpenAI models (GPT-4 variants and O1 models)
|
|
492
|
+
openai_models = [
|
|
493
|
+
{
|
|
494
|
+
"id": "gpt-4o",
|
|
495
|
+
"object": "model",
|
|
496
|
+
"created": 1715367049,
|
|
497
|
+
"owned_by": "openai",
|
|
498
|
+
},
|
|
499
|
+
{
|
|
500
|
+
"id": "gpt-4o-mini",
|
|
501
|
+
"object": "model",
|
|
502
|
+
"created": 1721172741,
|
|
503
|
+
"owned_by": "openai",
|
|
504
|
+
},
|
|
505
|
+
{
|
|
506
|
+
"id": "gpt-4-turbo",
|
|
507
|
+
"object": "model",
|
|
508
|
+
"created": 1712361441,
|
|
509
|
+
"owned_by": "openai",
|
|
510
|
+
},
|
|
511
|
+
{
|
|
512
|
+
"id": "gpt-4-turbo-preview",
|
|
513
|
+
"object": "model",
|
|
514
|
+
"created": 1706037777,
|
|
515
|
+
"owned_by": "openai",
|
|
516
|
+
},
|
|
517
|
+
{
|
|
518
|
+
"id": "o1",
|
|
519
|
+
"object": "model",
|
|
520
|
+
"created": 1734375816,
|
|
521
|
+
"owned_by": "openai",
|
|
522
|
+
},
|
|
523
|
+
{
|
|
524
|
+
"id": "o1-mini",
|
|
525
|
+
"object": "model",
|
|
526
|
+
"created": 1725649008,
|
|
527
|
+
"owned_by": "openai",
|
|
528
|
+
},
|
|
529
|
+
{
|
|
530
|
+
"id": "o1-preview",
|
|
531
|
+
"object": "model",
|
|
532
|
+
"created": 1725648897,
|
|
533
|
+
"owned_by": "openai",
|
|
534
|
+
},
|
|
535
|
+
{
|
|
536
|
+
"id": "o3",
|
|
537
|
+
"object": "model",
|
|
538
|
+
"created": 1744225308,
|
|
539
|
+
"owned_by": "openai",
|
|
540
|
+
},
|
|
541
|
+
{
|
|
542
|
+
"id": "o3-mini",
|
|
543
|
+
"object": "model",
|
|
544
|
+
"created": 1737146383,
|
|
545
|
+
"owned_by": "openai",
|
|
546
|
+
},
|
|
547
|
+
]
|
|
548
|
+
|
|
549
|
+
# Return combined response in mixed format
|
|
550
|
+
return {
|
|
551
|
+
"data": anthropic_models + openai_models,
|
|
552
|
+
"has_more": False,
|
|
553
|
+
"object": "list",
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
def _get_display_name(self, model_id: str) -> str:
|
|
557
|
+
"""Get display name for a model ID."""
|
|
558
|
+
display_names = {
|
|
559
|
+
"claude-opus-4-20250514": "Claude Opus 4",
|
|
560
|
+
"claude-sonnet-4-20250514": "Claude Sonnet 4",
|
|
561
|
+
"claude-3-7-sonnet-20250219": "Claude Sonnet 3.7",
|
|
562
|
+
"claude-3-5-sonnet-20241022": "Claude Sonnet 3.5 (New)",
|
|
563
|
+
"claude-3-5-haiku-20241022": "Claude Haiku 3.5",
|
|
564
|
+
"claude-3-5-haiku-latest": "Claude Haiku 3.5",
|
|
565
|
+
"claude-3-5-sonnet-20240620": "Claude Sonnet 3.5 (Old)",
|
|
566
|
+
"claude-3-haiku-20240307": "Claude Haiku 3",
|
|
567
|
+
"claude-3-opus-20240229": "Claude Opus 3",
|
|
568
|
+
}
|
|
569
|
+
return display_names.get(model_id, model_id)
|
|
570
|
+
|
|
571
|
+
def _get_created_timestamp(self, model_id: str) -> int:
|
|
572
|
+
"""Get created timestamp for a model ID."""
|
|
573
|
+
timestamps = {
|
|
574
|
+
"claude-opus-4-20250514": 1747526400, # 2025-05-22
|
|
575
|
+
"claude-sonnet-4-20250514": 1747526400, # 2025-05-22
|
|
576
|
+
"claude-3-7-sonnet-20250219": 1740268800, # 2025-02-24
|
|
577
|
+
"claude-3-5-sonnet-20241022": 1729555200, # 2024-10-22
|
|
578
|
+
"claude-3-5-haiku-20241022": 1729555200, # 2024-10-22
|
|
579
|
+
"claude-3-5-haiku-latest": 1729555200, # 2024-10-22
|
|
580
|
+
"claude-3-5-sonnet-20240620": 1718841600, # 2024-06-20
|
|
581
|
+
"claude-3-haiku-20240307": 1709769600, # 2024-03-07
|
|
582
|
+
"claude-3-opus-20240229": 1709164800, # 2024-02-29
|
|
583
|
+
}
|
|
584
|
+
return timestamps.get(model_id, 1677610602) # Default timestamp
|
|
585
|
+
|
|
586
|
+
async def validate_health(self) -> bool:
|
|
587
|
+
"""
|
|
588
|
+
Validate that the service is healthy.
|
|
589
|
+
|
|
590
|
+
Returns:
|
|
591
|
+
True if healthy, False otherwise
|
|
592
|
+
"""
|
|
593
|
+
try:
|
|
594
|
+
return await self.sdk_client.validate_health()
|
|
595
|
+
except Exception as e:
|
|
596
|
+
logger.error(
|
|
597
|
+
"health_check_failed",
|
|
598
|
+
error=str(e),
|
|
599
|
+
error_type=type(e).__name__,
|
|
600
|
+
exc_info=True,
|
|
601
|
+
)
|
|
602
|
+
return False
|
|
603
|
+
|
|
604
|
+
async def close(self) -> None:
|
|
605
|
+
"""Close the service and cleanup resources."""
|
|
606
|
+
await self.sdk_client.close()
|
|
607
|
+
|
|
608
|
+
async def __aenter__(self) -> "ClaudeSDKService":
|
|
609
|
+
"""Async context manager entry."""
|
|
610
|
+
return self
|
|
611
|
+
|
|
612
|
+
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
613
|
+
"""Async context manager exit."""
|
|
614
|
+
await self.close()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Credentials management package."""
|
|
2
|
+
|
|
3
|
+
from ccproxy.auth.exceptions import (
|
|
4
|
+
CredentialsError,
|
|
5
|
+
CredentialsExpiredError,
|
|
6
|
+
CredentialsInvalidError,
|
|
7
|
+
CredentialsNotFoundError,
|
|
8
|
+
CredentialsStorageError,
|
|
9
|
+
OAuthCallbackError,
|
|
10
|
+
OAuthError,
|
|
11
|
+
OAuthLoginError,
|
|
12
|
+
OAuthTokenRefreshError,
|
|
13
|
+
)
|
|
14
|
+
from ccproxy.auth.models import (
|
|
15
|
+
AccountInfo,
|
|
16
|
+
ClaudeCredentials,
|
|
17
|
+
OAuthToken,
|
|
18
|
+
OrganizationInfo,
|
|
19
|
+
UserProfile,
|
|
20
|
+
)
|
|
21
|
+
from ccproxy.auth.storage import JsonFileTokenStorage as JsonFileStorage
|
|
22
|
+
from ccproxy.auth.storage import TokenStorage as CredentialsStorageBackend
|
|
23
|
+
from ccproxy.services.credentials.config import CredentialsConfig, OAuthConfig
|
|
24
|
+
from ccproxy.services.credentials.manager import CredentialsManager
|
|
25
|
+
from ccproxy.services.credentials.oauth_client import OAuthClient
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Manager
|
|
30
|
+
"CredentialsManager",
|
|
31
|
+
# Config
|
|
32
|
+
"CredentialsConfig",
|
|
33
|
+
"OAuthConfig",
|
|
34
|
+
# Models
|
|
35
|
+
"ClaudeCredentials",
|
|
36
|
+
"OAuthToken",
|
|
37
|
+
"OrganizationInfo",
|
|
38
|
+
"AccountInfo",
|
|
39
|
+
"UserProfile",
|
|
40
|
+
# Storage
|
|
41
|
+
"CredentialsStorageBackend",
|
|
42
|
+
"JsonFileStorage",
|
|
43
|
+
# OAuth
|
|
44
|
+
"OAuthClient",
|
|
45
|
+
# Exceptions
|
|
46
|
+
"CredentialsError",
|
|
47
|
+
"CredentialsNotFoundError",
|
|
48
|
+
"CredentialsInvalidError",
|
|
49
|
+
"CredentialsExpiredError",
|
|
50
|
+
"CredentialsStorageError",
|
|
51
|
+
"OAuthError",
|
|
52
|
+
"OAuthLoginError",
|
|
53
|
+
"OAuthTokenRefreshError",
|
|
54
|
+
"OAuthCallbackError",
|
|
55
|
+
]
|