ccproxy-api 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/openai/__init__.py +1 -2
- ccproxy/adapters/openai/adapter.py +218 -180
- ccproxy/adapters/openai/streaming.py +247 -65
- ccproxy/api/__init__.py +0 -3
- ccproxy/api/app.py +173 -40
- ccproxy/api/dependencies.py +62 -3
- ccproxy/api/middleware/errors.py +3 -7
- ccproxy/api/middleware/headers.py +0 -2
- ccproxy/api/middleware/logging.py +4 -3
- ccproxy/api/middleware/request_content_logging.py +297 -0
- ccproxy/api/middleware/request_id.py +5 -0
- ccproxy/api/middleware/server_header.py +0 -4
- ccproxy/api/routes/__init__.py +9 -1
- ccproxy/api/routes/claude.py +23 -32
- ccproxy/api/routes/health.py +58 -4
- ccproxy/api/routes/mcp.py +171 -0
- ccproxy/api/routes/metrics.py +4 -8
- ccproxy/api/routes/permissions.py +217 -0
- ccproxy/api/routes/proxy.py +0 -53
- ccproxy/api/services/__init__.py +6 -0
- ccproxy/api/services/permission_service.py +368 -0
- ccproxy/api/ui/__init__.py +6 -0
- ccproxy/api/ui/permission_handler_protocol.py +33 -0
- ccproxy/api/ui/terminal_permission_handler.py +593 -0
- ccproxy/auth/conditional.py +2 -2
- ccproxy/auth/dependencies.py +1 -1
- ccproxy/auth/oauth/models.py +0 -1
- ccproxy/auth/oauth/routes.py +1 -3
- ccproxy/auth/storage/json_file.py +0 -1
- ccproxy/auth/storage/keyring.py +0 -3
- ccproxy/claude_sdk/__init__.py +2 -0
- ccproxy/claude_sdk/client.py +91 -8
- ccproxy/claude_sdk/converter.py +405 -210
- ccproxy/claude_sdk/options.py +76 -29
- ccproxy/claude_sdk/parser.py +200 -0
- ccproxy/claude_sdk/streaming.py +286 -0
- ccproxy/cli/commands/__init__.py +5 -2
- ccproxy/cli/commands/auth.py +2 -4
- ccproxy/cli/commands/permission_handler.py +553 -0
- ccproxy/cli/commands/serve.py +30 -12
- ccproxy/cli/docker/params.py +0 -4
- ccproxy/cli/helpers.py +0 -2
- ccproxy/cli/main.py +5 -16
- ccproxy/cli/options/claude_options.py +19 -1
- ccproxy/cli/options/core_options.py +0 -3
- ccproxy/cli/options/security_options.py +0 -2
- ccproxy/cli/options/server_options.py +3 -2
- ccproxy/config/auth.py +0 -1
- ccproxy/config/claude.py +78 -2
- ccproxy/config/discovery.py +0 -1
- ccproxy/config/docker_settings.py +0 -1
- ccproxy/config/loader.py +1 -4
- ccproxy/config/scheduler.py +20 -0
- ccproxy/config/security.py +7 -2
- ccproxy/config/server.py +5 -0
- ccproxy/config/settings.py +13 -7
- ccproxy/config/validators.py +1 -1
- ccproxy/core/async_utils.py +1 -4
- ccproxy/core/errors.py +45 -1
- ccproxy/core/http_transformers.py +4 -3
- ccproxy/core/interfaces.py +2 -2
- ccproxy/core/logging.py +97 -95
- ccproxy/core/middleware.py +1 -1
- ccproxy/core/proxy.py +1 -1
- ccproxy/core/transformers.py +1 -1
- ccproxy/core/types.py +1 -1
- ccproxy/docker/models.py +1 -1
- ccproxy/docker/protocol.py +0 -3
- ccproxy/models/__init__.py +41 -0
- ccproxy/models/claude_sdk.py +420 -0
- ccproxy/models/messages.py +45 -18
- ccproxy/models/permissions.py +115 -0
- ccproxy/models/requests.py +1 -1
- ccproxy/models/responses.py +29 -2
- ccproxy/observability/access_logger.py +1 -2
- ccproxy/observability/context.py +17 -1
- ccproxy/observability/metrics.py +1 -3
- ccproxy/observability/pushgateway.py +0 -2
- ccproxy/observability/stats_printer.py +2 -4
- ccproxy/observability/storage/duckdb_simple.py +1 -1
- ccproxy/observability/storage/models.py +0 -1
- ccproxy/pricing/cache.py +0 -1
- ccproxy/pricing/loader.py +5 -21
- ccproxy/pricing/updater.py +0 -1
- ccproxy/scheduler/__init__.py +1 -0
- ccproxy/scheduler/core.py +6 -6
- ccproxy/scheduler/manager.py +35 -7
- ccproxy/scheduler/registry.py +1 -1
- ccproxy/scheduler/tasks.py +127 -2
- ccproxy/services/claude_sdk_service.py +220 -328
- ccproxy/services/credentials/manager.py +0 -1
- ccproxy/services/credentials/oauth_client.py +1 -2
- ccproxy/services/proxy_service.py +93 -222
- ccproxy/testing/config.py +1 -1
- ccproxy/testing/mock_responses.py +0 -1
- ccproxy/utils/model_mapping.py +197 -0
- ccproxy/utils/models_provider.py +150 -0
- ccproxy/utils/simple_request_logger.py +284 -0
- ccproxy/utils/version_checker.py +184 -0
- {ccproxy_api-0.1.2.dist-info → ccproxy_api-0.1.3.dist-info}/METADATA +63 -2
- ccproxy_api-0.1.3.dist-info/RECORD +166 -0
- ccproxy/cli/commands/permission.py +0 -128
- ccproxy_api-0.1.2.dist-info/RECORD +0 -150
- /ccproxy/scheduler/{exceptions.py → errors.py} +0 -0
- {ccproxy_api-0.1.2.dist-info → ccproxy_api-0.1.3.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.2.dist-info → ccproxy_api-0.1.3.dist-info}/entry_points.txt +0 -0
- {ccproxy_api-0.1.2.dist-info → ccproxy_api-0.1.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,31 +1,30 @@
|
|
|
1
1
|
"""Claude SDK service orchestration for business logic."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
from collections.abc import AsyncIterator
|
|
5
|
-
from dataclasses import asdict, is_dataclass
|
|
6
4
|
from typing import Any
|
|
7
5
|
|
|
8
6
|
import structlog
|
|
9
|
-
from claude_code_sdk import
|
|
10
|
-
AssistantMessage,
|
|
11
|
-
ClaudeCodeOptions,
|
|
12
|
-
ResultMessage,
|
|
13
|
-
SystemMessage,
|
|
14
|
-
)
|
|
7
|
+
from claude_code_sdk import ClaudeCodeOptions
|
|
15
8
|
|
|
16
|
-
from ccproxy.adapters.openai import adapter
|
|
17
9
|
from ccproxy.auth.manager import AuthManager
|
|
18
10
|
from ccproxy.claude_sdk.client import ClaudeSDKClient
|
|
19
11
|
from ccproxy.claude_sdk.converter import MessageConverter
|
|
20
12
|
from ccproxy.claude_sdk.options import OptionsHandler
|
|
13
|
+
from ccproxy.claude_sdk.streaming import ClaudeStreamProcessor
|
|
14
|
+
from ccproxy.config.claude import SDKMessageMode
|
|
21
15
|
from ccproxy.config.settings import Settings
|
|
22
16
|
from ccproxy.core.errors import (
|
|
17
|
+
AuthenticationError,
|
|
23
18
|
ClaudeProxyError,
|
|
24
19
|
ServiceUnavailableError,
|
|
25
20
|
)
|
|
21
|
+
from ccproxy.models import claude_sdk as sdk_models
|
|
22
|
+
from ccproxy.models.messages import MessageResponse
|
|
26
23
|
from ccproxy.observability.access_logger import log_request_access
|
|
27
24
|
from ccproxy.observability.context import RequestContext, request_context
|
|
28
25
|
from ccproxy.observability.metrics import PrometheusMetrics
|
|
26
|
+
from ccproxy.utils.model_mapping import map_model_to_claude
|
|
27
|
+
from ccproxy.utils.simple_request_logger import write_request_log
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
logger = structlog.get_logger(__name__)
|
|
@@ -62,6 +61,10 @@ class ClaudeSDKService:
|
|
|
62
61
|
self.settings = settings
|
|
63
62
|
self.message_converter = MessageConverter()
|
|
64
63
|
self.options_handler = OptionsHandler(settings=settings)
|
|
64
|
+
self.stream_processor = ClaudeStreamProcessor(
|
|
65
|
+
message_converter=self.message_converter,
|
|
66
|
+
metrics=self.metrics,
|
|
67
|
+
)
|
|
65
68
|
|
|
66
69
|
async def create_completion(
|
|
67
70
|
self,
|
|
@@ -72,7 +75,7 @@ class ClaudeSDKService:
|
|
|
72
75
|
stream: bool = False,
|
|
73
76
|
user_id: str | None = None,
|
|
74
77
|
**kwargs: Any,
|
|
75
|
-
) ->
|
|
78
|
+
) -> MessageResponse | AsyncIterator[dict[str, Any]]:
|
|
76
79
|
"""
|
|
77
80
|
Create a completion using Claude SDK with business logic orchestration.
|
|
78
81
|
|
|
@@ -92,6 +95,7 @@ class ClaudeSDKService:
|
|
|
92
95
|
ClaudeProxyError: If request fails
|
|
93
96
|
ServiceUnavailableError: If service is unavailable
|
|
94
97
|
"""
|
|
98
|
+
|
|
95
99
|
# Validate authentication if auth manager is configured
|
|
96
100
|
if self.auth_manager and user_id:
|
|
97
101
|
try:
|
|
@@ -110,7 +114,7 @@ class ClaudeSDKService:
|
|
|
110
114
|
system_message = self.options_handler.extract_system_message(messages)
|
|
111
115
|
|
|
112
116
|
# Map model to Claude model
|
|
113
|
-
model =
|
|
117
|
+
model = map_model_to_claude(model)
|
|
114
118
|
|
|
115
119
|
options = self.options_handler.create_options(
|
|
116
120
|
model=model,
|
|
@@ -140,19 +144,34 @@ class ClaudeSDKService:
|
|
|
140
144
|
metrics=self.metrics, # Pass metrics for active request tracking
|
|
141
145
|
) as ctx:
|
|
142
146
|
try:
|
|
147
|
+
# Log SDK request parameters
|
|
148
|
+
timestamp = ctx.get_log_timestamp_prefix() if ctx else None
|
|
149
|
+
await self._log_sdk_request(
|
|
150
|
+
request_id, prompt, options, model, stream, timestamp
|
|
151
|
+
)
|
|
152
|
+
|
|
143
153
|
if stream:
|
|
144
154
|
# For streaming, return the async iterator directly
|
|
145
155
|
# Pass context to streaming method
|
|
146
156
|
return self._stream_completion(
|
|
147
|
-
prompt, options, model, request_id, ctx
|
|
157
|
+
prompt, options, model, request_id, ctx, timestamp
|
|
148
158
|
)
|
|
149
159
|
else:
|
|
150
160
|
result = await self._complete_non_streaming(
|
|
151
|
-
prompt, options, model, request_id, ctx
|
|
161
|
+
prompt, options, model, request_id, ctx, timestamp
|
|
152
162
|
)
|
|
153
163
|
return result
|
|
154
164
|
|
|
155
|
-
except
|
|
165
|
+
except AuthenticationError as e:
|
|
166
|
+
logger.error(
|
|
167
|
+
"authentication_failed",
|
|
168
|
+
user_id=user_id,
|
|
169
|
+
error=str(e),
|
|
170
|
+
error_type=type(e).__name__,
|
|
171
|
+
exc_info=True,
|
|
172
|
+
)
|
|
173
|
+
raise
|
|
174
|
+
except (ClaudeProxyError, ServiceUnavailableError) as e:
|
|
156
175
|
# Log error via access logger (includes metrics)
|
|
157
176
|
await log_request_access(
|
|
158
177
|
context=ctx,
|
|
@@ -166,11 +185,12 @@ class ClaudeSDKService:
|
|
|
166
185
|
async def _complete_non_streaming(
|
|
167
186
|
self,
|
|
168
187
|
prompt: str,
|
|
169
|
-
options: ClaudeCodeOptions,
|
|
188
|
+
options: "ClaudeCodeOptions",
|
|
170
189
|
model: str,
|
|
171
190
|
request_id: str | None = None,
|
|
172
191
|
ctx: RequestContext | None = None,
|
|
173
|
-
|
|
192
|
+
timestamp: str | None = None,
|
|
193
|
+
) -> MessageResponse:
|
|
174
194
|
"""
|
|
175
195
|
Complete a non-streaming request with business logic.
|
|
176
196
|
|
|
@@ -186,21 +206,19 @@ class ClaudeSDKService:
|
|
|
186
206
|
Raises:
|
|
187
207
|
ClaudeProxyError: If completion fails
|
|
188
208
|
"""
|
|
189
|
-
|
|
190
|
-
result_message = None
|
|
191
|
-
assistant_message = None
|
|
209
|
+
# SDK request already logged in create_completion
|
|
192
210
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if isinstance(message, AssistantMessage):
|
|
198
|
-
assistant_message = message
|
|
199
|
-
elif isinstance(message, ResultMessage):
|
|
200
|
-
result_message = message
|
|
211
|
+
messages = [
|
|
212
|
+
m
|
|
213
|
+
async for m in self.sdk_client.query_completion(prompt, options, request_id)
|
|
214
|
+
]
|
|
201
215
|
|
|
202
|
-
|
|
203
|
-
|
|
216
|
+
result_message = next(
|
|
217
|
+
(m for m in messages if isinstance(m, sdk_models.ResultMessage)), None
|
|
218
|
+
)
|
|
219
|
+
assistant_message = next(
|
|
220
|
+
(m for m in messages if isinstance(m, sdk_models.AssistantMessage)), None
|
|
221
|
+
)
|
|
204
222
|
|
|
205
223
|
if result_message is None:
|
|
206
224
|
raise ClaudeProxyError(
|
|
@@ -217,65 +235,104 @@ class ClaudeSDKService:
|
|
|
217
235
|
)
|
|
218
236
|
|
|
219
237
|
logger.debug("claude_sdk_completion_received")
|
|
220
|
-
|
|
238
|
+
mode = (
|
|
239
|
+
self.settings.claude.sdk_message_mode
|
|
240
|
+
if self.settings
|
|
241
|
+
else SDKMessageMode.FORWARD
|
|
242
|
+
)
|
|
243
|
+
pretty_format = self.settings.claude.pretty_format if self.settings else True
|
|
244
|
+
|
|
221
245
|
response = self.message_converter.convert_to_anthropic_response(
|
|
222
|
-
assistant_message, result_message, model
|
|
246
|
+
assistant_message, result_message, model, mode, pretty_format
|
|
223
247
|
)
|
|
224
248
|
|
|
225
|
-
#
|
|
249
|
+
# Add other message types to the content block
|
|
250
|
+
all_messages = [
|
|
251
|
+
m
|
|
252
|
+
for m in messages
|
|
253
|
+
if not isinstance(m, sdk_models.AssistantMessage | sdk_models.ResultMessage)
|
|
254
|
+
]
|
|
255
|
+
|
|
256
|
+
if mode != SDKMessageMode.IGNORE and response.content:
|
|
257
|
+
for message in all_messages:
|
|
258
|
+
if isinstance(message, sdk_models.SystemMessage):
|
|
259
|
+
content_block = self.message_converter._create_sdk_content_block(
|
|
260
|
+
sdk_object=message,
|
|
261
|
+
mode=mode,
|
|
262
|
+
pretty_format=pretty_format,
|
|
263
|
+
xml_tag="system_message",
|
|
264
|
+
forward_converter=lambda obj: {
|
|
265
|
+
"type": "system_message",
|
|
266
|
+
"text": obj.model_dump_json(separators=(",", ":")),
|
|
267
|
+
},
|
|
268
|
+
)
|
|
269
|
+
if content_block:
|
|
270
|
+
# Only validate as SDKMessageMode if it's a system_message type
|
|
271
|
+
if content_block.get("type") == "system_message":
|
|
272
|
+
response.content.append(
|
|
273
|
+
sdk_models.SDKMessageMode.model_validate(content_block)
|
|
274
|
+
)
|
|
275
|
+
else:
|
|
276
|
+
# For other types (like text blocks in FORMATTED mode), create appropriate content block
|
|
277
|
+
if content_block.get("type") == "text":
|
|
278
|
+
response.content.append(
|
|
279
|
+
sdk_models.TextBlock.model_validate(content_block)
|
|
280
|
+
)
|
|
281
|
+
else:
|
|
282
|
+
# Fallback for other content block types
|
|
283
|
+
logger.warning(
|
|
284
|
+
"unknown_content_block_type",
|
|
285
|
+
content_block_type=content_block.get("type"),
|
|
286
|
+
)
|
|
287
|
+
elif isinstance(message, sdk_models.UserMessage):
|
|
288
|
+
for block in message.content:
|
|
289
|
+
if isinstance(block, sdk_models.ToolResultBlock):
|
|
290
|
+
response.content.append(block)
|
|
291
|
+
|
|
226
292
|
cost_usd = result_message.total_cost_usd
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
else:
|
|
233
|
-
tokens_input = tokens_output = cache_read_tokens = cache_write_tokens = None
|
|
234
|
-
|
|
235
|
-
# Add cost to response usage section if available
|
|
236
|
-
if cost_usd is not None and "usage" in response:
|
|
237
|
-
response["usage"]["cost_usd"] = cost_usd
|
|
238
|
-
|
|
239
|
-
# Log metrics for observability
|
|
293
|
+
usage = result_message.usage_model
|
|
294
|
+
|
|
295
|
+
# if cost_usd is not None and response.usage:
|
|
296
|
+
# response.usage.cost_usd = cost_usd
|
|
297
|
+
|
|
240
298
|
logger.debug(
|
|
241
299
|
"claude_sdk_completion_completed",
|
|
242
300
|
model=model,
|
|
243
|
-
tokens_input=
|
|
244
|
-
tokens_output=
|
|
245
|
-
cache_read_tokens=
|
|
246
|
-
cache_write_tokens=
|
|
301
|
+
tokens_input=usage.input_tokens,
|
|
302
|
+
tokens_output=usage.output_tokens,
|
|
303
|
+
cache_read_tokens=usage.cache_read_input_tokens,
|
|
304
|
+
cache_write_tokens=usage.cache_creation_input_tokens,
|
|
247
305
|
cost_usd=cost_usd,
|
|
248
306
|
request_id=request_id,
|
|
249
307
|
)
|
|
250
308
|
|
|
251
|
-
# Update context with metrics if available
|
|
252
309
|
if ctx:
|
|
253
310
|
ctx.add_metadata(
|
|
254
311
|
status_code=200,
|
|
255
|
-
tokens_input=
|
|
256
|
-
tokens_output=
|
|
257
|
-
cache_read_tokens=
|
|
258
|
-
cache_write_tokens=
|
|
312
|
+
tokens_input=usage.input_tokens,
|
|
313
|
+
tokens_output=usage.output_tokens,
|
|
314
|
+
cache_read_tokens=usage.cache_read_input_tokens,
|
|
315
|
+
cache_write_tokens=usage.cache_creation_input_tokens,
|
|
259
316
|
cost_usd=cost_usd,
|
|
260
317
|
)
|
|
261
|
-
|
|
262
|
-
# Log comprehensive access log (includes Prometheus metrics)
|
|
263
318
|
await log_request_access(
|
|
264
|
-
context=ctx,
|
|
265
|
-
status_code=200,
|
|
266
|
-
method="POST",
|
|
267
|
-
metrics=self.metrics,
|
|
319
|
+
context=ctx, status_code=200, method="POST", metrics=self.metrics
|
|
268
320
|
)
|
|
269
321
|
|
|
322
|
+
# Log SDK response
|
|
323
|
+
if request_id:
|
|
324
|
+
await self._log_sdk_response(request_id, response, timestamp)
|
|
325
|
+
|
|
270
326
|
return response
|
|
271
327
|
|
|
272
328
|
async def _stream_completion(
|
|
273
329
|
self,
|
|
274
330
|
prompt: str,
|
|
275
|
-
options: ClaudeCodeOptions,
|
|
331
|
+
options: "ClaudeCodeOptions",
|
|
276
332
|
model: str,
|
|
277
333
|
request_id: str | None = None,
|
|
278
334
|
ctx: RequestContext | None = None,
|
|
335
|
+
timestamp: str | None = None,
|
|
279
336
|
) -> AsyncIterator[dict[str, Any]]:
|
|
280
337
|
"""
|
|
281
338
|
Stream completion responses with business logic.
|
|
@@ -284,143 +341,33 @@ class ClaudeSDKService:
|
|
|
284
341
|
prompt: The formatted prompt
|
|
285
342
|
options: Claude SDK options
|
|
286
343
|
model: The model being used
|
|
344
|
+
request_id: Optional request ID for logging
|
|
345
|
+
ctx: Optional request context for metrics
|
|
287
346
|
|
|
288
347
|
Yields:
|
|
289
348
|
Response chunks in Anthropic format
|
|
290
349
|
"""
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
try:
|
|
298
|
-
async for message in self.sdk_client.query_completion(
|
|
299
|
-
prompt, options, request_id
|
|
300
|
-
):
|
|
301
|
-
message_count += 1
|
|
302
|
-
logger.debug(
|
|
303
|
-
"streaming_message_received",
|
|
304
|
-
message_count=message_count,
|
|
305
|
-
message_type=type(message).__name__,
|
|
306
|
-
request_id=request_id,
|
|
307
|
-
)
|
|
308
|
-
|
|
309
|
-
if first_chunk:
|
|
310
|
-
# Send initial chunk
|
|
311
|
-
yield self.message_converter.create_streaming_start_chunk(
|
|
312
|
-
f"msg_{id(message)}", model
|
|
313
|
-
)
|
|
314
|
-
first_chunk = False
|
|
315
|
-
|
|
316
|
-
# TODO: instead of creating one message we should create a list of messages
|
|
317
|
-
# and this will be serialized back in one messsage by the adapter.
|
|
318
|
-
# to do that we have to create the different type of messsages
|
|
319
|
-
# in anthropic models
|
|
320
|
-
if isinstance(message, SystemMessage):
|
|
321
|
-
# Serialize dataclass to JSON
|
|
322
|
-
text_content = f"<system>{json.dumps(asdict(message))}</system>"
|
|
323
|
-
yield self.message_converter.create_streaming_delta_chunk(
|
|
324
|
-
text_content
|
|
325
|
-
)
|
|
326
|
-
elif isinstance(message, AssistantMessage):
|
|
327
|
-
assistant_messages.append(message)
|
|
350
|
+
sdk_message_mode = (
|
|
351
|
+
self.settings.claude.sdk_message_mode
|
|
352
|
+
if self.settings
|
|
353
|
+
else SDKMessageMode.FORWARD
|
|
354
|
+
)
|
|
355
|
+
pretty_format = self.settings.claude.pretty_format if self.settings else True
|
|
328
356
|
|
|
329
|
-
|
|
330
|
-
text_content = self.message_converter.extract_contents(
|
|
331
|
-
message.content
|
|
332
|
-
)
|
|
357
|
+
sdk_stream = self.sdk_client.query_completion(prompt, options, request_id)
|
|
333
358
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if message.usage:
|
|
347
|
-
tokens_input = message.usage.get("input_tokens")
|
|
348
|
-
tokens_output = message.usage.get("output_tokens")
|
|
349
|
-
cache_read_tokens = message.usage.get("cache_read_input_tokens")
|
|
350
|
-
cache_write_tokens = message.usage.get(
|
|
351
|
-
"cache_creation_input_tokens"
|
|
352
|
-
)
|
|
353
|
-
else:
|
|
354
|
-
tokens_input = tokens_output = cache_read_tokens = (
|
|
355
|
-
cache_write_tokens
|
|
356
|
-
) = None
|
|
357
|
-
|
|
358
|
-
# Log streaming completion metrics
|
|
359
|
-
logger.debug(
|
|
360
|
-
"streaming_completion_completed",
|
|
361
|
-
model=model,
|
|
362
|
-
tokens_input=tokens_input,
|
|
363
|
-
tokens_output=tokens_output,
|
|
364
|
-
cache_read_tokens=cache_read_tokens,
|
|
365
|
-
cache_write_tokens=cache_write_tokens,
|
|
366
|
-
cost_usd=cost_usd,
|
|
367
|
-
message_count=message_count,
|
|
368
|
-
request_id=request_id,
|
|
369
|
-
)
|
|
370
|
-
|
|
371
|
-
# Update context with metrics if available
|
|
372
|
-
if ctx:
|
|
373
|
-
ctx.add_metadata(
|
|
374
|
-
status_code=200,
|
|
375
|
-
tokens_input=tokens_input,
|
|
376
|
-
tokens_output=tokens_output,
|
|
377
|
-
cache_read_tokens=cache_read_tokens,
|
|
378
|
-
cache_write_tokens=cache_write_tokens,
|
|
379
|
-
cost_usd=cost_usd,
|
|
380
|
-
)
|
|
381
|
-
|
|
382
|
-
# Log comprehensive access log for streaming completion
|
|
383
|
-
await log_request_access(
|
|
384
|
-
context=ctx,
|
|
385
|
-
status_code=200,
|
|
386
|
-
method="POST",
|
|
387
|
-
metrics=self.metrics,
|
|
388
|
-
event_type="streaming_complete",
|
|
389
|
-
)
|
|
390
|
-
|
|
391
|
-
# Send final chunk with usage and cost information
|
|
392
|
-
final_chunk = self.message_converter.create_streaming_end_chunk()
|
|
393
|
-
|
|
394
|
-
# Add usage information to final chunk
|
|
395
|
-
if tokens_input or tokens_output or cost_usd:
|
|
396
|
-
usage_info = {}
|
|
397
|
-
if tokens_input:
|
|
398
|
-
usage_info["input_tokens"] = tokens_input
|
|
399
|
-
if tokens_output:
|
|
400
|
-
usage_info["output_tokens"] = tokens_output
|
|
401
|
-
if cost_usd is not None:
|
|
402
|
-
usage_info["cost_usd"] = cost_usd
|
|
403
|
-
|
|
404
|
-
# Update the usage in the final chunk
|
|
405
|
-
final_chunk["usage"].update(usage_info)
|
|
406
|
-
|
|
407
|
-
yield final_chunk
|
|
408
|
-
|
|
409
|
-
break
|
|
410
|
-
|
|
411
|
-
except asyncio.CancelledError:
|
|
412
|
-
logger.debug("streaming_completion_cancelled", request_id=request_id)
|
|
413
|
-
raise
|
|
414
|
-
except Exception as e:
|
|
415
|
-
logger.error(
|
|
416
|
-
"streaming_completion_failed",
|
|
417
|
-
error=str(e),
|
|
418
|
-
error_type=type(e).__name__,
|
|
419
|
-
request_id=request_id,
|
|
420
|
-
exc_info=True,
|
|
421
|
-
)
|
|
422
|
-
# Don't yield error chunk - let exception propagate for proper HTTP error response
|
|
423
|
-
raise
|
|
359
|
+
async for chunk in self.stream_processor.process_stream(
|
|
360
|
+
sdk_stream=sdk_stream,
|
|
361
|
+
model=model,
|
|
362
|
+
request_id=request_id,
|
|
363
|
+
ctx=ctx,
|
|
364
|
+
sdk_message_mode=sdk_message_mode,
|
|
365
|
+
pretty_format=pretty_format,
|
|
366
|
+
):
|
|
367
|
+
# Log streaming chunk
|
|
368
|
+
if request_id:
|
|
369
|
+
await self._log_sdk_streaming_chunk(request_id, chunk, timestamp)
|
|
370
|
+
yield chunk
|
|
424
371
|
|
|
425
372
|
async def _validate_user_auth(self, user_id: str) -> None:
|
|
426
373
|
"""
|
|
@@ -434,158 +381,103 @@ class ClaudeSDKService:
|
|
|
434
381
|
"""
|
|
435
382
|
if not self.auth_manager:
|
|
436
383
|
return
|
|
437
|
-
|
|
438
|
-
# Implement authentication validation logic
|
|
439
|
-
# This is a placeholder for future auth integration
|
|
440
384
|
logger.debug("user_auth_validation_start", user_id=user_id)
|
|
441
385
|
|
|
442
|
-
def
|
|
386
|
+
async def _log_sdk_request(
|
|
443
387
|
self,
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
388
|
+
request_id: str,
|
|
389
|
+
prompt: str,
|
|
390
|
+
options: "ClaudeCodeOptions",
|
|
391
|
+
model: str,
|
|
392
|
+
stream: bool,
|
|
393
|
+
timestamp: str | None = None,
|
|
394
|
+
) -> None:
|
|
395
|
+
"""Log SDK input parameters as JSON dump.
|
|
396
|
+
|
|
397
|
+
Args:
|
|
398
|
+
request_id: Request identifier
|
|
399
|
+
prompt: The formatted prompt
|
|
400
|
+
options: Claude SDK options
|
|
401
|
+
model: The model being used
|
|
402
|
+
stream: Whether streaming is enabled
|
|
403
|
+
timestamp: Optional timestamp prefix
|
|
450
404
|
"""
|
|
451
|
-
|
|
405
|
+
# timestamp is already provided from context, no need for fallback
|
|
406
|
+
|
|
407
|
+
# JSON dump of the parameters passed to SDK completion
|
|
408
|
+
sdk_request_data = {
|
|
409
|
+
"prompt": prompt,
|
|
410
|
+
"options": options.model_dump()
|
|
411
|
+
if hasattr(options, "model_dump")
|
|
412
|
+
else str(options),
|
|
413
|
+
"model": model,
|
|
414
|
+
"stream": stream,
|
|
415
|
+
"request_id": request_id,
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
await write_request_log(
|
|
419
|
+
request_id=request_id,
|
|
420
|
+
log_type="sdk_request",
|
|
421
|
+
data=sdk_request_data,
|
|
422
|
+
timestamp=timestamp,
|
|
423
|
+
)
|
|
452
424
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
425
|
+
async def _log_sdk_response(
|
|
426
|
+
self,
|
|
427
|
+
request_id: str,
|
|
428
|
+
result: Any,
|
|
429
|
+
timestamp: str | None = None,
|
|
430
|
+
) -> None:
|
|
431
|
+
"""Log SDK response result as JSON dump.
|
|
456
432
|
|
|
457
433
|
Args:
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
cache_read_tokens: Number of cache read tokens
|
|
462
|
-
cache_write_tokens: Number of cache write tokens
|
|
463
|
-
|
|
464
|
-
Returns:
|
|
465
|
-
Cost in USD or None if calculation not possible
|
|
434
|
+
request_id: Request identifier
|
|
435
|
+
result: The result from _complete_non_streaming
|
|
436
|
+
timestamp: Optional timestamp prefix
|
|
466
437
|
"""
|
|
467
|
-
from
|
|
438
|
+
# timestamp is already provided from context, no need for fallback
|
|
468
439
|
|
|
469
|
-
|
|
470
|
-
|
|
440
|
+
# JSON dump of the result from _complete_non_streaming
|
|
441
|
+
sdk_response_data = {
|
|
442
|
+
"result": result.model_dump()
|
|
443
|
+
if hasattr(result, "model_dump")
|
|
444
|
+
else str(result),
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
await write_request_log(
|
|
448
|
+
request_id=request_id,
|
|
449
|
+
log_type="sdk_response",
|
|
450
|
+
data=sdk_response_data,
|
|
451
|
+
timestamp=timestamp,
|
|
471
452
|
)
|
|
472
453
|
|
|
473
|
-
async def
|
|
474
|
-
|
|
475
|
-
|
|
454
|
+
async def _log_sdk_streaming_chunk(
|
|
455
|
+
self,
|
|
456
|
+
request_id: str,
|
|
457
|
+
chunk: dict[str, Any],
|
|
458
|
+
timestamp: str | None = None,
|
|
459
|
+
) -> None:
|
|
460
|
+
"""Log streaming chunk as JSON dump.
|
|
476
461
|
|
|
477
|
-
|
|
478
|
-
|
|
462
|
+
Args:
|
|
463
|
+
request_id: Request identifier
|
|
464
|
+
chunk: The streaming chunk from process_stream
|
|
465
|
+
timestamp: Optional timestamp prefix
|
|
479
466
|
"""
|
|
480
|
-
#
|
|
481
|
-
supported_models = self.options_handler.get_supported_models()
|
|
482
|
-
|
|
483
|
-
# Create Anthropic-style model entries
|
|
484
|
-
anthropic_models = []
|
|
485
|
-
for model_id in supported_models:
|
|
486
|
-
anthropic_models.append(
|
|
487
|
-
{
|
|
488
|
-
"type": "model",
|
|
489
|
-
"id": model_id,
|
|
490
|
-
"display_name": self._get_display_name(model_id),
|
|
491
|
-
"created_at": self._get_created_timestamp(model_id),
|
|
492
|
-
}
|
|
493
|
-
)
|
|
467
|
+
# timestamp is already provided from context, no need for fallback
|
|
494
468
|
|
|
495
|
-
#
|
|
496
|
-
|
|
497
|
-
{
|
|
498
|
-
"id": "gpt-4o",
|
|
499
|
-
"object": "model",
|
|
500
|
-
"created": 1715367049,
|
|
501
|
-
"owned_by": "openai",
|
|
502
|
-
},
|
|
503
|
-
{
|
|
504
|
-
"id": "gpt-4o-mini",
|
|
505
|
-
"object": "model",
|
|
506
|
-
"created": 1721172741,
|
|
507
|
-
"owned_by": "openai",
|
|
508
|
-
},
|
|
509
|
-
{
|
|
510
|
-
"id": "gpt-4-turbo",
|
|
511
|
-
"object": "model",
|
|
512
|
-
"created": 1712361441,
|
|
513
|
-
"owned_by": "openai",
|
|
514
|
-
},
|
|
515
|
-
{
|
|
516
|
-
"id": "gpt-4-turbo-preview",
|
|
517
|
-
"object": "model",
|
|
518
|
-
"created": 1706037777,
|
|
519
|
-
"owned_by": "openai",
|
|
520
|
-
},
|
|
521
|
-
{
|
|
522
|
-
"id": "o1",
|
|
523
|
-
"object": "model",
|
|
524
|
-
"created": 1734375816,
|
|
525
|
-
"owned_by": "openai",
|
|
526
|
-
},
|
|
527
|
-
{
|
|
528
|
-
"id": "o1-mini",
|
|
529
|
-
"object": "model",
|
|
530
|
-
"created": 1725649008,
|
|
531
|
-
"owned_by": "openai",
|
|
532
|
-
},
|
|
533
|
-
{
|
|
534
|
-
"id": "o1-preview",
|
|
535
|
-
"object": "model",
|
|
536
|
-
"created": 1725648897,
|
|
537
|
-
"owned_by": "openai",
|
|
538
|
-
},
|
|
539
|
-
{
|
|
540
|
-
"id": "o3",
|
|
541
|
-
"object": "model",
|
|
542
|
-
"created": 1744225308,
|
|
543
|
-
"owned_by": "openai",
|
|
544
|
-
},
|
|
545
|
-
{
|
|
546
|
-
"id": "o3-mini",
|
|
547
|
-
"object": "model",
|
|
548
|
-
"created": 1737146383,
|
|
549
|
-
"owned_by": "openai",
|
|
550
|
-
},
|
|
551
|
-
]
|
|
469
|
+
# Append streaming chunk as JSON to raw file
|
|
470
|
+
import json
|
|
552
471
|
|
|
553
|
-
|
|
554
|
-
return {
|
|
555
|
-
"data": anthropic_models + openai_models,
|
|
556
|
-
"has_more": False,
|
|
557
|
-
"object": "list",
|
|
558
|
-
}
|
|
472
|
+
from ccproxy.utils.simple_request_logger import append_streaming_log
|
|
559
473
|
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
"
|
|
564
|
-
"
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
"claude-3-5-haiku-20241022": "Claude Haiku 3.5",
|
|
568
|
-
"claude-3-5-haiku-latest": "Claude Haiku 3.5",
|
|
569
|
-
"claude-3-5-sonnet-20240620": "Claude Sonnet 3.5 (Old)",
|
|
570
|
-
"claude-3-haiku-20240307": "Claude Haiku 3",
|
|
571
|
-
"claude-3-opus-20240229": "Claude Opus 3",
|
|
572
|
-
}
|
|
573
|
-
return display_names.get(model_id, model_id)
|
|
574
|
-
|
|
575
|
-
def _get_created_timestamp(self, model_id: str) -> int:
|
|
576
|
-
"""Get created timestamp for a model ID."""
|
|
577
|
-
timestamps = {
|
|
578
|
-
"claude-opus-4-20250514": 1747526400, # 2025-05-22
|
|
579
|
-
"claude-sonnet-4-20250514": 1747526400, # 2025-05-22
|
|
580
|
-
"claude-3-7-sonnet-20250219": 1740268800, # 2025-02-24
|
|
581
|
-
"claude-3-5-sonnet-20241022": 1729555200, # 2024-10-22
|
|
582
|
-
"claude-3-5-haiku-20241022": 1729555200, # 2024-10-22
|
|
583
|
-
"claude-3-5-haiku-latest": 1729555200, # 2024-10-22
|
|
584
|
-
"claude-3-5-sonnet-20240620": 1718841600, # 2024-06-20
|
|
585
|
-
"claude-3-haiku-20240307": 1709769600, # 2024-03-07
|
|
586
|
-
"claude-3-opus-20240229": 1709164800, # 2024-02-29
|
|
587
|
-
}
|
|
588
|
-
return timestamps.get(model_id, 1677610602) # Default timestamp
|
|
474
|
+
chunk_data = json.dumps(chunk, default=str) + "\n"
|
|
475
|
+
await append_streaming_log(
|
|
476
|
+
request_id=request_id,
|
|
477
|
+
log_type="sdk_streaming",
|
|
478
|
+
data=chunk_data.encode("utf-8"),
|
|
479
|
+
timestamp=timestamp,
|
|
480
|
+
)
|
|
589
481
|
|
|
590
482
|
async def validate_health(self) -> bool:
|
|
591
483
|
"""
|