ccproxy-api 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccproxy/_version.py +2 -2
- ccproxy/adapters/openai/__init__.py +1 -2
- ccproxy/adapters/openai/adapter.py +218 -180
- ccproxy/adapters/openai/streaming.py +247 -65
- ccproxy/api/__init__.py +0 -3
- ccproxy/api/app.py +173 -40
- ccproxy/api/dependencies.py +65 -3
- ccproxy/api/middleware/errors.py +3 -7
- ccproxy/api/middleware/headers.py +0 -2
- ccproxy/api/middleware/logging.py +4 -3
- ccproxy/api/middleware/request_content_logging.py +297 -0
- ccproxy/api/middleware/request_id.py +5 -0
- ccproxy/api/middleware/server_header.py +0 -4
- ccproxy/api/routes/__init__.py +9 -1
- ccproxy/api/routes/claude.py +23 -32
- ccproxy/api/routes/health.py +58 -4
- ccproxy/api/routes/mcp.py +171 -0
- ccproxy/api/routes/metrics.py +4 -8
- ccproxy/api/routes/permissions.py +217 -0
- ccproxy/api/routes/proxy.py +0 -53
- ccproxy/api/services/__init__.py +6 -0
- ccproxy/api/services/permission_service.py +368 -0
- ccproxy/api/ui/__init__.py +6 -0
- ccproxy/api/ui/permission_handler_protocol.py +33 -0
- ccproxy/api/ui/terminal_permission_handler.py +593 -0
- ccproxy/auth/conditional.py +2 -2
- ccproxy/auth/dependencies.py +1 -1
- ccproxy/auth/oauth/models.py +0 -1
- ccproxy/auth/oauth/routes.py +1 -3
- ccproxy/auth/storage/json_file.py +0 -1
- ccproxy/auth/storage/keyring.py +0 -3
- ccproxy/claude_sdk/__init__.py +2 -0
- ccproxy/claude_sdk/client.py +91 -8
- ccproxy/claude_sdk/converter.py +405 -210
- ccproxy/claude_sdk/options.py +88 -19
- ccproxy/claude_sdk/parser.py +200 -0
- ccproxy/claude_sdk/streaming.py +286 -0
- ccproxy/cli/commands/__init__.py +5 -1
- ccproxy/cli/commands/auth.py +2 -4
- ccproxy/cli/commands/permission_handler.py +553 -0
- ccproxy/cli/commands/serve.py +52 -12
- ccproxy/cli/docker/params.py +0 -4
- ccproxy/cli/helpers.py +0 -2
- ccproxy/cli/main.py +6 -17
- ccproxy/cli/options/claude_options.py +41 -1
- ccproxy/cli/options/core_options.py +0 -3
- ccproxy/cli/options/security_options.py +0 -2
- ccproxy/cli/options/server_options.py +3 -2
- ccproxy/config/auth.py +0 -1
- ccproxy/config/claude.py +78 -2
- ccproxy/config/discovery.py +0 -1
- ccproxy/config/docker_settings.py +0 -1
- ccproxy/config/loader.py +1 -4
- ccproxy/config/scheduler.py +20 -0
- ccproxy/config/security.py +7 -2
- ccproxy/config/server.py +5 -0
- ccproxy/config/settings.py +15 -7
- ccproxy/config/validators.py +1 -1
- ccproxy/core/async_utils.py +1 -4
- ccproxy/core/errors.py +45 -1
- ccproxy/core/http_transformers.py +4 -3
- ccproxy/core/interfaces.py +2 -2
- ccproxy/core/logging.py +97 -95
- ccproxy/core/middleware.py +1 -1
- ccproxy/core/proxy.py +1 -1
- ccproxy/core/transformers.py +1 -1
- ccproxy/core/types.py +1 -1
- ccproxy/docker/models.py +1 -1
- ccproxy/docker/protocol.py +0 -3
- ccproxy/models/__init__.py +41 -0
- ccproxy/models/claude_sdk.py +420 -0
- ccproxy/models/messages.py +45 -18
- ccproxy/models/permissions.py +115 -0
- ccproxy/models/requests.py +1 -1
- ccproxy/models/responses.py +64 -1
- ccproxy/observability/access_logger.py +1 -2
- ccproxy/observability/context.py +17 -1
- ccproxy/observability/metrics.py +1 -3
- ccproxy/observability/pushgateway.py +0 -2
- ccproxy/observability/stats_printer.py +2 -4
- ccproxy/observability/storage/duckdb_simple.py +1 -1
- ccproxy/observability/storage/models.py +0 -1
- ccproxy/pricing/cache.py +0 -1
- ccproxy/pricing/loader.py +5 -21
- ccproxy/pricing/updater.py +0 -1
- ccproxy/scheduler/__init__.py +1 -0
- ccproxy/scheduler/core.py +6 -6
- ccproxy/scheduler/manager.py +35 -7
- ccproxy/scheduler/registry.py +1 -1
- ccproxy/scheduler/tasks.py +127 -2
- ccproxy/services/claude_sdk_service.py +225 -329
- ccproxy/services/credentials/manager.py +0 -1
- ccproxy/services/credentials/oauth_client.py +1 -2
- ccproxy/services/proxy_service.py +93 -222
- ccproxy/testing/config.py +1 -1
- ccproxy/testing/mock_responses.py +0 -1
- ccproxy/utils/model_mapping.py +197 -0
- ccproxy/utils/models_provider.py +150 -0
- ccproxy/utils/simple_request_logger.py +284 -0
- ccproxy/utils/version_checker.py +184 -0
- {ccproxy_api-0.1.1.dist-info → ccproxy_api-0.1.3.dist-info}/METADATA +63 -2
- ccproxy_api-0.1.3.dist-info/RECORD +166 -0
- {ccproxy_api-0.1.1.dist-info → ccproxy_api-0.1.3.dist-info}/entry_points.txt +1 -0
- ccproxy_api-0.1.1.dist-info/RECORD +0 -149
- /ccproxy/scheduler/{exceptions.py → errors.py} +0 -0
- {ccproxy_api-0.1.1.dist-info → ccproxy_api-0.1.3.dist-info}/WHEEL +0 -0
- {ccproxy_api-0.1.1.dist-info → ccproxy_api-0.1.3.dist-info}/licenses/LICENSE +0 -0
Full diff of ccproxy/services/claude_sdk_service.py (+225 -329). Some removed lines were truncated by the diff viewer and appear incomplete below.

```diff
@@ -1,30 +1,30 @@
 """Claude SDK service orchestration for business logic."""
 
-import json
 from collections.abc import AsyncIterator
-from dataclasses import asdict, is_dataclass
 from typing import Any
 
 import structlog
-from claude_code_sdk import (
-    AssistantMessage,
-    ClaudeCodeOptions,
-    ResultMessage,
-    SystemMessage,
-)
+from claude_code_sdk import ClaudeCodeOptions
 
-from ccproxy.adapters.openai import adapter
 from ccproxy.auth.manager import AuthManager
 from ccproxy.claude_sdk.client import ClaudeSDKClient
 from ccproxy.claude_sdk.converter import MessageConverter
 from ccproxy.claude_sdk.options import OptionsHandler
+from ccproxy.claude_sdk.streaming import ClaudeStreamProcessor
+from ccproxy.config.claude import SDKMessageMode
+from ccproxy.config.settings import Settings
 from ccproxy.core.errors import (
+    AuthenticationError,
     ClaudeProxyError,
     ServiceUnavailableError,
 )
+from ccproxy.models import claude_sdk as sdk_models
+from ccproxy.models.messages import MessageResponse
 from ccproxy.observability.access_logger import log_request_access
 from ccproxy.observability.context import RequestContext, request_context
 from ccproxy.observability.metrics import PrometheusMetrics
+from ccproxy.utils.model_mapping import map_model_to_claude
+from ccproxy.utils.simple_request_logger import write_request_log
 
 
 logger = structlog.get_logger(__name__)
```
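The removed `adapter` import and the new `map_model_to_claude` import reflect model handling moving out of this service into the new `ccproxy/utils/model_mapping.py` (+197 lines in this release). A minimal sketch of what such an alias mapper can look like; the table below is hypothetical, not the package's actual mapping:

```python
# Hypothetical sketch of an OpenAI-to-Claude alias mapper; the real
# implementation lives in ccproxy/utils/model_mapping.py and its table differs.
_MODEL_ALIASES: dict[str, str] = {
    "gpt-4o": "claude-sonnet-4-20250514",    # assumed alias, for illustration
    "o3-mini": "claude-3-5-haiku-20241022",  # assumed alias, for illustration
}


def map_model_to_claude(model: str) -> str:
    """Return a Claude model ID for an OpenAI-style alias; pass through otherwise."""
    return _MODEL_ALIASES.get(model, model)
```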
```diff
@@ -44,6 +44,7 @@ class ClaudeSDKService:
         sdk_client: ClaudeSDKClient | None = None,
         auth_manager: AuthManager | None = None,
         metrics: PrometheusMetrics | None = None,
+        settings: Settings | None = None,
     ) -> None:
         """
         Initialize Claude SDK service.
```
```diff
@@ -52,12 +53,18 @@
             sdk_client: Claude SDK client instance
             auth_manager: Authentication manager (optional)
             metrics: Prometheus metrics instance (optional)
+            settings: Application settings (optional)
         """
         self.sdk_client = sdk_client or ClaudeSDKClient()
         self.auth_manager = auth_manager
         self.metrics = metrics
+        self.settings = settings
         self.message_converter = MessageConverter()
-        self.options_handler = OptionsHandler()
+        self.options_handler = OptionsHandler(settings=settings)
+        self.stream_processor = ClaudeStreamProcessor(
+            message_converter=self.message_converter,
+            metrics=self.metrics,
+        )
 
     async def create_completion(
         self,
```
```diff
@@ -68,7 +75,7 @@
         stream: bool = False,
         user_id: str | None = None,
         **kwargs: Any,
-    ) ->
+    ) -> MessageResponse | AsyncIterator[dict[str, Any]]:
         """
         Create a completion using Claude SDK with business logic orchestration.
 
```
```diff
@@ -88,6 +95,7 @@
             ClaudeProxyError: If request fails
             ServiceUnavailableError: If service is unavailable
         """
+
         # Validate authentication if auth manager is configured
         if self.auth_manager and user_id:
             try:
```
```diff
@@ -106,7 +114,7 @@
         system_message = self.options_handler.extract_system_message(messages)
 
         # Map model to Claude model
-        model =
+        model = map_model_to_claude(model)
 
         options = self.options_handler.create_options(
             model=model,
```
```diff
@@ -136,19 +144,34 @@
             metrics=self.metrics,  # Pass metrics for active request tracking
         ) as ctx:
             try:
+                # Log SDK request parameters
+                timestamp = ctx.get_log_timestamp_prefix() if ctx else None
+                await self._log_sdk_request(
+                    request_id, prompt, options, model, stream, timestamp
+                )
+
                 if stream:
                     # For streaming, return the async iterator directly
                     # Pass context to streaming method
                     return self._stream_completion(
-                        prompt, options, model, request_id, ctx
+                        prompt, options, model, request_id, ctx, timestamp
                     )
                 else:
                     result = await self._complete_non_streaming(
-                        prompt, options, model, request_id, ctx
+                        prompt, options, model, request_id, ctx, timestamp
                     )
                     return result
 
-            except
+            except AuthenticationError as e:
+                logger.error(
+                    "authentication_failed",
+                    user_id=user_id,
+                    error=str(e),
+                    error_type=type(e).__name__,
+                    exc_info=True,
+                )
+                raise
+            except (ClaudeProxyError, ServiceUnavailableError) as e:
                 # Log error via access logger (includes metrics)
                 await log_request_access(
                     context=ctx,
```
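With these changes, `create_completion` returns a typed `MessageResponse` for non-streaming calls and an async iterator of chunk dicts for streaming calls, and SDK inputs are logged before dispatch. A hedged consumption sketch; the `messages` and `model` keyword names are assumptions from the surrounding calls:

```python
from typing import Any


async def call_service(
    service: "ClaudeSDKService", messages: list[dict[str, Any]], model: str
) -> None:
    # Non-streaming: a MessageResponse model comes back.
    response = await service.create_completion(messages=messages, model=model, stream=False)

    # Streaming: the awaited call hands back an AsyncIterator[dict[str, Any]].
    stream = await service.create_completion(messages=messages, model=model, stream=True)
    async for chunk in stream:  # chunks are Anthropic-format dicts
        print(chunk.get("type"))
```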
```diff
@@ -162,11 +185,12 @@
     async def _complete_non_streaming(
         self,
         prompt: str,
-        options: ClaudeCodeOptions,
+        options: "ClaudeCodeOptions",
         model: str,
         request_id: str | None = None,
         ctx: RequestContext | None = None,
-
+        timestamp: str | None = None,
+    ) -> MessageResponse:
         """
         Complete a non-streaming request with business logic.
 
```
```diff
@@ -182,21 +206,19 @@
         Raises:
             ClaudeProxyError: If completion fails
         """
-
-        result_message = None
-        assistant_message = None
+        # SDK request already logged in create_completion
 
-
-
-
-
-            if isinstance(message, AssistantMessage):
-                assistant_message = message
-            elif isinstance(message, ResultMessage):
-                result_message = message
+        messages = [
+            m
+            async for m in self.sdk_client.query_completion(prompt, options, request_id)
+        ]
 
-
-
+        result_message = next(
+            (m for m in messages if isinstance(m, sdk_models.ResultMessage)), None
+        )
+        assistant_message = next(
+            (m for m in messages if isinstance(m, sdk_models.AssistantMessage)), None
+        )
 
         if result_message is None:
             raise ClaudeProxyError(
```
```diff
@@ -213,65 +235,104 @@
             )
 
         logger.debug("claude_sdk_completion_received")
-
+        mode = (
+            self.settings.claude.sdk_message_mode
+            if self.settings
+            else SDKMessageMode.FORWARD
+        )
+        pretty_format = self.settings.claude.pretty_format if self.settings else True
+
         response = self.message_converter.convert_to_anthropic_response(
-            assistant_message, result_message, model
+            assistant_message, result_message, model, mode, pretty_format
         )
 
-        #
+        # Add other message types to the content block
+        all_messages = [
+            m
+            for m in messages
+            if not isinstance(m, sdk_models.AssistantMessage | sdk_models.ResultMessage)
+        ]
+
+        if mode != SDKMessageMode.IGNORE and response.content:
+            for message in all_messages:
+                if isinstance(message, sdk_models.SystemMessage):
+                    content_block = self.message_converter._create_sdk_content_block(
+                        sdk_object=message,
+                        mode=mode,
+                        pretty_format=pretty_format,
+                        xml_tag="system_message",
+                        forward_converter=lambda obj: {
+                            "type": "system_message",
+                            "text": obj.model_dump_json(separators=(",", ":")),
+                        },
+                    )
+                    if content_block:
+                        # Only validate as SDKMessageMode if it's a system_message type
+                        if content_block.get("type") == "system_message":
+                            response.content.append(
+                                sdk_models.SDKMessageMode.model_validate(content_block)
+                            )
+                        else:
+                            # For other types (like text blocks in FORMATTED mode), create appropriate content block
+                            if content_block.get("type") == "text":
+                                response.content.append(
+                                    sdk_models.TextBlock.model_validate(content_block)
+                                )
+                            else:
+                                # Fallback for other content block types
+                                logger.warning(
+                                    "unknown_content_block_type",
+                                    content_block_type=content_block.get("type"),
+                                )
+                elif isinstance(message, sdk_models.UserMessage):
+                    for block in message.content:
+                        if isinstance(block, sdk_models.ToolResultBlock):
+                            response.content.append(block)
+
         cost_usd = result_message.total_cost_usd
-
-
-
-
-
-        else:
-            tokens_input = tokens_output = cache_read_tokens = cache_write_tokens = None
-
-        # Add cost to response usage section if available
-        if cost_usd is not None and "usage" in response:
-            response["usage"]["cost_usd"] = cost_usd
-
-        # Log metrics for observability
+        usage = result_message.usage_model
+
+        # if cost_usd is not None and response.usage:
+        #     response.usage.cost_usd = cost_usd
+
         logger.debug(
             "claude_sdk_completion_completed",
             model=model,
-            tokens_input=
-            tokens_output=
-            cache_read_tokens=
-            cache_write_tokens=
+            tokens_input=usage.input_tokens,
+            tokens_output=usage.output_tokens,
+            cache_read_tokens=usage.cache_read_input_tokens,
+            cache_write_tokens=usage.cache_creation_input_tokens,
             cost_usd=cost_usd,
             request_id=request_id,
         )
 
-        # Update context with metrics if available
         if ctx:
             ctx.add_metadata(
                 status_code=200,
-                tokens_input=
-                tokens_output=
-                cache_read_tokens=
-                cache_write_tokens=
+                tokens_input=usage.input_tokens,
+                tokens_output=usage.output_tokens,
+                cache_read_tokens=usage.cache_read_input_tokens,
+                cache_write_tokens=usage.cache_creation_input_tokens,
                 cost_usd=cost_usd,
             )
-
-        # Log comprehensive access log (includes Prometheus metrics)
         await log_request_access(
-            context=ctx,
-            status_code=200,
-            method="POST",
-            metrics=self.metrics,
+            context=ctx, status_code=200, method="POST", metrics=self.metrics
         )
 
+        # Log SDK response
+        if request_id:
+            await self._log_sdk_response(request_id, response, timestamp)
+
         return response
 
     async def _stream_completion(
         self,
         prompt: str,
-        options: ClaudeCodeOptions,
+        options: "ClaudeCodeOptions",
         model: str,
         request_id: str | None = None,
         ctx: RequestContext | None = None,
+        timestamp: str | None = None,
     ) -> AsyncIterator[dict[str, Any]]:
         """
         Stream completion responses with business logic.
```
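The non-streaming path now threads an `SDKMessageMode` through the converter: `IGNORE` drops non-assistant SDK messages, `FORWARD` keeps them as raw JSON content blocks, and `FORMATTED` (per the comment in the hunk above) renders them as text. A simplified sketch of that three-way dispatch; only the member names come from the diff, the enum values and formatting details are assumptions:

```python
import json
from enum import Enum
from typing import Any


class SDKMessageMode(str, Enum):  # member names per the diff; values assumed
    IGNORE = "ignore"
    FORWARD = "forward"
    FORMATTED = "formatted"


def render_sdk_message(
    msg: dict[str, Any], mode: SDKMessageMode, xml_tag: str = "system_message"
) -> dict[str, Any] | None:
    if mode == SDKMessageMode.IGNORE:
        return None  # drop the message entirely
    if mode == SDKMessageMode.FORWARD:
        # keep the raw payload as a dedicated content block
        return {"type": "system_message", "text": json.dumps(msg, separators=(",", ":"))}
    # FORMATTED: wrap a pretty dump in an XML-style tag as a plain text block
    return {"type": "text", "text": f"<{xml_tag}>{json.dumps(msg, indent=2)}</{xml_tag}>"}
```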
```diff
@@ -280,143 +341,33 @@
             prompt: The formatted prompt
             options: Claude SDK options
             model: The model being used
+            request_id: Optional request ID for logging
+            ctx: Optional request context for metrics
 
         Yields:
             Response chunks in Anthropic format
         """
-
-
-
-
-
-
-        try:
-            async for message in self.sdk_client.query_completion(
-                prompt, options, request_id
-            ):
-                message_count += 1
-                logger.debug(
-                    "streaming_message_received",
-                    message_count=message_count,
-                    message_type=type(message).__name__,
-                    request_id=request_id,
-                )
-
-                if first_chunk:
-                    # Send initial chunk
-                    yield self.message_converter.create_streaming_start_chunk(
-                        f"msg_{id(message)}", model
-                    )
-                    first_chunk = False
-
-                # TODO: instead of creating one message we should create a list of messages
-                # and this will be serialized back in one messsage by the adapter.
-                # to do that we have to create the different type of messsages
-                # in anthropic models
-                if isinstance(message, SystemMessage):
-                    # Serialize dataclass to JSON
-                    text_content = f"<system>{json.dumps(asdict(message))}</system>"
-                    yield self.message_converter.create_streaming_delta_chunk(
-                        text_content
-                    )
-                elif isinstance(message, AssistantMessage):
-                    assistant_messages.append(message)
+        sdk_message_mode = (
+            self.settings.claude.sdk_message_mode
+            if self.settings
+            else SDKMessageMode.FORWARD
+        )
+        pretty_format = self.settings.claude.pretty_format if self.settings else True
 
-
-                    text_content = self.message_converter.extract_contents(
-                        message.content
-                    )
+        sdk_stream = self.sdk_client.query_completion(prompt, options, request_id)
 
-
-
-
-
-
-
-
-
-
-
-
-
-                if message.usage:
-                    tokens_input = message.usage.get("input_tokens")
-                    tokens_output = message.usage.get("output_tokens")
-                    cache_read_tokens = message.usage.get("cache_read_input_tokens")
-                    cache_write_tokens = message.usage.get(
-                        "cache_creation_input_tokens"
-                    )
-                else:
-                    tokens_input = tokens_output = cache_read_tokens = (
-                        cache_write_tokens
-                    ) = None
-
-                # Log streaming completion metrics
-                logger.debug(
-                    "streaming_completion_completed",
-                    model=model,
-                    tokens_input=tokens_input,
-                    tokens_output=tokens_output,
-                    cache_read_tokens=cache_read_tokens,
-                    cache_write_tokens=cache_write_tokens,
-                    cost_usd=cost_usd,
-                    message_count=message_count,
-                    request_id=request_id,
-                )
-
-                # Update context with metrics if available
-                if ctx:
-                    ctx.add_metadata(
-                        status_code=200,
-                        tokens_input=tokens_input,
-                        tokens_output=tokens_output,
-                        cache_read_tokens=cache_read_tokens,
-                        cache_write_tokens=cache_write_tokens,
-                        cost_usd=cost_usd,
-                    )
-
-                # Log comprehensive access log for streaming completion
-                await log_request_access(
-                    context=ctx,
-                    status_code=200,
-                    method="POST",
-                    metrics=self.metrics,
-                    event_type="streaming_complete",
-                )
-
-                # Send final chunk with usage and cost information
-                final_chunk = self.message_converter.create_streaming_end_chunk()
-
-                # Add usage information to final chunk
-                if tokens_input or tokens_output or cost_usd:
-                    usage_info = {}
-                    if tokens_input:
-                        usage_info["input_tokens"] = tokens_input
-                    if tokens_output:
-                        usage_info["output_tokens"] = tokens_output
-                    if cost_usd is not None:
-                        usage_info["cost_usd"] = cost_usd
-
-                    # Update the usage in the final chunk
-                    final_chunk["usage"].update(usage_info)
-
-                yield final_chunk
-
-                break
-
-        except asyncio.CancelledError:
-            logger.debug("streaming_completion_cancelled", request_id=request_id)
-            raise
-        except Exception as e:
-            logger.error(
-                "streaming_completion_failed",
-                error=str(e),
-                error_type=type(e).__name__,
-                request_id=request_id,
-                exc_info=True,
-            )
-            # Don't yield error chunk - let exception propagate for proper HTTP error response
-            raise
+        async for chunk in self.stream_processor.process_stream(
+            sdk_stream=sdk_stream,
+            model=model,
+            request_id=request_id,
+            ctx=ctx,
+            sdk_message_mode=sdk_message_mode,
+            pretty_format=pretty_format,
+        ):
+            # Log streaming chunk
+            if request_id:
+                await self._log_sdk_streaming_chunk(request_id, chunk, timestamp)
+            yield chunk
 
     async def _validate_user_auth(self, user_id: str) -> None:
         """
```
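The roughly 130-line inline streaming loop is gone; `_stream_completion` now delegates to `ClaudeStreamProcessor.process_stream()` from the new `ccproxy/claude_sdk/streaming.py` (+286 lines) and only adds per-chunk logging on top. A minimal sketch of the delegation shape, not the actual processor:

```python
from collections.abc import AsyncIterator
from typing import Any


class StreamProcessorSketch:
    """Illustrative stand-in for ClaudeStreamProcessor: converts SDK messages
    into Anthropic-style streaming chunks. Chunk shapes here are assumed."""

    async def process_stream(
        self, sdk_stream: AsyncIterator[Any], model: str
    ) -> AsyncIterator[dict[str, Any]]:
        yield {"type": "message_start", "message": {"model": model}}
        async for message in sdk_stream:
            # The real processor branches on message type, honors
            # sdk_message_mode, and accumulates usage for the final chunk.
            yield {"type": "content_block_delta", "delta": {"text": str(message)}}
        yield {"type": "message_stop"}
```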
```diff
@@ -430,158 +381,103 @@
         """
         if not self.auth_manager:
             return
-
-        # Implement authentication validation logic
-        # This is a placeholder for future auth integration
         logger.debug("user_auth_validation_start", user_id=user_id)
 
-    def
+    async def _log_sdk_request(
         self,
-
-
-
-
-
-
+        request_id: str,
+        prompt: str,
+        options: "ClaudeCodeOptions",
+        model: str,
+        stream: bool,
+        timestamp: str | None = None,
+    ) -> None:
+        """Log SDK input parameters as JSON dump.
+
+        Args:
+            request_id: Request identifier
+            prompt: The formatted prompt
+            options: Claude SDK options
+            model: The model being used
+            stream: Whether streaming is enabled
+            timestamp: Optional timestamp prefix
         """
-
+        # timestamp is already provided from context, no need for fallback
+
+        # JSON dump of the parameters passed to SDK completion
+        sdk_request_data = {
+            "prompt": prompt,
+            "options": options.model_dump()
+            if hasattr(options, "model_dump")
+            else str(options),
+            "model": model,
+            "stream": stream,
+            "request_id": request_id,
+        }
+
+        await write_request_log(
+            request_id=request_id,
+            log_type="sdk_request",
+            data=sdk_request_data,
+            timestamp=timestamp,
+        )
 
-
-
-
+    async def _log_sdk_response(
+        self,
+        request_id: str,
+        result: Any,
+        timestamp: str | None = None,
+    ) -> None:
+        """Log SDK response result as JSON dump.
 
         Args:
-
-
-
-            cache_read_tokens: Number of cache read tokens
-            cache_write_tokens: Number of cache write tokens
-
-        Returns:
-            Cost in USD or None if calculation not possible
+            request_id: Request identifier
+            result: The result from _complete_non_streaming
+            timestamp: Optional timestamp prefix
         """
-        from
+        # timestamp is already provided from context, no need for fallback
 
-
-
+        # JSON dump of the result from _complete_non_streaming
+        sdk_response_data = {
+            "result": result.model_dump()
+            if hasattr(result, "model_dump")
+            else str(result),
+        }
+
+        await write_request_log(
+            request_id=request_id,
+            log_type="sdk_response",
+            data=sdk_response_data,
+            timestamp=timestamp,
         )
 
-    async def
-
-
+    async def _log_sdk_streaming_chunk(
+        self,
+        request_id: str,
+        chunk: dict[str, Any],
+        timestamp: str | None = None,
+    ) -> None:
+        """Log streaming chunk as JSON dump.
 
-
-
+        Args:
+            request_id: Request identifier
+            chunk: The streaming chunk from process_stream
+            timestamp: Optional timestamp prefix
         """
-        #
-        supported_models = self.options_handler.get_supported_models()
-
-        # Create Anthropic-style model entries
-        anthropic_models = []
-        for model_id in supported_models:
-            anthropic_models.append(
-                {
-                    "type": "model",
-                    "id": model_id,
-                    "display_name": self._get_display_name(model_id),
-                    "created_at": self._get_created_timestamp(model_id),
-                }
-            )
+        # timestamp is already provided from context, no need for fallback
 
-        #
-
-        {
-            "id": "gpt-4o",
-            "object": "model",
-            "created": 1715367049,
-            "owned_by": "openai",
-        },
-        {
-            "id": "gpt-4o-mini",
-            "object": "model",
-            "created": 1721172741,
-            "owned_by": "openai",
-        },
-        {
-            "id": "gpt-4-turbo",
-            "object": "model",
-            "created": 1712361441,
-            "owned_by": "openai",
-        },
-        {
-            "id": "gpt-4-turbo-preview",
-            "object": "model",
-            "created": 1706037777,
-            "owned_by": "openai",
-        },
-        {
-            "id": "o1",
-            "object": "model",
-            "created": 1734375816,
-            "owned_by": "openai",
-        },
-        {
-            "id": "o1-mini",
-            "object": "model",
-            "created": 1725649008,
-            "owned_by": "openai",
-        },
-        {
-            "id": "o1-preview",
-            "object": "model",
-            "created": 1725648897,
-            "owned_by": "openai",
-        },
-        {
-            "id": "o3",
-            "object": "model",
-            "created": 1744225308,
-            "owned_by": "openai",
-        },
-        {
-            "id": "o3-mini",
-            "object": "model",
-            "created": 1737146383,
-            "owned_by": "openai",
-        },
-        ]
+        # Append streaming chunk as JSON to raw file
+        import json
 
-
-        return {
-            "data": anthropic_models + openai_models,
-            "has_more": False,
-            "object": "list",
-        }
+        from ccproxy.utils.simple_request_logger import append_streaming_log
 
-
-
-
-        "
-        "
-
-
-            "claude-3-5-haiku-20241022": "Claude Haiku 3.5",
-            "claude-3-5-haiku-latest": "Claude Haiku 3.5",
-            "claude-3-5-sonnet-20240620": "Claude Sonnet 3.5 (Old)",
-            "claude-3-haiku-20240307": "Claude Haiku 3",
-            "claude-3-opus-20240229": "Claude Opus 3",
-        }
-        return display_names.get(model_id, model_id)
-
-    def _get_created_timestamp(self, model_id: str) -> int:
-        """Get created timestamp for a model ID."""
-        timestamps = {
-            "claude-opus-4-20250514": 1747526400,  # 2025-05-22
-            "claude-sonnet-4-20250514": 1747526400,  # 2025-05-22
-            "claude-3-7-sonnet-20250219": 1740268800,  # 2025-02-24
-            "claude-3-5-sonnet-20241022": 1729555200,  # 2024-10-22
-            "claude-3-5-haiku-20241022": 1729555200,  # 2024-10-22
-            "claude-3-5-haiku-latest": 1729555200,  # 2024-10-22
-            "claude-3-5-sonnet-20240620": 1718841600,  # 2024-06-20
-            "claude-3-haiku-20240307": 1709769600,  # 2024-03-07
-            "claude-3-opus-20240229": 1709164800,  # 2024-02-29
-        }
-        return timestamps.get(model_id, 1677610602)  # Default timestamp
+        chunk_data = json.dumps(chunk, default=str) + "\n"
+        await append_streaming_log(
+            request_id=request_id,
+            log_type="sdk_streaming",
+            data=chunk_data.encode("utf-8"),
+            timestamp=timestamp,
+        )
 
     async def validate_health(self) -> bool:
         """
```