ccproxy-api 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. ccproxy/_version.py +2 -2
  2. ccproxy/adapters/openai/adapter.py +1 -1
  3. ccproxy/adapters/openai/streaming.py +1 -0
  4. ccproxy/api/app.py +134 -224
  5. ccproxy/api/dependencies.py +22 -2
  6. ccproxy/api/middleware/errors.py +27 -3
  7. ccproxy/api/middleware/logging.py +4 -0
  8. ccproxy/api/responses.py +6 -1
  9. ccproxy/api/routes/claude.py +222 -17
  10. ccproxy/api/routes/proxy.py +25 -6
  11. ccproxy/api/services/permission_service.py +2 -2
  12. ccproxy/claude_sdk/__init__.py +4 -8
  13. ccproxy/claude_sdk/client.py +661 -131
  14. ccproxy/claude_sdk/exceptions.py +16 -0
  15. ccproxy/claude_sdk/manager.py +219 -0
  16. ccproxy/claude_sdk/message_queue.py +342 -0
  17. ccproxy/claude_sdk/options.py +5 -0
  18. ccproxy/claude_sdk/session_client.py +546 -0
  19. ccproxy/claude_sdk/session_pool.py +550 -0
  20. ccproxy/claude_sdk/stream_handle.py +538 -0
  21. ccproxy/claude_sdk/stream_worker.py +392 -0
  22. ccproxy/claude_sdk/streaming.py +53 -11
  23. ccproxy/cli/commands/serve.py +96 -0
  24. ccproxy/cli/options/claude_options.py +47 -0
  25. ccproxy/config/__init__.py +0 -3
  26. ccproxy/config/claude.py +171 -23
  27. ccproxy/config/discovery.py +10 -1
  28. ccproxy/config/scheduler.py +4 -4
  29. ccproxy/config/settings.py +19 -1
  30. ccproxy/core/http_transformers.py +305 -73
  31. ccproxy/core/logging.py +108 -12
  32. ccproxy/core/transformers.py +5 -0
  33. ccproxy/models/claude_sdk.py +57 -0
  34. ccproxy/models/detection.py +126 -0
  35. ccproxy/observability/access_logger.py +72 -14
  36. ccproxy/observability/metrics.py +151 -0
  37. ccproxy/observability/storage/duckdb_simple.py +12 -0
  38. ccproxy/observability/storage/models.py +16 -0
  39. ccproxy/observability/streaming_response.py +107 -0
  40. ccproxy/scheduler/manager.py +31 -6
  41. ccproxy/scheduler/tasks.py +122 -0
  42. ccproxy/services/claude_detection_service.py +269 -0
  43. ccproxy/services/claude_sdk_service.py +333 -130
  44. ccproxy/services/proxy_service.py +91 -200
  45. ccproxy/utils/__init__.py +9 -1
  46. ccproxy/utils/disconnection_monitor.py +83 -0
  47. ccproxy/utils/id_generator.py +12 -0
  48. ccproxy/utils/startup_helpers.py +408 -0
  49. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/METADATA +29 -2
  50. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/RECORD +53 -41
  51. ccproxy/config/loader.py +0 -105
  52. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/WHEEL +0 -0
  53. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/entry_points.txt +0 -0
  54. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/licenses/LICENSE +0 -0
ccproxy/api/routes/claude.py

@@ -14,6 +14,7 @@ from ccproxy.adapters.openai.adapter import (
 )
 from ccproxy.api.dependencies import ClaudeServiceDep
 from ccproxy.models.messages import MessageCreateParams, MessageResponse
+from ccproxy.observability.streaming_response import StreamingResponseWithLogging


 # Create the router for Claude SDK endpoints
@@ -24,9 +25,9 @@ logger = structlog.get_logger(__name__)

 @router.post("/v1/chat/completions", response_model=None)
 async def create_openai_chat_completion(
-    request: Request,
     openai_request: OpenAIChatCompletionRequest,
     claude_service: ClaudeServiceDep,
+    request: Request,
 ) -> StreamingResponse | OpenAIChatCompletionResponse:
     """Create a chat completion using Claude SDK with OpenAI-compatible format.

@@ -43,13 +44,102 @@ async def create_openai_chat_completion(
         # Extract stream parameter
         stream = openai_request.stream or False

+        # Get request context from middleware
+        request_context = getattr(request.state, "context", None)
+
+        if request_context is None:
+            raise HTTPException(
+                status_code=500, detail="Internal server error: no request context"
+            )
+
         # Call Claude SDK service with adapted request
-        if request and hasattr(request, "state") and hasattr(request.state, "context"):
-            # Use existing context from middleware
-            ctx = request.state.context
-            # Add service-specific metadata
-            ctx.add_metadata(streaming=stream)
+        response = await claude_service.create_completion(
+            messages=anthropic_request["messages"],
+            model=anthropic_request["model"],
+            temperature=anthropic_request.get("temperature"),
+            max_tokens=anthropic_request.get("max_tokens"),
+            stream=stream,
+            user_id=getattr(openai_request, "user", None),
+            request_context=request_context,
+        )
+
+        if stream:
+            # Handle streaming response
+            async def openai_stream_generator() -> AsyncIterator[bytes]:
+                # Use adapt_stream for streaming responses
+                async for openai_chunk in adapter.adapt_stream(response):  # type: ignore[arg-type]
+                    yield f"data: {json.dumps(openai_chunk)}\n\n".encode()
+                # Send final chunk
+                yield b"data: [DONE]\n\n"

+            # Use unified streaming wrapper with logging
+            return StreamingResponseWithLogging(
+                content=openai_stream_generator(),
+                request_context=request_context,
+                metrics=getattr(claude_service, "metrics", None),
+                status_code=200,
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                },
+            )
+        else:
+            # Convert non-streaming response to OpenAI format using adapter
+            # Convert MessageResponse model to dict for adapter
+            # In non-streaming mode, response should always be MessageResponse
+            assert isinstance(response, MessageResponse), (
+                "Non-streaming response must be MessageResponse"
+            )
+            response_dict = response.model_dump()
+            openai_response = adapter.adapt_response(response_dict)
+            return OpenAIChatCompletionResponse.model_validate(openai_response)
+
+    except Exception as e:
+        # Re-raise specific proxy errors to be handled by the error handler
+        from ccproxy.core.errors import ClaudeProxyError
+
+        if isinstance(e, ClaudeProxyError):
+            raise
+        raise HTTPException(
+            status_code=500, detail=f"Internal server error: {str(e)}"
+        ) from e
+
+
+@router.post(
+    "/{session_id}/v1/chat/completions",
+    response_model=None,
+)
+async def create_openai_chat_completion_with_session(
+    session_id: str,
+    openai_request: OpenAIChatCompletionRequest,
+    claude_service: ClaudeServiceDep,
+    request: Request,
+) -> StreamingResponse | OpenAIChatCompletionResponse:
+    """Create a chat completion using Claude SDK with OpenAI-compatible format and session ID.
+
+    This endpoint handles OpenAI API format requests with session ID and converts them
+    to Anthropic format before using the Claude SDK directly.
+    """
+    try:
+        # Create adapter instance
+        adapter = OpenAIAdapter()
+
+        # Convert entire OpenAI request to Anthropic format using adapter
+        anthropic_request = adapter.adapt_request(openai_request.model_dump())
+
+        # Extract stream parameter
+        stream = openai_request.stream or False
+
+        # Get request context from middleware
+        request_context = getattr(request.state, "context", None)
+
+        if request_context is None:
+            raise HTTPException(
+                status_code=500, detail="Internal server error: no request context"
+            )
+
+        # Call Claude SDK service with adapted request and session_id
         response = await claude_service.create_completion(
             messages=anthropic_request["messages"],
             model=anthropic_request["model"],
@@ -57,6 +147,8 @@ async def create_openai_chat_completion(
             max_tokens=anthropic_request.get("max_tokens"),
             stream=stream,
             user_id=getattr(openai_request, "user", None),
+            session_id=session_id,
+            request_context=request_context,
         )

         if stream:
@@ -68,8 +160,13 @@ async def create_openai_chat_completion(
                 # Send final chunk
                 yield b"data: [DONE]\n\n"

-            return StreamingResponse(
-                openai_stream_generator(),
+            # Use unified streaming wrapper with logging
+            # Session interrupts are now handled directly by the StreamHandle
+            return StreamingResponseWithLogging(
+                content=openai_stream_generator(),
+                request_context=request_context,
+                metrics=getattr(claude_service, "metrics", None),
+                status_code=200,
                 media_type="text/event-stream",
                 headers={
                     "Cache-Control": "no-cache",
@@ -98,10 +195,98 @@ async def create_openai_chat_completion(
         ) from e


+@router.post(
+    "/{session_id}/v1/messages",
+    response_model=None,
+)
+async def create_anthropic_message_with_session(
+    session_id: str,
+    message_request: MessageCreateParams,
+    claude_service: ClaudeServiceDep,
+    request: Request,
+) -> StreamingResponse | MessageResponse:
+    """Create a message using Claude SDK with Anthropic format and session ID.
+
+    This endpoint handles Anthropic API format requests with session ID directly
+    using the Claude SDK without any format conversion.
+    """
+    try:
+        # Extract parameters from Anthropic request
+        messages = [msg.model_dump() for msg in message_request.messages]
+        model = message_request.model
+        temperature = message_request.temperature
+        max_tokens = message_request.max_tokens
+        stream = message_request.stream or False
+
+        # Get request context from middleware
+        request_context = getattr(request.state, "context", None)
+        if request_context is None:
+            raise HTTPException(
+                status_code=500, detail="Internal server error: no request context"
+            )
+
+        # Call Claude SDK service directly with Anthropic format and session_id
+        response = await claude_service.create_completion(
+            messages=messages,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            stream=stream,
+            user_id=getattr(message_request, "user_id", None),
+            session_id=session_id,
+            request_context=request_context,
+        )
+
+        if stream:
+            # Handle streaming response
+            async def anthropic_stream_generator() -> AsyncIterator[bytes]:
+                async for chunk in response:  # type: ignore[union-attr]
+                    if chunk:
+                        # All chunks from Claude SDK streaming should be dict format
+                        # and need proper SSE event formatting
+                        if isinstance(chunk, dict):
+                            # Determine event type from chunk type
+                            event_type = chunk.get("type", "message_delta")
+                            yield f"event: {event_type}\n".encode()
+                            yield f"data: {json.dumps(chunk)}\n\n".encode()
+                        else:
+                            # Fallback for unexpected format
+                            yield f"data: {json.dumps(chunk)}\n\n".encode()
+                # No final [DONE] chunk for Anthropic format
+
+            # Use unified streaming wrapper with logging
+            # Session interrupts are now handled directly by the StreamHandle
+            return StreamingResponseWithLogging(
+                content=anthropic_stream_generator(),
+                request_context=request_context,
+                metrics=getattr(claude_service, "metrics", None),
+                status_code=200,
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "Connection": "keep-alive",
+                },
+            )
+        else:
+            # Return Anthropic format response directly
+            return MessageResponse.model_validate(response)
+
+    except Exception as e:
+        # Re-raise specific proxy errors to be handled by the error handler
+        from ccproxy.core.errors import ClaudeProxyError
+
+        if isinstance(e, ClaudeProxyError):
+            raise e
+        raise HTTPException(
+            status_code=500, detail=f"Internal server error: {str(e)}"
+        ) from e
+
+
 @router.post("/v1/messages", response_model=None)
 async def create_anthropic_message(
-    request: MessageCreateParams,
+    message_request: MessageCreateParams,
     claude_service: ClaudeServiceDep,
+    request: Request,
 ) -> StreamingResponse | MessageResponse:
     """Create a message using Claude SDK with Anthropic format.

@@ -110,11 +295,24 @@ async def create_anthropic_message(
     """
     try:
         # Extract parameters from Anthropic request
-        messages = [msg.model_dump() for msg in request.messages]
-        model = request.model
-        temperature = request.temperature
-        max_tokens = request.max_tokens
-        stream = request.stream or False
+        messages = [msg.model_dump() for msg in message_request.messages]
+        model = message_request.model
+        temperature = message_request.temperature
+        max_tokens = message_request.max_tokens
+        stream = message_request.stream or False
+
+        # Get request context from middleware
+        request_context = getattr(request.state, "context", None)
+        if request_context is None:
+            raise HTTPException(
+                status_code=500, detail="Internal server error: no request context"
+            )
+
+        # Extract session_id from metadata if present
+        session_id = None
+        if message_request.metadata:
+            metadata_dict = message_request.metadata.model_dump()
+            session_id = metadata_dict.get("session_id")

         # Call Claude SDK service directly with Anthropic format
         response = await claude_service.create_completion(
@@ -123,7 +321,9 @@ async def create_anthropic_message(
             temperature=temperature,
             max_tokens=max_tokens,
             stream=stream,
-            user_id=getattr(request, "user_id", None),
+            user_id=getattr(message_request, "user_id", None),
+            session_id=session_id,
+            request_context=request_context,
         )

         if stream:
@@ -143,8 +343,13 @@ async def create_anthropic_message(
                             yield f"data: {json.dumps(chunk)}\n\n".encode()
                 # No final [DONE] chunk for Anthropic format

-            return StreamingResponse(
-                anthropic_stream_generator(),
+            # Use unified streaming wrapper with logging for all requests
+            # Session interrupts are now handled directly by the StreamHandle
+            return StreamingResponseWithLogging(
+                content=anthropic_stream_generator(),
+                request_context=request_context,
+                metrics=getattr(claude_service, "metrics", None),
+                status_code=200,
                 media_type="text/event-stream",
                 headers={
                     "Cache-Control": "no-cache",
ccproxy/api/routes/proxy.py

@@ -38,9 +38,11 @@ async def create_openai_chat_completion(
     )

     # Handle the request using proxy service directly
+    # Strip the /api prefix from the path
+    service_path = request.url.path.removeprefix("/api")
     response = await proxy_service.handle_request(
         method=request.method,
-        path=request.url.path,
+        path=service_path,
         headers=headers,
         body=body,
         query_params=query_params,
@@ -55,6 +57,8 @@ async def create_openai_chat_completion(
     # Tuple response - handle regular response
     status_code, response_headers, response_body = response
     if status_code >= 400:
+        # Store headers for preservation middleware
+        request.state.preserve_headers = response_headers
         # Forward error response directly with headers
         return ProxyResponse(
             content=response_body,
@@ -128,9 +132,11 @@ async def create_anthropic_message(
     )

     # Handle the request using proxy service directly
+    # Strip the /api prefix from the path
+    service_path = request.url.path.removeprefix("/api")
     response = await proxy_service.handle_request(
         method=request.method,
-        path=request.url.path,
+        path=service_path,
         headers=headers,
         body=body,
         query_params=query_params,
@@ -145,6 +151,8 @@ async def create_anthropic_message(
     # Tuple response - handle regular response
     status_code, response_headers, response_body = response
     if status_code >= 400:
+        # Store headers for preservation middleware
+        request.state.preserve_headers = response_headers
         # Forward error response directly with headers
         return ProxyResponse(
             content=response_body,
@@ -163,15 +171,26 @@ async def create_anthropic_message(
                     if line.strip():
                         yield f"{line}\n".encode()

+            # Start with the response headers from proxy service
+            streaming_headers = response_headers.copy()
+
+            # Ensure critical headers for streaming
+            streaming_headers["Cache-Control"] = "no-cache"
+            streaming_headers["Connection"] = "keep-alive"
+
+            # Set content-type if not already set by upstream
+            if "content-type" not in streaming_headers:
+                streaming_headers["content-type"] = "text/event-stream"
+
             return StreamingResponse(
                 stream_generator(),
                 media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                },
+                headers=streaming_headers,
             )
         else:
+            # Store headers for preservation middleware
+            request.state.preserve_headers = response_headers
+
             # Parse JSON response
             response_data = json.loads(response_body.decode())

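These hunks show only the producer side of header preservation: error and non-streaming paths stash the upstream headers on request.state.preserve_headers. The consumer is presumably the middleware updated in ccproxy/api/middleware/errors.py (+27 -3), which is not shown in this diff; the following is a hypothetical sketch of what such a consumer could look like, not the package's actual implementation.

# Hypothetical consumer sketch for request.state.preserve_headers.
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response


class HeaderPreservationMiddleware(BaseHTTPMiddleware):
    async def dispatch(self, request: Request, call_next) -> Response:
        response = await call_next(request)
        preserved = getattr(request.state, "preserve_headers", None)
        if preserved:
            # Replay upstream headers onto the outgoing response, skipping
            # hop-by-hop and length headers that the server must own.
            for name, value in preserved.items():
                if name.lower() not in ("content-length", "transfer-encoding", "connection"):
                    response.headers[name] = value
        return response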
ccproxy/api/services/permission_service.py

@@ -35,7 +35,7 @@ class PermissionService:
     async def start(self) -> None:
         if self._expiry_task is None:
             self._expiry_task = asyncio.create_task(self._expiry_checker())
-        logger.info("permission_service_started")
+        logger.debug("permission_service_started")

     async def stop(self) -> None:
         self._shutdown = True
@@ -44,7 +44,7 @@ class PermissionService:
             with contextlib.suppress(asyncio.CancelledError):
                 await self._expiry_task
             self._expiry_task = None
-        logger.info("permission_service_stopped")
+        logger.debug("permission_service_stopped")

     async def request_permission(self, tool_name: str, input: dict[str, str]) -> str:
         """Create a new permission request.
ccproxy/claude_sdk/__init__.py

@@ -1,21 +1,17 @@
 """Claude SDK integration module."""

-from .client import (
-    ClaudeSDKClient,
-    ClaudeSDKConnectionError,
-    ClaudeSDKError,
-    ClaudeSDKProcessError,
-)
+from .client import ClaudeSDKClient
 from .converter import MessageConverter
+from .exceptions import ClaudeSDKError, StreamTimeoutError
 from .options import OptionsHandler
 from .parser import parse_formatted_sdk_content


 __all__ = [
+    # Session Context will be imported here once created
     "ClaudeSDKClient",
     "ClaudeSDKError",
-    "ClaudeSDKConnectionError",
-    "ClaudeSDKProcessError",
+    "StreamTimeoutError",
     "MessageConverter",
     "OptionsHandler",
     "parse_formatted_sdk_content",