ccproxy-api 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. ccproxy/_version.py +2 -2
  2. ccproxy/adapters/openai/adapter.py +1 -1
  3. ccproxy/adapters/openai/streaming.py +1 -0
  4. ccproxy/api/app.py +134 -224
  5. ccproxy/api/dependencies.py +22 -2
  6. ccproxy/api/middleware/errors.py +27 -3
  7. ccproxy/api/middleware/logging.py +4 -0
  8. ccproxy/api/responses.py +6 -1
  9. ccproxy/api/routes/claude.py +222 -17
  10. ccproxy/api/routes/proxy.py +25 -6
  11. ccproxy/api/services/permission_service.py +2 -2
  12. ccproxy/claude_sdk/__init__.py +4 -8
  13. ccproxy/claude_sdk/client.py +661 -131
  14. ccproxy/claude_sdk/exceptions.py +16 -0
  15. ccproxy/claude_sdk/manager.py +219 -0
  16. ccproxy/claude_sdk/message_queue.py +342 -0
  17. ccproxy/claude_sdk/options.py +5 -0
  18. ccproxy/claude_sdk/session_client.py +546 -0
  19. ccproxy/claude_sdk/session_pool.py +550 -0
  20. ccproxy/claude_sdk/stream_handle.py +538 -0
  21. ccproxy/claude_sdk/stream_worker.py +392 -0
  22. ccproxy/claude_sdk/streaming.py +53 -11
  23. ccproxy/cli/commands/serve.py +96 -0
  24. ccproxy/cli/options/claude_options.py +47 -0
  25. ccproxy/config/__init__.py +0 -3
  26. ccproxy/config/claude.py +171 -23
  27. ccproxy/config/discovery.py +10 -1
  28. ccproxy/config/scheduler.py +4 -4
  29. ccproxy/config/settings.py +19 -1
  30. ccproxy/core/http_transformers.py +305 -73
  31. ccproxy/core/logging.py +108 -12
  32. ccproxy/core/transformers.py +5 -0
  33. ccproxy/models/claude_sdk.py +57 -0
  34. ccproxy/models/detection.py +126 -0
  35. ccproxy/observability/access_logger.py +72 -14
  36. ccproxy/observability/metrics.py +151 -0
  37. ccproxy/observability/storage/duckdb_simple.py +12 -0
  38. ccproxy/observability/storage/models.py +16 -0
  39. ccproxy/observability/streaming_response.py +107 -0
  40. ccproxy/scheduler/manager.py +31 -6
  41. ccproxy/scheduler/tasks.py +122 -0
  42. ccproxy/services/claude_detection_service.py +269 -0
  43. ccproxy/services/claude_sdk_service.py +333 -130
  44. ccproxy/services/proxy_service.py +91 -200
  45. ccproxy/utils/__init__.py +9 -1
  46. ccproxy/utils/disconnection_monitor.py +83 -0
  47. ccproxy/utils/id_generator.py +12 -0
  48. ccproxy/utils/startup_helpers.py +408 -0
  49. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/METADATA +29 -2
  50. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/RECORD +53 -41
  51. ccproxy/config/loader.py +0 -105
  52. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/WHEEL +0 -0
  53. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/entry_points.txt +0 -0
  54. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -5,7 +5,6 @@ import json
5
5
  import os
6
6
  import random
7
7
  import time
8
- import urllib.parse
9
8
  from collections.abc import AsyncGenerator
10
9
  from pathlib import Path
11
10
  from typing import TYPE_CHECKING, Any
@@ -29,6 +28,7 @@ from ccproxy.observability import (
29
28
  timed_operation,
30
29
  )
31
30
  from ccproxy.observability.access_logger import log_request_access
31
+ from ccproxy.observability.streaming_response import StreamingResponseWithLogging
32
32
  from ccproxy.services.credentials.manager import CredentialsManager
33
33
  from ccproxy.testing import RealisticMockResponseGenerator
34
34
  from ccproxy.utils.simple_request_logger import (
@@ -83,6 +83,7 @@ class ProxyService:
83
83
  proxy_mode: str = "full",
84
84
  target_base_url: str = "https://api.anthropic.com",
85
85
  metrics: PrometheusMetrics | None = None,
86
+ app_state: Any = None,
86
87
  ) -> None:
87
88
  """Initialize the proxy service.
88
89
 
@@ -93,6 +94,7 @@ class ProxyService:
93
94
  proxy_mode: Transformation mode - "minimal" or "full"
94
95
  target_base_url: Base URL for the target API
95
96
  metrics: Prometheus metrics collector (optional)
97
+ app_state: FastAPI app state for accessing detection data
96
98
  """
97
99
  self.proxy_client = proxy_client
98
100
  self.credentials_manager = credentials_manager
@@ -100,6 +102,7 @@ class ProxyService:
100
102
  self.proxy_mode = proxy_mode
101
103
  self.target_base_url = target_base_url.rstrip("/")
102
104
  self.metrics = metrics or get_metrics()
105
+ self.app_state = app_state
103
106
 
104
107
  # Create concrete transformers
105
108
  self.request_transformer = HTTPRequestTransformer()
@@ -122,10 +125,6 @@ class ProxyService:
122
125
  self._verbose_api = (
123
126
  os.environ.get("CCPROXY_VERBOSE_API", "false").lower() == "true"
124
127
  )
125
- # Note: Request logging is now handled by simple_request_logger utility
126
- # which checks CCPROXY_LOG_REQUESTS and CCPROXY_REQUEST_LOG_DIR independently
127
-
128
- # Request context is now passed as parameters to methods
129
128
 
130
129
  def _init_proxy_url(self) -> str | None:
131
130
  """Initialize proxy URL from environment variables."""
@@ -239,9 +238,25 @@ class ProxyService:
239
238
 
240
239
  # 2. Request transformation
241
240
  async with timed_operation("request_transform", ctx.request_id):
242
- logger.debug("request_transform_start")
243
- transformed_request = await self._transform_request(
244
- method, path, headers, body, query_params, access_token
241
+ injection_mode = (
242
+ self.settings.claude.system_prompt_injection_mode.value
243
+ )
244
+ logger.debug(
245
+ "request_transform_start",
246
+ system_prompt_injection_mode=injection_mode,
247
+ )
248
+ transformed_request = (
249
+ await self.request_transformer.transform_proxy_request(
250
+ method,
251
+ path,
252
+ headers,
253
+ body,
254
+ query_params,
255
+ access_token,
256
+ self.target_base_url,
257
+ self.app_state,
258
+ injection_mode,
259
+ )
245
260
  )
246
261
 
247
262
  # 3. Check for bypass header to skip upstream forwarding
@@ -330,29 +345,25 @@ class ProxyService:
330
345
  content_length=len(response_body) if response_body else 0,
331
346
  )
332
347
 
333
- # Transform error to OpenAI format if this is an OpenAI endpoint
334
- transformed_error_body = response_body
335
- if self.response_transformer._is_openai_request(path):
336
- try:
337
- error_data = json.loads(response_body.decode("utf-8"))
338
- openai_error = self.openai_adapter.adapt_error(
339
- error_data
340
- )
341
- transformed_error_body = json.dumps(
342
- openai_error
343
- ).encode("utf-8")
344
- except (json.JSONDecodeError, UnicodeDecodeError):
345
- # Keep original error if parsing fails
346
- pass
347
-
348
- transformed_response = ResponseData(
349
- status_code=status_code,
350
- headers=response_headers,
351
- body=transformed_error_body,
348
+ # Use transformer to handle error transformation (including OpenAI format)
349
+ transformed_response = (
350
+ await self.response_transformer.transform_proxy_response(
351
+ status_code,
352
+ response_headers,
353
+ response_body,
354
+ path,
355
+ self.proxy_mode,
356
+ )
352
357
  )
353
358
  else:
354
- transformed_response = await self._transform_response(
355
- status_code, response_headers, response_body, path
359
+ transformed_response = (
360
+ await self.response_transformer.transform_proxy_response(
361
+ status_code,
362
+ response_headers,
363
+ response_body,
364
+ path,
365
+ self.proxy_mode,
366
+ )
356
367
  )
357
368
 
358
369
  # 5. Extract response metrics using direct JSON parsing
@@ -393,14 +404,6 @@ class ProxyService:
393
404
  cost_usd=cost_usd,
394
405
  )
395
406
 
396
- # 7. Log comprehensive access log (includes Prometheus metrics)
397
- await log_request_access(
398
- context=ctx,
399
- status_code=status_code,
400
- method=method,
401
- metrics=self.metrics,
402
- )
403
-
404
407
  return (
405
408
  transformed_response["status_code"],
406
409
  transformed_response["headers"],
@@ -408,27 +411,7 @@ class ProxyService:
408
411
  )
409
412
 
410
413
  except Exception as e:
411
- # Record error metrics via access logger
412
- error_type = type(e).__name__
413
-
414
- # Log the error with access logger (includes metrics)
415
- await log_request_access(
416
- context=ctx,
417
- method=method,
418
- error_message=str(e),
419
- metrics=self.metrics,
420
- error_type=error_type,
421
- )
422
-
423
- logger.exception(
424
- "proxy_request_failed",
425
- method=method,
426
- path=path,
427
- error=str(e),
428
- exc_info=True,
429
- )
430
- # Re-raise the exception without transformation
431
- # Let higher layers handle specific error types
414
+ ctx.add_metadata(error=e)
432
415
  raise
433
416
 
434
417
  async def _get_access_token(self) -> str:
@@ -491,120 +474,6 @@ class ProxyService:
491
474
  detail="Authentication failed",
492
475
  ) from e
493
476
 
494
- async def _transform_request(
495
- self,
496
- method: str,
497
- path: str,
498
- headers: dict[str, str],
499
- body: bytes | None,
500
- query_params: dict[str, str | list[str]] | None,
501
- access_token: str,
502
- ) -> RequestData:
503
- """Transform request using the transformer pipeline.
504
-
505
- Args:
506
- method: HTTP method
507
- path: Request path
508
- headers: Request headers
509
- body: Request body
510
- query_params: Query parameters
511
- access_token: OAuth access token
512
-
513
- Returns:
514
- Transformed request data
515
- """
516
- # Transform path
517
- transformed_path = self.request_transformer.transform_path(
518
- path, self.proxy_mode
519
- )
520
- target_url = f"{self.target_base_url}{transformed_path}"
521
-
522
- # Add beta=true query parameter for /v1/messages requests if not already present
523
- if transformed_path == "/v1/messages":
524
- if query_params is None:
525
- query_params = {}
526
- elif "beta" not in query_params:
527
- query_params = dict(query_params) # Make a copy
528
-
529
- if "beta" not in query_params:
530
- query_params["beta"] = "true"
531
- logger.debug("beta_parameter_added")
532
-
533
- # Transform body first (as it might change size)
534
- proxy_body = None
535
- if body:
536
- proxy_body = self.request_transformer.transform_request_body(
537
- body, path, self.proxy_mode
538
- )
539
-
540
- # Transform headers (and update Content-Length if body changed)
541
- proxy_headers = self.request_transformer.create_proxy_headers(
542
- headers, access_token, self.proxy_mode
543
- )
544
-
545
- # Update Content-Length if body was transformed and size changed
546
- if proxy_body and body and len(proxy_body) != len(body):
547
- # Remove any existing content-length headers (case-insensitive)
548
- proxy_headers = {
549
- k: v for k, v in proxy_headers.items() if k.lower() != "content-length"
550
- }
551
- proxy_headers["Content-Length"] = str(len(proxy_body))
552
- elif proxy_body and not body:
553
- # New body was created where none existed
554
- proxy_headers["Content-Length"] = str(len(proxy_body))
555
-
556
- # Add query parameters to URL if present
557
- if query_params:
558
- query_string = urllib.parse.urlencode(query_params)
559
- target_url = f"{target_url}?{query_string}"
560
-
561
- return {
562
- "method": method,
563
- "url": target_url,
564
- "headers": proxy_headers,
565
- "body": proxy_body,
566
- }
567
-
568
- async def _transform_response(
569
- self,
570
- status_code: int,
571
- headers: dict[str, str],
572
- body: bytes,
573
- original_path: str,
574
- ) -> ResponseData:
575
- """Transform response using the transformer pipeline.
576
-
577
- Args:
578
- status_code: HTTP status code
579
- headers: Response headers
580
- body: Response body
581
- original_path: Original request path for context
582
-
583
- Returns:
584
- Transformed response data
585
- """
586
- # For error responses, pass through without transformation
587
- if status_code >= 400:
588
- return {
589
- "status_code": status_code,
590
- "headers": headers,
591
- "body": body,
592
- }
593
-
594
- transformed_body = self.response_transformer.transform_response_body(
595
- body, original_path, self.proxy_mode
596
- )
597
-
598
- transformed_headers = self.response_transformer.transform_response_headers(
599
- headers, original_path, len(transformed_body), self.proxy_mode
600
- )
601
-
602
- return {
603
- "status_code": status_code,
604
- "headers": transformed_headers,
605
- "body": transformed_body,
606
- }
607
-
608
477
  def _redact_headers(self, headers: dict[str, str]) -> dict[str, str]:
609
478
  """Redact sensitive information from headers for safe logging."""
610
479
  return {
@@ -810,18 +679,17 @@ class ProxyService:
810
679
  error_detail=error_content.decode("utf-8", errors="replace"),
811
680
  )
812
681
 
813
- # Transform error to OpenAI format if this is an OpenAI endpoint
814
- transformed_error_body = error_content
815
- if self.response_transformer._is_openai_request(original_path):
816
- try:
817
- error_data = json.loads(error_content.decode("utf-8"))
818
- openai_error = self.openai_adapter.adapt_error(error_data)
819
- transformed_error_body = json.dumps(openai_error).encode(
820
- "utf-8"
821
- )
822
- except (json.JSONDecodeError, UnicodeDecodeError):
823
- # Keep original error if parsing fails
824
- pass
682
+ # Use transformer to handle error transformation (including OpenAI format)
683
+ transformed_error_response = (
684
+ await self.response_transformer.transform_proxy_response(
685
+ response.status_code,
686
+ dict(response.headers),
687
+ error_content,
688
+ original_path,
689
+ self.proxy_mode,
690
+ )
691
+ )
692
+ transformed_error_body = transformed_error_response["body"]
825
693
 
826
694
  # Update context with error status
827
695
  ctx.add_metadata(status_code=response.status_code)
@@ -844,10 +712,32 @@ class ProxyService:
844
712
  )
845
713
 
846
714
  # If no error, proceed with streaming
847
- # Store response headers to preserve for streaming
715
+ # Make initial request to get headers
716
+ proxy_url = self._proxy_url
717
+ verify = self._ssl_context
718
+
848
719
  response_headers = {}
849
720
  response_status = 200
850
721
 
722
+ async with httpx.AsyncClient(
723
+ timeout=timeout, proxy=proxy_url, verify=verify
724
+ ) as client:
725
+ # Make initial request to capture headers
726
+ initial_response = await client.send(
727
+ client.build_request(
728
+ method=request_data["method"],
729
+ url=request_data["url"],
730
+ headers=request_data["headers"],
731
+ content=request_data["body"],
732
+ ),
733
+ stream=True,
734
+ )
735
+ response_status = initial_response.status_code
736
+ response_headers = dict(initial_response.headers)
737
+
738
+ # Close the initial response since we'll make a new one in the generator
739
+ await initial_response.aclose()
740
+
851
741
  # Initialize streaming metrics collector
852
742
  from ccproxy.utils.streaming_metrics import StreamingMetricsCollector
853
743
 
@@ -1006,18 +896,7 @@ class ProxyService:
1006
896
  cost_usd=cost_usd,
1007
897
  )
1008
898
 
1009
- # Log comprehensive access log for streaming completion
1010
- from ccproxy.observability.access_logger import (
1011
- log_request_access,
1012
- )
1013
-
1014
- await log_request_access(
1015
- context=ctx,
1016
- status_code=response_status,
1017
- metrics=self.metrics,
1018
- # Additional metadata for streaming completion
1019
- event_type="streaming_complete",
1020
- )
899
+ # Access logging is now handled by StreamingResponseWithLogging
1021
900
 
1022
901
  if (
1023
902
  "content_block_delta" in chunk_str
@@ -1063,6 +942,11 @@ class ProxyService:
1063
942
  # Always use upstream headers as base
1064
943
  final_headers = response_headers.copy()
1065
944
 
945
+ # Remove headers that can cause conflicts
946
+ final_headers.pop(
947
+ "date", None
948
+ ) # Remove upstream date header to avoid conflicts
949
+
1066
950
  # Ensure critical headers for streaming
1067
951
  final_headers["Cache-Control"] = "no-cache"
1068
952
  final_headers["Connection"] = "keep-alive"
@@ -1071,8 +955,10 @@ class ProxyService:
1071
955
  if "content-type" not in final_headers:
1072
956
  final_headers["content-type"] = "text/event-stream"
1073
957
 
1074
- return StreamingResponse(
1075
- stream_generator(),
958
+ return StreamingResponseWithLogging(
959
+ content=stream_generator(),
960
+ request_context=ctx,
961
+ metrics=self.metrics,
1076
962
  status_code=response_status,
1077
963
  headers=final_headers,
1078
964
  )
@@ -1330,7 +1216,12 @@ class ProxyService:
1330
1216
  cost_usd=cost_usd,
1331
1217
  )
1332
1218
 
1333
- return StreamingResponse(realistic_mock_stream_generator(), headers=headers)
1219
+ return StreamingResponseWithLogging(
1220
+ content=realistic_mock_stream_generator(),
1221
+ request_context=ctx,
1222
+ metrics=self.metrics,
1223
+ headers=headers,
1224
+ )
1334
1225
 
1335
1226
  async def _generate_realistic_openai_stream(
1336
1227
  self,
ccproxy/utils/__init__.py CHANGED
@@ -1,6 +1,14 @@
1
1
  """Utility modules for shared functionality across the application."""
2
2
 
3
3
  from .cost_calculator import calculate_cost_breakdown, calculate_token_cost
4
+ from .disconnection_monitor import monitor_disconnection, monitor_stuck_stream
5
+ from .id_generator import generate_client_id
4
6
 
5
7
 
6
- __all__ = ["calculate_token_cost", "calculate_cost_breakdown"]
8
+ __all__ = [
9
+ "calculate_token_cost",
10
+ "calculate_cost_breakdown",
11
+ "monitor_disconnection",
12
+ "monitor_stuck_stream",
13
+ "generate_client_id",
14
+ ]
@@ -0,0 +1,83 @@
1
+ """Utility functions for monitoring client disconnection and stuck streams during streaming responses."""
2
+
3
+ import asyncio
4
+ from typing import TYPE_CHECKING
5
+
6
+ import structlog
7
+ from starlette.requests import Request
8
+
9
+
10
+ if TYPE_CHECKING:
11
+ from ccproxy.services.claude_sdk_service import ClaudeSDKService
12
+
13
+ logger = structlog.get_logger(__name__)
14
+
15
+
async def monitor_disconnection(
    request: Request, session_id: str, claude_service: "ClaudeSDKService"
) -> None:
    """Watch an in-flight request and interrupt its Claude session on disconnect.

    Polls the client connection once per second; as soon as the client is
    gone, the associated SDK session is interrupted so it stops doing work.

    Args:
        request: The incoming HTTP request
        session_id: The Claude SDK session ID to interrupt if disconnected
        claude_service: The Claude SDK service instance
    """
    try:
        disconnected = False
        while not disconnected:
            await asyncio.sleep(1.0)  # Check every second
            disconnected = await request.is_disconnected()

        logger.info(
            "client_disconnected_interrupting_session", session_id=session_id
        )
        try:
            await claude_service.sdk_client.interrupt_session(session_id)
        except Exception as exc:
            logger.error(
                "failed_to_interrupt_session",
                session_id=session_id,
                error=str(exc),
            )
    except asyncio.CancelledError:
        # Task was cancelled, which is expected when streaming completes normally
        logger.debug("disconnection_monitor_cancelled", session_id=session_id)
        raise
46
+
47
+
async def monitor_stuck_stream(
    session_id: str,
    claude_service: "ClaudeSDKService",
    first_chunk_event: asyncio.Event,
    timeout: float = 10.0,
) -> None:
    """Monitor for stuck streams that don't produce a first chunk (SystemMessage).

    Args:
        session_id: The Claude SDK session ID to monitor
        claude_service: The Claude SDK service instance
        first_chunk_event: Event that will be set when first chunk is received
        timeout: Seconds to wait for first chunk before considering stream stuck
    """
    try:
        # Wait for first chunk with timeout
        await asyncio.wait_for(first_chunk_event.wait(), timeout=timeout)
        logger.debug("stuck_stream_first_chunk_received", session_id=session_id)
    except (TimeoutError, asyncio.TimeoutError):
        # On Python < 3.11, asyncio.wait_for raises asyncio.TimeoutError, which
        # is NOT the builtin TimeoutError; catching only the builtin would let
        # the timeout escape and the stuck session would never be interrupted.
        # On 3.11+ the two names are aliases, so catching both is harmless.
        logger.error(
            "streaming_system_message_timeout",
            session_id=session_id,
            timeout=timeout,
            message=f"No SystemMessage received within {timeout}s, interrupting session",
        )
        try:
            await claude_service.sdk_client.interrupt_session(session_id)
            logger.info("stuck_session_interrupted_successfully", session_id=session_id)
        except Exception as e:
            logger.error(
                "failed_to_interrupt_stuck_session", session_id=session_id, error=str(e)
            )
    except asyncio.CancelledError:
        # Task was cancelled, which is expected when streaming completes normally
        logger.debug("stuck_stream_monitor_cancelled", session_id=session_id)
        raise
@@ -0,0 +1,12 @@
"""Utility functions for generating consistent IDs across the application."""

import uuid


def generate_client_id() -> str:
    """Generate a consistent client ID for SDK connections.

    Returns:
        str: First part of a UUID4 (8 hex characters).
    """
    # uuid4().hex[:8] equals the segment before the first dash in the
    # canonical UUID string, without building and splitting that string.
    return uuid.uuid4().hex[:8]