ccproxy-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. ccproxy/_version.py +2 -2
  2. ccproxy/adapters/codex/__init__.py +11 -0
  3. ccproxy/adapters/openai/adapter.py +1 -1
  4. ccproxy/adapters/openai/models.py +1 -1
  5. ccproxy/adapters/openai/response_adapter.py +355 -0
  6. ccproxy/adapters/openai/response_models.py +178 -0
  7. ccproxy/adapters/openai/streaming.py +1 -0
  8. ccproxy/api/app.py +150 -224
  9. ccproxy/api/dependencies.py +22 -2
  10. ccproxy/api/middleware/errors.py +27 -3
  11. ccproxy/api/middleware/logging.py +4 -0
  12. ccproxy/api/responses.py +6 -1
  13. ccproxy/api/routes/claude.py +222 -17
  14. ccproxy/api/routes/codex.py +1231 -0
  15. ccproxy/api/routes/health.py +228 -3
  16. ccproxy/api/routes/proxy.py +25 -6
  17. ccproxy/api/services/permission_service.py +2 -2
  18. ccproxy/auth/openai/__init__.py +13 -0
  19. ccproxy/auth/openai/credentials.py +166 -0
  20. ccproxy/auth/openai/oauth_client.py +334 -0
  21. ccproxy/auth/openai/storage.py +184 -0
  22. ccproxy/claude_sdk/__init__.py +4 -8
  23. ccproxy/claude_sdk/client.py +661 -131
  24. ccproxy/claude_sdk/exceptions.py +16 -0
  25. ccproxy/claude_sdk/manager.py +219 -0
  26. ccproxy/claude_sdk/message_queue.py +342 -0
  27. ccproxy/claude_sdk/options.py +6 -1
  28. ccproxy/claude_sdk/session_client.py +546 -0
  29. ccproxy/claude_sdk/session_pool.py +550 -0
  30. ccproxy/claude_sdk/stream_handle.py +538 -0
  31. ccproxy/claude_sdk/stream_worker.py +392 -0
  32. ccproxy/claude_sdk/streaming.py +53 -11
  33. ccproxy/cli/commands/auth.py +398 -1
  34. ccproxy/cli/commands/serve.py +99 -1
  35. ccproxy/cli/options/claude_options.py +47 -0
  36. ccproxy/config/__init__.py +0 -3
  37. ccproxy/config/claude.py +171 -23
  38. ccproxy/config/codex.py +100 -0
  39. ccproxy/config/discovery.py +10 -1
  40. ccproxy/config/scheduler.py +2 -2
  41. ccproxy/config/settings.py +38 -1
  42. ccproxy/core/codex_transformers.py +389 -0
  43. ccproxy/core/http_transformers.py +458 -75
  44. ccproxy/core/logging.py +108 -12
  45. ccproxy/core/transformers.py +5 -0
  46. ccproxy/models/claude_sdk.py +57 -0
  47. ccproxy/models/detection.py +208 -0
  48. ccproxy/models/requests.py +22 -0
  49. ccproxy/models/responses.py +16 -0
  50. ccproxy/observability/access_logger.py +72 -14
  51. ccproxy/observability/metrics.py +151 -0
  52. ccproxy/observability/storage/duckdb_simple.py +12 -0
  53. ccproxy/observability/storage/models.py +16 -0
  54. ccproxy/observability/streaming_response.py +107 -0
  55. ccproxy/scheduler/manager.py +31 -6
  56. ccproxy/scheduler/tasks.py +122 -0
  57. ccproxy/services/claude_detection_service.py +269 -0
  58. ccproxy/services/claude_sdk_service.py +333 -130
  59. ccproxy/services/codex_detection_service.py +263 -0
  60. ccproxy/services/proxy_service.py +618 -197
  61. ccproxy/utils/__init__.py +9 -1
  62. ccproxy/utils/disconnection_monitor.py +83 -0
  63. ccproxy/utils/id_generator.py +12 -0
  64. ccproxy/utils/model_mapping.py +7 -5
  65. ccproxy/utils/startup_helpers.py +470 -0
  66. ccproxy_api-0.1.6.dist-info/METADATA +615 -0
  67. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/RECORD +70 -47
  68. ccproxy/config/loader.py +0 -105
  69. ccproxy_api-0.1.4.dist-info/METADATA +0 -369
  70. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/WHEEL +0 -0
  71. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/entry_points.txt +0 -0
  72. {ccproxy_api-0.1.4.dist-info → ccproxy_api-0.1.6.dist-info}/licenses/LICENSE +0 -0
ccproxy/core/http_transformers.py
@@ -3,6 +3,7 @@
  from typing import TYPE_CHECKING, Any

  import structlog
+ from typing_extensions import TypedDict

  from ccproxy.core.transformers import RequestTransformer, ResponseTransformer
  from ccproxy.core.types import ProxyRequest, ProxyResponse, TransformContext
@@ -20,13 +21,64 @@ claude_code_prompt = "You are Claude Code, Anthropic's official CLI for Claude."
  # claude_code_prompt = "<system-reminder>\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n</system-reminder>\n"


- def get_claude_code_prompt() -> dict[str, Any]:
-     """Get the Claude Code system prompt with cache control."""
-     return {
-         "type": "text",
-         "text": claude_code_prompt,
-         "cache_control": {"type": "ephemeral"},
-     }
+ def get_detected_system_field(
+     app_state: Any = None, injection_mode: str = "minimal"
+ ) -> Any:
+     """Get the detected system field for injection.
+
+     Args:
+         app_state: App state containing detection data
+         injection_mode: 'minimal' or 'full' mode
+
+     Returns:
+         The system field to inject (preserving exact Claude CLI structure), or None if no detection data available
+     """
+     if not app_state or not hasattr(app_state, "claude_detection_data"):
+         return None
+
+     claude_data = app_state.claude_detection_data
+     detected_system = claude_data.system_prompt.system_field
+
+     if injection_mode == "full":
+         # Return the complete detected system field exactly as Claude CLI sent it
+         return detected_system
+     else:
+         # Minimal mode: extract just the first system message, preserving its structure
+         if isinstance(detected_system, str):
+             return detected_system
+         elif isinstance(detected_system, list) and detected_system:
+             # Return only the first message object with its complete structure (type, text, cache_control)
+             return [detected_system[0]]
+
+     return None
+
+
+ def get_fallback_system_field() -> list[dict[str, Any]]:
+     """Get fallback system field when no detection data is available."""
+     return [
+         {
+             "type": "text",
+             "text": claude_code_prompt,
+             "cache_control": {"type": "ephemeral"},
+         }
+     ]
+
+
+ class RequestData(TypedDict):
+     """Typed structure for transformed request data."""
+
+     method: str
+     url: str
+     headers: dict[str, str]
+     body: bytes | None
+
+
+ class ResponseData(TypedDict):
+     """Typed structure for transformed response data."""
+
+     status_code: int
+     headers: dict[str, str]
+     body: bytes


  class HTTPRequestTransformer(RequestTransformer):
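Note: the helpers above drive the new system-prompt injection. A rough sketch of the intended behavior, using a hand-rolled SimpleNamespace in place of the real app_state; only the claude_detection_data.system_prompt.system_field attribute chain that the helper reads is mimicked, and the detected blocks are fabricated for illustration.

    # Illustrative stand-in only; real detection data is populated on app state elsewhere in the package.
    from types import SimpleNamespace

    detected_field = [
        {
            "type": "text",
            "text": "You are Claude Code, Anthropic's official CLI for Claude.",
            "cache_control": {"type": "ephemeral"},
        },
        {"type": "text", "text": "<second detected block>"},
    ]
    app_state = SimpleNamespace(
        claude_detection_data=SimpleNamespace(
            system_prompt=SimpleNamespace(system_field=detected_field)
        )
    )

    # "minimal" keeps only the first detected block; "full" returns the field exactly as captured.
    assert get_detected_system_field(app_state, "minimal") == [detected_field[0]]
    assert get_detected_system_field(app_state, "full") == detected_field
    # With no detection data, callers fall back to get_fallback_system_field().
    assert get_detected_system_field(None) is None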
@@ -73,24 +125,39 @@ class HTTPRequestTransformer(RequestTransformer):
          elif context and isinstance(context, dict):
              access_token = context.get("access_token", "")

-         transformed_headers = self.create_proxy_headers(request.headers, access_token)
+         # Extract app_state from context if available
+         app_state = None
+         if context and hasattr(context, "app_state"):
+             app_state = context.app_state
+         elif context and isinstance(context, dict):
+             app_state = context.get("app_state")
+
+         transformed_headers = self.create_proxy_headers(
+             request.headers, access_token, self.proxy_mode, app_state
+         )

          # Transform body
          transformed_body = request.body
          if request.body:
              if isinstance(request.body, bytes):
                  transformed_body = self.transform_request_body(
-                     request.body, transformed_path
+                     request.body, transformed_path, self.proxy_mode, app_state
                  )
              elif isinstance(request.body, str):
                  transformed_body = self.transform_request_body(
-                     request.body.encode("utf-8"), transformed_path
+                     request.body.encode("utf-8"),
+                     transformed_path,
+                     self.proxy_mode,
+                     app_state,
                  )
              elif isinstance(request.body, dict):
                  import json

                  transformed_body = self.transform_request_body(
-                     json.dumps(request.body).encode("utf-8"), transformed_path
+                     json.dumps(request.body).encode("utf-8"),
+                     transformed_path,
+                     self.proxy_mode,
+                     app_state,
                  )

          # Create new transformed request
@@ -105,6 +172,88 @@ class HTTPRequestTransformer(RequestTransformer):
              metadata=request.metadata,
          )

+     async def transform_proxy_request(
+         self,
+         method: str,
+         path: str,
+         headers: dict[str, str],
+         body: bytes | None,
+         query_params: dict[str, str | list[str]] | None,
+         access_token: str,
+         target_base_url: str = "https://api.anthropic.com",
+         app_state: Any = None,
+         injection_mode: str = "minimal",
+     ) -> RequestData:
+         """Transform request using direct parameters from ProxyService.
+
+         This method provides the same functionality as ProxyService._transform_request()
+         but is properly located in the transformer layer.
+
+         Args:
+             method: HTTP method
+             path: Request path
+             headers: Request headers
+             body: Request body
+             query_params: Query parameters
+             access_token: OAuth access token
+             target_base_url: Base URL for the target API
+             app_state: Optional app state containing detection data
+             injection_mode: System prompt injection mode
+
+         Returns:
+             Dictionary with transformed request data (method, url, headers, body)
+         """
+         import urllib.parse
+
+         # Transform path
+         transformed_path = self.transform_path(path, self.proxy_mode)
+         target_url = f"{target_base_url.rstrip('/')}{transformed_path}"
+
+         # Add beta=true query parameter for /v1/messages requests if not already present
+         if transformed_path == "/v1/messages":
+             if query_params is None:
+                 query_params = {}
+             elif "beta" not in query_params:
+                 query_params = dict(query_params)  # Make a copy
+
+             if "beta" not in query_params:
+                 query_params["beta"] = "true"
+
+         # Transform body first (as it might change size)
+         proxy_body = None
+         if body:
+             proxy_body = self.transform_request_body(
+                 body, path, self.proxy_mode, app_state, injection_mode
+             )
+
+         # Transform headers (and update Content-Length if body changed)
+         proxy_headers = self.create_proxy_headers(
+             headers, access_token, self.proxy_mode, app_state
+         )
+
+         # Update Content-Length if body was transformed and size changed
+         if proxy_body and body and len(proxy_body) != len(body):
+             # Remove any existing content-length headers (case-insensitive)
+             proxy_headers = {
+                 k: v for k, v in proxy_headers.items() if k.lower() != "content-length"
+             }
+             proxy_headers["Content-Length"] = str(len(proxy_body))
+         elif proxy_body and not body:
+             # New body was created where none existed
+             proxy_headers["Content-Length"] = str(len(proxy_body))
+
+         # Add query parameters to URL if present
+         if query_params:
+             query_string = urllib.parse.urlencode(query_params)
+             target_url = f"{target_url}?{query_string}"
+
+         return RequestData(
+             method=method,
+             url=target_url,
+             headers=proxy_headers,
+             body=proxy_body,
+         )
+
      def transform_path(self, path: str, proxy_mode: str = "full") -> str:
          """Transform request path."""
          # Remove /api prefix if present (for new proxy endpoints)
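Note: a rough usage sketch of transform_proxy_request, assuming the transformer can be constructed with its defaults and that an OAuth access token has already been resolved; the token and payload below are placeholders.

    import asyncio
    import json

    transformer = HTTPRequestTransformer()
    request_data = asyncio.run(
        transformer.transform_proxy_request(
            method="POST",
            path="/api/v1/messages",
            headers={"content-type": "application/json"},
            body=json.dumps({"model": "claude-3-5-sonnet-20241022", "messages": []}).encode(),
            query_params=None,
            access_token="<oauth-access-token>",
        )
    )
    # Expected shape: a RequestData dict whose URL targets
    # https://api.anthropic.com/v1/messages?beta=true, whose headers carry the
    # Claude CLI identity, and whose body has the system prompt injected.
    print(request_data["url"])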
@@ -122,7 +271,11 @@ class HTTPRequestTransformer(RequestTransformer):
          return path

      def create_proxy_headers(
-         self, headers: dict[str, str], access_token: str, proxy_mode: str = "full"
+         self,
+         headers: dict[str, str],
+         access_token: str,
+         proxy_mode: str = "full",
+         app_state: Any = None,
      ) -> dict[str, str]:
          """Create proxy headers from original headers with Claude CLI identity."""
          proxy_headers = {}
@@ -170,27 +323,35 @@ class HTTPRequestTransformer(RequestTransformer):
          if "connection" not in [k.lower() for k in proxy_headers]:
              proxy_headers["Connection"] = "keep-alive"

-         # Critical Claude/Anthropic headers for tools and beta features
-         proxy_headers["anthropic-beta"] = (
-             "claude-code-20250219,oauth-2025-04-20,"
-             "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
-         )
-         proxy_headers["anthropic-version"] = "2023-06-01"
-         proxy_headers["anthropic-dangerous-direct-browser-access"] = "true"
-
-         # Claude CLI identity headers
-         proxy_headers["x-app"] = "cli"
-         proxy_headers["User-Agent"] = "claude-cli/1.0.60 (external, cli)"
-
-         # Stainless SDK compatibility headers
-         proxy_headers["X-Stainless-Lang"] = "js"
-         proxy_headers["X-Stainless-Retry-Count"] = "0"
-         proxy_headers["X-Stainless-Timeout"] = "60"
-         proxy_headers["X-Stainless-Package-Version"] = "0.55.1"
-         proxy_headers["X-Stainless-OS"] = "Linux"
-         proxy_headers["X-Stainless-Arch"] = "x64"
-         proxy_headers["X-Stainless-Runtime"] = "node"
-         proxy_headers["X-Stainless-Runtime-Version"] = "v24.3.0"
+         # Use detected Claude CLI headers when available
+         if app_state and hasattr(app_state, "claude_detection_data"):
+             claude_data = app_state.claude_detection_data
+             detected_headers = claude_data.headers.to_headers_dict()
+             proxy_headers.update(detected_headers)
+             logger.debug("using_detected_headers", version=claude_data.claude_version)
+         else:
+             # Fallback to hardcoded Claude/Anthropic headers
+             proxy_headers["anthropic-beta"] = (
+                 "claude-code-20250219,oauth-2025-04-20,"
+                 "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
+             )
+             proxy_headers["anthropic-version"] = "2023-06-01"
+             proxy_headers["anthropic-dangerous-direct-browser-access"] = "true"
+
+             # Claude CLI identity headers
+             proxy_headers["x-app"] = "cli"
+             proxy_headers["User-Agent"] = "claude-cli/1.0.60 (external, cli)"
+
+             # Stainless SDK compatibility headers
+             proxy_headers["X-Stainless-Lang"] = "js"
+             proxy_headers["X-Stainless-Retry-Count"] = "0"
+             proxy_headers["X-Stainless-Timeout"] = "60"
+             proxy_headers["X-Stainless-Package-Version"] = "0.55.1"
+             proxy_headers["X-Stainless-OS"] = "Linux"
+             proxy_headers["X-Stainless-Arch"] = "x64"
+             proxy_headers["X-Stainless-Runtime"] = "node"
+             proxy_headers["X-Stainless-Runtime-Version"] = "v24.3.0"
+             logger.debug("using_fallback_headers")

          # Standard HTTP headers for proper API interaction
          proxy_headers["accept-language"] = "*"
@@ -200,8 +361,146 @@ class HTTPRequestTransformer(RequestTransformer):

          return proxy_headers

+     def _count_cache_control_blocks(self, data: dict[str, Any]) -> dict[str, int]:
+         """Count cache_control blocks in different parts of the request.
+
+         Returns:
+             Dictionary with counts for 'injected_system', 'user_system', and 'messages'
+         """
+         counts = {"injected_system": 0, "user_system": 0, "messages": 0}
+
+         # Count in system field
+         system = data.get("system")
+         if system:
+             if isinstance(system, str):
+                 # String system prompts don't have cache_control
+                 pass
+             elif isinstance(system, list):
+                 # Count cache_control in system prompt blocks
+                 # The first block(s) are injected, rest are user's
+                 injected_count = 0
+                 for i, block in enumerate(system):
+                     if isinstance(block, dict) and "cache_control" in block:
+                         # Check if this is the injected prompt (contains Claude Code identity)
+                         text = block.get("text", "")
+                         if "Claude Code" in text or "Anthropic's official CLI" in text:
+                             counts["injected_system"] += 1
+                             injected_count = max(injected_count, i + 1)
+                         elif i < injected_count:
+                             # Part of injected system (multiple blocks)
+                             counts["injected_system"] += 1
+                         else:
+                             counts["user_system"] += 1
+
+         # Count in messages
+         messages = data.get("messages", [])
+         for msg in messages:
+             content = msg.get("content")
+             if isinstance(content, list):
+                 for block in content:
+                     if isinstance(block, dict) and "cache_control" in block:
+                         counts["messages"] += 1
+
+         return counts
+
+     def _limit_cache_control_blocks(
+         self, data: dict[str, Any], max_blocks: int = 4
+     ) -> dict[str, Any]:
+         """Limit the number of cache_control blocks to comply with Anthropic's limit.
+
+         Priority order:
+         1. Injected system prompt cache_control (highest priority - Claude Code identity)
+         2. User's system prompt cache_control
+         3. User's message cache_control (lowest priority)
+
+         Args:
+             data: Request data dictionary
+             max_blocks: Maximum number of cache_control blocks allowed (default: 4)
+
+         Returns:
+             Modified data dictionary with cache_control blocks limited
+         """
+         import copy
+
+         # Deep copy to avoid modifying original
+         data = copy.deepcopy(data)
+
+         # Count existing blocks
+         counts = self._count_cache_control_blocks(data)
+         total = counts["injected_system"] + counts["user_system"] + counts["messages"]
+
+         if total <= max_blocks:
+             # No need to remove anything
+             return data
+
+         logger.warning(
+             "cache_control_limit_exceeded",
+             total_blocks=total,
+             max_blocks=max_blocks,
+             injected=counts["injected_system"],
+             user_system=counts["user_system"],
+             messages=counts["messages"],
+         )
+
+         # Calculate how many to remove
+         to_remove = total - max_blocks
+         removed = 0
+
+         # Remove from messages first (lowest priority)
+         if to_remove > 0 and counts["messages"] > 0:
+             messages = data.get("messages", [])
+             for msg in reversed(messages):  # Remove from end first
+                 if removed >= to_remove:
+                     break
+                 content = msg.get("content")
+                 if isinstance(content, list):
+                     for block in reversed(content):
+                         if removed >= to_remove:
+                             break
+                         if isinstance(block, dict) and "cache_control" in block:
+                             del block["cache_control"]
+                             removed += 1
+                             logger.debug("removed_cache_control", location="message")
+
+         # Remove from user system prompts next
+         if removed < to_remove and counts["user_system"] > 0:
+             system = data.get("system")
+             if isinstance(system, list):
+                 # Find and remove cache_control from user system blocks (non-injected)
+                 for block in reversed(system):
+                     if removed >= to_remove:
+                         break
+                     if isinstance(block, dict) and "cache_control" in block:
+                         text = block.get("text", "")
+                         # Skip injected prompts (highest priority)
+                         if (
+                             "Claude Code" not in text
+                             and "Anthropic's official CLI" not in text
+                         ):
+                             del block["cache_control"]
+                             removed += 1
+                             logger.debug(
+                                 "removed_cache_control", location="user_system"
+                             )
+
+         # In theory, we should never need to remove injected system cache_control
+         # but include this for completeness
+         if removed < to_remove:
+             logger.error(
+                 "cannot_preserve_injected_cache_control",
+                 needed_to_remove=to_remove,
+                 actually_removed=removed,
+             )
+
+         return data
+
      def transform_request_body(
-         self, body: bytes, path: str, proxy_mode: str = "full"
+         self,
+         body: bytes,
+         path: str,
+         proxy_mode: str = "full",
+         app_state: Any = None,
+         injection_mode: str = "minimal",
      ) -> bytes:
          """Transform request body."""
          if not body:
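Note: a toy illustration of the cache_control limiting priority described in the docstring above; the payload is fabricated and a default constructor is assumed.

    # Five cache_control blocks total, one over the default limit of four.
    payload = {
        "system": [
            {
                "type": "text",
                "text": "You are Claude Code, Anthropic's official CLI for Claude.",
                "cache_control": {"type": "ephemeral"},  # injected block, highest priority
            },
            {"type": "text", "text": "project notes", "cache_control": {"type": "ephemeral"}},
        ],
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "a", "cache_control": {"type": "ephemeral"}},
                    {"type": "text", "text": "b", "cache_control": {"type": "ephemeral"}},
                    {"type": "text", "text": "c", "cache_control": {"type": "ephemeral"}},
                ],
            },
        ],
    }

    transformer = HTTPRequestTransformer()
    limited = transformer._limit_cache_control_blocks(payload)  # max_blocks defaults to 4
    # One block is dropped, starting from the last message block; both system blocks survive.
    assert "cache_control" not in limited["messages"][0]["content"][-1]
    assert all("cache_control" in block for block in limited["system"])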
@@ -213,60 +512,77 @@ class HTTPRequestTransformer(RequestTransformer):
              body = self._transform_openai_to_anthropic(body)

          # Apply system prompt transformation for Claude Code identity
-         return self.transform_system_prompt(body)
+         return self.transform_system_prompt(body, app_state, injection_mode)

-     def transform_system_prompt(self, body: bytes) -> bytes:
-         """Transform system prompt to ensure Claude Code identification comes first.
+     def transform_system_prompt(
+         self, body: bytes, app_state: Any = None, injection_mode: str = "minimal"
+     ) -> bytes:
+         """Transform system prompt based on injection mode.

          Args:
              body: Original request body as bytes
+             app_state: Optional app state containing detection data
+             injection_mode: System prompt injection mode ('minimal' or 'full')

          Returns:
-             Transformed request body as bytes with Claude Code system prompt
+             Transformed request body as bytes with system prompt injection
          """
          try:
              import json

              data = json.loads(body.decode("utf-8"))
-         except (json.JSONDecodeError, UnicodeDecodeError):
+         except (json.JSONDecodeError, UnicodeDecodeError) as e:
              # Return original if not valid JSON
+             logger.warning(
+                 "http_transform_json_decode_failed",
+                 error=str(e),
+                 body_preview=body[:200].decode("utf-8", errors="replace")
+                 if body
+                 else None,
+                 body_length=len(body) if body else 0,
+             )
              return body

-         # Check if request has a system prompt
-         if "system" not in data or (
-             isinstance(data["system"], str) and data["system"] == claude_code_prompt
-         ):
-             # No system prompt, inject Claude Code identification
-             data["system"] = [get_claude_code_prompt()]
-             return json.dumps(data).encode("utf-8")
-
-         system = data["system"]
-
-         if isinstance(system, str):
-             # Handle string system prompt
-             if system == claude_code_prompt:
-                 # Already correct, convert to proper array format
-                 data["system"] = [get_claude_code_prompt()]
-                 return json.dumps(data).encode("utf-8")
-
-             # Prepend Claude Code prompt to existing string
-             data["system"] = [
-                 get_claude_code_prompt(),
-                 {"type": "text", "text": system},
-             ]
-
-         elif isinstance(system, list):
-             # Handle array system prompt
-             if len(system) > 0:
-                 # Check if first element has correct text
-                 first = system[0]
-                 if isinstance(first, dict) and first.get("text") == claude_code_prompt:
-                     # Already has Claude Code first, ensure it has cache_control
-                     data["system"][0] = get_claude_code_prompt()
-                     return json.dumps(data).encode("utf-8")
-
-                 # Prepend Claude Code prompt
-                 data["system"] = [get_claude_code_prompt()] + system
+         # Get the system field to inject
+         detected_system = get_detected_system_field(app_state, injection_mode)
+         if detected_system is None:
+             # No detection data, use fallback
+             detected_system = get_fallback_system_field()
+
+         # Always inject the system prompt (detected or fallback)
+         if "system" not in data:
+             # No existing system prompt, inject the detected/fallback one
+             data["system"] = detected_system
+         else:
+             # Request has existing system prompt, prepend the detected/fallback one
+             existing_system = data["system"]
+
+             if isinstance(detected_system, str):
+                 # Detected system is a string
+                 if isinstance(existing_system, str):
+                     # Both are strings, convert to list format
+                     data["system"] = [
+                         {"type": "text", "text": detected_system},
+                         {"type": "text", "text": existing_system},
+                     ]
+                 elif isinstance(existing_system, list):
+                     # Detected is string, existing is list
+                     data["system"] = [
+                         {"type": "text", "text": detected_system}
+                     ] + existing_system
+             elif isinstance(detected_system, list):
+                 # Detected system is a list
+                 if isinstance(existing_system, str):
+                     # Detected is list, existing is string
+                     data["system"] = detected_system + [
+                         {"type": "text", "text": existing_system}
+                     ]
+                 elif isinstance(existing_system, list):
+                     # Both are lists, concatenate
+                     data["system"] = detected_system + existing_system
+
+         # Limit cache_control blocks to comply with Anthropic's limit
+         data = self._limit_cache_control_blocks(data)

          return json.dumps(data).encode("utf-8")

@@ -290,7 +606,14 @@ class HTTPRequestTransformer(RequestTransformer):
              messages = data.get("messages", [])
              if messages and any(msg.get("role") == "system" for msg in messages):
                  return True
-         except (json.JSONDecodeError, UnicodeDecodeError):
+         except (json.JSONDecodeError, UnicodeDecodeError) as e:
+             logger.warning(
+                 "openai_request_detection_json_decode_failed",
+                 error=str(e),
+                 body_preview=body[:100].decode("utf-8", errors="replace")
+                 if body
+                 else None,
+             )
              pass

          return False
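Note: a minimal end-to-end sketch of the injection path above when no detection data is present, so the fallback Claude Code block is prepended to the caller's system prompt; the request body is fabricated and a default constructor is assumed.

    import json

    transformer = HTTPRequestTransformer()
    original = json.dumps(
        {"model": "claude-3-5-sonnet-20241022", "system": "Answer tersely.", "messages": []}
    ).encode()
    result = json.loads(transformer.transform_system_prompt(original))
    # The system field becomes a list: the fallback block (with cache_control) first,
    # then the user's string wrapped as a text block.
    assert result["system"][0]["text"].startswith("You are Claude Code")
    assert result["system"][1] == {"type": "text", "text": "Answer tersely."}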
@@ -387,6 +710,65 @@ class HTTPResponseTransformer(ResponseTransformer):
              metadata=response.metadata,
          )

+     async def transform_proxy_response(
+         self,
+         status_code: int,
+         headers: dict[str, str],
+         body: bytes,
+         original_path: str,
+         proxy_mode: str = "full",
+     ) -> ResponseData:
+         """Transform response using direct parameters from ProxyService.
+
+         This method provides the same functionality as ProxyService._transform_response()
+         but is properly located in the transformer layer.
+
+         Args:
+             status_code: HTTP status code
+             headers: Response headers
+             body: Response body
+             original_path: Original request path for context
+             proxy_mode: Proxy transformation mode
+
+         Returns:
+             Dictionary with transformed response data (status_code, headers, body)
+         """
+         # For error responses, handle OpenAI transformation if needed
+         if status_code >= 400:
+             transformed_error_body = body
+             if self._is_openai_request(original_path):
+                 try:
+                     import json
+
+                     from ccproxy.adapters.openai.adapter import OpenAIAdapter
+
+                     error_data = json.loads(body.decode("utf-8"))
+                     openai_adapter = OpenAIAdapter()
+                     openai_error = openai_adapter.adapt_error(error_data)
+                     transformed_error_body = json.dumps(openai_error).encode("utf-8")
+                 except (json.JSONDecodeError, UnicodeDecodeError):
+                     # Keep original error if parsing fails
+                     pass
+
+             return ResponseData(
+                 status_code=status_code,
+                 headers=headers,
+                 body=transformed_error_body,
+             )
+
+         # For successful responses, transform normally
+         transformed_body = self.transform_response_body(body, original_path, proxy_mode)
+
+         transformed_headers = self.transform_response_headers(
+             headers, original_path, len(transformed_body), proxy_mode
+         )
+
+         return ResponseData(
+             status_code=status_code,
+             headers=transformed_headers,
+             body=transformed_body,
+         )
+
      def transform_response_body(
          self, body: bytes, path: str, proxy_mode: str = "full"
      ) -> bytes:
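Note: for error responses, transform_proxy_response only reshapes bodies for OpenAI-style endpoints via OpenAIAdapter.adapt_error and passes everything else through. A minimal sketch of the pass-through case, with fabricated values, a default constructor assumed, and the assumption that /v1/messages is not classified as an OpenAI route by _is_openai_request.

    import asyncio

    response_transformer = HTTPResponseTransformer()
    result = asyncio.run(
        response_transformer.transform_proxy_response(
            status_code=500,
            headers={"content-type": "application/json"},
            body=b'{"error": {"type": "api_error", "message": "upstream failure"}}',
            original_path="/v1/messages",  # not an OpenAI route, so the error body passes through unchanged
        )
    )
    assert result["status_code"] == 500
    assert result["body"] == b'{"error": {"type": "api_error", "message": "upstream failure"}}'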
@@ -411,6 +793,7 @@ class HTTPResponseTransformer(ResponseTransformer):
                  "content-length",
                  "transfer-encoding",
                  "content-encoding",
+                 "date",  # Remove upstream date header to avoid conflicts
              ]:
                  transformed_headers[key] = value