ccproxy-api 0.1.7__py3-none-any.whl → 0.2.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (481)
  1. ccproxy/api/__init__.py +1 -15
  2. ccproxy/api/app.py +434 -219
  3. ccproxy/api/bootstrap.py +30 -0
  4. ccproxy/api/decorators.py +85 -0
  5. ccproxy/api/dependencies.py +144 -168
  6. ccproxy/api/format_validation.py +54 -0
  7. ccproxy/api/middleware/cors.py +6 -3
  8. ccproxy/api/middleware/errors.py +388 -524
  9. ccproxy/api/middleware/hooks.py +563 -0
  10. ccproxy/api/middleware/normalize_headers.py +59 -0
  11. ccproxy/api/middleware/request_id.py +35 -16
  12. ccproxy/api/middleware/streaming_hooks.py +292 -0
  13. ccproxy/api/routes/__init__.py +5 -14
  14. ccproxy/api/routes/health.py +39 -672
  15. ccproxy/api/routes/plugins.py +277 -0
  16. ccproxy/auth/__init__.py +2 -19
  17. ccproxy/auth/bearer.py +25 -15
  18. ccproxy/auth/dependencies.py +123 -157
  19. ccproxy/auth/exceptions.py +0 -12
  20. ccproxy/auth/manager.py +35 -49
  21. ccproxy/auth/managers/__init__.py +10 -0
  22. ccproxy/auth/managers/base.py +523 -0
  23. ccproxy/auth/managers/base_enhanced.py +63 -0
  24. ccproxy/auth/managers/token_snapshot.py +77 -0
  25. ccproxy/auth/models/base.py +65 -0
  26. ccproxy/auth/models/credentials.py +40 -0
  27. ccproxy/auth/oauth/__init__.py +4 -18
  28. ccproxy/auth/oauth/base.py +533 -0
  29. ccproxy/auth/oauth/cli_errors.py +37 -0
  30. ccproxy/auth/oauth/flows.py +430 -0
  31. ccproxy/auth/oauth/protocol.py +366 -0
  32. ccproxy/auth/oauth/registry.py +408 -0
  33. ccproxy/auth/oauth/router.py +396 -0
  34. ccproxy/auth/oauth/routes.py +186 -113
  35. ccproxy/auth/oauth/session.py +151 -0
  36. ccproxy/auth/oauth/templates.py +342 -0
  37. ccproxy/auth/storage/__init__.py +2 -5
  38. ccproxy/auth/storage/base.py +279 -5
  39. ccproxy/auth/storage/generic.py +134 -0
  40. ccproxy/cli/__init__.py +1 -2
  41. ccproxy/cli/_settings_help.py +351 -0
  42. ccproxy/cli/commands/auth.py +1519 -793
  43. ccproxy/cli/commands/config/commands.py +209 -276
  44. ccproxy/cli/commands/plugins.py +669 -0
  45. ccproxy/cli/commands/serve.py +75 -810
  46. ccproxy/cli/commands/status.py +254 -0
  47. ccproxy/cli/decorators.py +83 -0
  48. ccproxy/cli/helpers.py +22 -60
  49. ccproxy/cli/main.py +359 -10
  50. ccproxy/cli/options/claude_options.py +0 -25
  51. ccproxy/config/__init__.py +7 -11
  52. ccproxy/config/core.py +227 -0
  53. ccproxy/config/env_generator.py +232 -0
  54. ccproxy/config/runtime.py +67 -0
  55. ccproxy/config/security.py +36 -3
  56. ccproxy/config/settings.py +382 -441
  57. ccproxy/config/toml_generator.py +299 -0
  58. ccproxy/config/utils.py +452 -0
  59. ccproxy/core/__init__.py +7 -271
  60. ccproxy/{_version.py → core/_version.py} +16 -3
  61. ccproxy/core/async_task_manager.py +516 -0
  62. ccproxy/core/async_utils.py +47 -14
  63. ccproxy/core/auth/__init__.py +6 -0
  64. ccproxy/core/constants.py +16 -50
  65. ccproxy/core/errors.py +53 -0
  66. ccproxy/core/id_utils.py +20 -0
  67. ccproxy/core/interfaces.py +16 -123
  68. ccproxy/core/logging.py +473 -18
  69. ccproxy/core/plugins/__init__.py +77 -0
  70. ccproxy/core/plugins/cli_discovery.py +211 -0
  71. ccproxy/core/plugins/declaration.py +455 -0
  72. ccproxy/core/plugins/discovery.py +604 -0
  73. ccproxy/core/plugins/factories.py +967 -0
  74. ccproxy/core/plugins/hooks/__init__.py +30 -0
  75. ccproxy/core/plugins/hooks/base.py +58 -0
  76. ccproxy/core/plugins/hooks/events.py +46 -0
  77. ccproxy/core/plugins/hooks/implementations/__init__.py +16 -0
  78. ccproxy/core/plugins/hooks/implementations/formatters/__init__.py +11 -0
  79. ccproxy/core/plugins/hooks/implementations/formatters/json.py +552 -0
  80. ccproxy/core/plugins/hooks/implementations/formatters/raw.py +370 -0
  81. ccproxy/core/plugins/hooks/implementations/http_tracer.py +431 -0
  82. ccproxy/core/plugins/hooks/layers.py +44 -0
  83. ccproxy/core/plugins/hooks/manager.py +186 -0
  84. ccproxy/core/plugins/hooks/registry.py +139 -0
  85. ccproxy/core/plugins/hooks/thread_manager.py +203 -0
  86. ccproxy/core/plugins/hooks/types.py +22 -0
  87. ccproxy/core/plugins/interfaces.py +416 -0
  88. ccproxy/core/plugins/loader.py +166 -0
  89. ccproxy/core/plugins/middleware.py +233 -0
  90. ccproxy/core/plugins/models.py +59 -0
  91. ccproxy/core/plugins/protocol.py +180 -0
  92. ccproxy/core/plugins/runtime.py +519 -0
  93. ccproxy/{observability/context.py → core/request_context.py} +137 -94
  94. ccproxy/core/status_report.py +211 -0
  95. ccproxy/core/transformers.py +13 -8
  96. ccproxy/data/claude_headers_fallback.json +540 -19
  97. ccproxy/data/codex_headers_fallback.json +114 -7
  98. ccproxy/http/__init__.py +30 -0
  99. ccproxy/http/base.py +95 -0
  100. ccproxy/http/client.py +323 -0
  101. ccproxy/http/hooks.py +642 -0
  102. ccproxy/http/pool.py +279 -0
  103. ccproxy/llms/formatters/__init__.py +7 -0
  104. ccproxy/llms/formatters/anthropic_to_openai/__init__.py +55 -0
  105. ccproxy/llms/formatters/anthropic_to_openai/errors.py +65 -0
  106. ccproxy/llms/formatters/anthropic_to_openai/requests.py +356 -0
  107. ccproxy/llms/formatters/anthropic_to_openai/responses.py +153 -0
  108. ccproxy/llms/formatters/anthropic_to_openai/streams.py +1546 -0
  109. ccproxy/llms/formatters/base.py +140 -0
  110. ccproxy/llms/formatters/base_model.py +33 -0
  111. ccproxy/llms/formatters/common/__init__.py +51 -0
  112. ccproxy/llms/formatters/common/identifiers.py +48 -0
  113. ccproxy/llms/formatters/common/streams.py +254 -0
  114. ccproxy/llms/formatters/common/thinking.py +74 -0
  115. ccproxy/llms/formatters/common/usage.py +135 -0
  116. ccproxy/llms/formatters/constants.py +55 -0
  117. ccproxy/llms/formatters/context.py +116 -0
  118. ccproxy/llms/formatters/mapping.py +33 -0
  119. ccproxy/llms/formatters/openai_to_anthropic/__init__.py +55 -0
  120. ccproxy/llms/formatters/openai_to_anthropic/_helpers.py +141 -0
  121. ccproxy/llms/formatters/openai_to_anthropic/errors.py +53 -0
  122. ccproxy/llms/formatters/openai_to_anthropic/requests.py +674 -0
  123. ccproxy/llms/formatters/openai_to_anthropic/responses.py +285 -0
  124. ccproxy/llms/formatters/openai_to_anthropic/streams.py +530 -0
  125. ccproxy/llms/formatters/openai_to_openai/__init__.py +53 -0
  126. ccproxy/llms/formatters/openai_to_openai/_helpers.py +325 -0
  127. ccproxy/llms/formatters/openai_to_openai/errors.py +6 -0
  128. ccproxy/llms/formatters/openai_to_openai/requests.py +388 -0
  129. ccproxy/llms/formatters/openai_to_openai/responses.py +594 -0
  130. ccproxy/llms/formatters/openai_to_openai/streams.py +1832 -0
  131. ccproxy/llms/formatters/utils.py +306 -0
  132. ccproxy/llms/models/__init__.py +9 -0
  133. ccproxy/llms/models/anthropic.py +619 -0
  134. ccproxy/llms/models/openai.py +844 -0
  135. ccproxy/llms/streaming/__init__.py +26 -0
  136. ccproxy/llms/streaming/accumulators.py +1074 -0
  137. ccproxy/llms/streaming/formatters.py +251 -0
  138. ccproxy/{adapters/openai/streaming.py → llms/streaming/processors.py} +193 -240
  139. ccproxy/models/__init__.py +8 -159
  140. ccproxy/models/detection.py +92 -193
  141. ccproxy/models/provider.py +75 -0
  142. ccproxy/plugins/access_log/README.md +32 -0
  143. ccproxy/plugins/access_log/__init__.py +20 -0
  144. ccproxy/plugins/access_log/config.py +33 -0
  145. ccproxy/plugins/access_log/formatter.py +126 -0
  146. ccproxy/plugins/access_log/hook.py +763 -0
  147. ccproxy/plugins/access_log/logger.py +254 -0
  148. ccproxy/plugins/access_log/plugin.py +137 -0
  149. ccproxy/plugins/access_log/writer.py +109 -0
  150. ccproxy/plugins/analytics/README.md +24 -0
  151. ccproxy/plugins/analytics/__init__.py +1 -0
  152. ccproxy/plugins/analytics/config.py +5 -0
  153. ccproxy/plugins/analytics/ingest.py +85 -0
  154. ccproxy/plugins/analytics/models.py +97 -0
  155. ccproxy/plugins/analytics/plugin.py +121 -0
  156. ccproxy/plugins/analytics/routes.py +163 -0
  157. ccproxy/plugins/analytics/service.py +284 -0
  158. ccproxy/plugins/claude_api/README.md +29 -0
  159. ccproxy/plugins/claude_api/__init__.py +10 -0
  160. ccproxy/plugins/claude_api/adapter.py +829 -0
  161. ccproxy/plugins/claude_api/config.py +52 -0
  162. ccproxy/plugins/claude_api/detection_service.py +461 -0
  163. ccproxy/plugins/claude_api/health.py +175 -0
  164. ccproxy/plugins/claude_api/hooks.py +284 -0
  165. ccproxy/plugins/claude_api/models.py +256 -0
  166. ccproxy/plugins/claude_api/plugin.py +298 -0
  167. ccproxy/plugins/claude_api/routes.py +118 -0
  168. ccproxy/plugins/claude_api/streaming_metrics.py +68 -0
  169. ccproxy/plugins/claude_api/tasks.py +84 -0
  170. ccproxy/plugins/claude_sdk/README.md +35 -0
  171. ccproxy/plugins/claude_sdk/__init__.py +80 -0
  172. ccproxy/plugins/claude_sdk/adapter.py +749 -0
  173. ccproxy/plugins/claude_sdk/auth.py +57 -0
  174. ccproxy/{claude_sdk → plugins/claude_sdk}/client.py +63 -39
  175. ccproxy/plugins/claude_sdk/config.py +210 -0
  176. ccproxy/{claude_sdk → plugins/claude_sdk}/converter.py +6 -6
  177. ccproxy/plugins/claude_sdk/detection_service.py +163 -0
  178. ccproxy/{services/claude_sdk_service.py → plugins/claude_sdk/handler.py} +123 -304
  179. ccproxy/plugins/claude_sdk/health.py +113 -0
  180. ccproxy/plugins/claude_sdk/hooks.py +115 -0
  181. ccproxy/{claude_sdk → plugins/claude_sdk}/manager.py +42 -32
  182. ccproxy/{claude_sdk → plugins/claude_sdk}/message_queue.py +8 -8
  183. ccproxy/{models/claude_sdk.py → plugins/claude_sdk/models.py} +64 -16
  184. ccproxy/plugins/claude_sdk/options.py +154 -0
  185. ccproxy/{claude_sdk → plugins/claude_sdk}/parser.py +23 -5
  186. ccproxy/plugins/claude_sdk/plugin.py +269 -0
  187. ccproxy/plugins/claude_sdk/routes.py +104 -0
  188. ccproxy/{claude_sdk → plugins/claude_sdk}/session_client.py +124 -12
  189. ccproxy/plugins/claude_sdk/session_pool.py +700 -0
  190. ccproxy/{claude_sdk → plugins/claude_sdk}/stream_handle.py +48 -43
  191. ccproxy/{claude_sdk → plugins/claude_sdk}/stream_worker.py +22 -18
  192. ccproxy/{claude_sdk → plugins/claude_sdk}/streaming.py +50 -16
  193. ccproxy/plugins/claude_sdk/tasks.py +97 -0
  194. ccproxy/plugins/claude_shared/README.md +18 -0
  195. ccproxy/plugins/claude_shared/__init__.py +12 -0
  196. ccproxy/plugins/claude_shared/model_defaults.py +171 -0
  197. ccproxy/plugins/codex/README.md +35 -0
  198. ccproxy/plugins/codex/__init__.py +6 -0
  199. ccproxy/plugins/codex/adapter.py +635 -0
  200. ccproxy/{config/codex.py → plugins/codex/config.py} +78 -12
  201. ccproxy/plugins/codex/detection_service.py +544 -0
  202. ccproxy/plugins/codex/health.py +162 -0
  203. ccproxy/plugins/codex/hooks.py +263 -0
  204. ccproxy/plugins/codex/model_defaults.py +39 -0
  205. ccproxy/plugins/codex/models.py +263 -0
  206. ccproxy/plugins/codex/plugin.py +275 -0
  207. ccproxy/plugins/codex/routes.py +129 -0
  208. ccproxy/plugins/codex/streaming_metrics.py +324 -0
  209. ccproxy/plugins/codex/tasks.py +106 -0
  210. ccproxy/plugins/codex/utils/__init__.py +1 -0
  211. ccproxy/plugins/codex/utils/sse_parser.py +106 -0
  212. ccproxy/plugins/command_replay/README.md +34 -0
  213. ccproxy/plugins/command_replay/__init__.py +17 -0
  214. ccproxy/plugins/command_replay/config.py +133 -0
  215. ccproxy/plugins/command_replay/formatter.py +432 -0
  216. ccproxy/plugins/command_replay/hook.py +294 -0
  217. ccproxy/plugins/command_replay/plugin.py +161 -0
  218. ccproxy/plugins/copilot/README.md +39 -0
  219. ccproxy/plugins/copilot/__init__.py +11 -0
  220. ccproxy/plugins/copilot/adapter.py +465 -0
  221. ccproxy/plugins/copilot/config.py +155 -0
  222. ccproxy/plugins/copilot/data/copilot_fallback.json +41 -0
  223. ccproxy/plugins/copilot/detection_service.py +255 -0
  224. ccproxy/plugins/copilot/manager.py +275 -0
  225. ccproxy/plugins/copilot/model_defaults.py +284 -0
  226. ccproxy/plugins/copilot/models.py +148 -0
  227. ccproxy/plugins/copilot/oauth/__init__.py +16 -0
  228. ccproxy/plugins/copilot/oauth/client.py +494 -0
  229. ccproxy/plugins/copilot/oauth/models.py +385 -0
  230. ccproxy/plugins/copilot/oauth/provider.py +602 -0
  231. ccproxy/plugins/copilot/oauth/storage.py +170 -0
  232. ccproxy/plugins/copilot/plugin.py +360 -0
  233. ccproxy/plugins/copilot/routes.py +294 -0
  234. ccproxy/plugins/credential_balancer/README.md +124 -0
  235. ccproxy/plugins/credential_balancer/__init__.py +6 -0
  236. ccproxy/plugins/credential_balancer/config.py +270 -0
  237. ccproxy/plugins/credential_balancer/factory.py +415 -0
  238. ccproxy/plugins/credential_balancer/hook.py +51 -0
  239. ccproxy/plugins/credential_balancer/manager.py +587 -0
  240. ccproxy/plugins/credential_balancer/plugin.py +146 -0
  241. ccproxy/plugins/dashboard/README.md +25 -0
  242. ccproxy/plugins/dashboard/__init__.py +1 -0
  243. ccproxy/plugins/dashboard/config.py +8 -0
  244. ccproxy/plugins/dashboard/plugin.py +71 -0
  245. ccproxy/plugins/dashboard/routes.py +67 -0
  246. ccproxy/plugins/docker/README.md +32 -0
  247. ccproxy/{docker → plugins/docker}/__init__.py +3 -0
  248. ccproxy/{docker → plugins/docker}/adapter.py +108 -10
  249. ccproxy/plugins/docker/config.py +82 -0
  250. ccproxy/{docker → plugins/docker}/docker_path.py +4 -3
  251. ccproxy/{docker → plugins/docker}/middleware.py +2 -2
  252. ccproxy/plugins/docker/plugin.py +198 -0
  253. ccproxy/{docker → plugins/docker}/stream_process.py +3 -3
  254. ccproxy/plugins/duckdb_storage/README.md +26 -0
  255. ccproxy/plugins/duckdb_storage/__init__.py +1 -0
  256. ccproxy/plugins/duckdb_storage/config.py +22 -0
  257. ccproxy/plugins/duckdb_storage/plugin.py +128 -0
  258. ccproxy/plugins/duckdb_storage/routes.py +51 -0
  259. ccproxy/plugins/duckdb_storage/storage.py +633 -0
  260. ccproxy/plugins/max_tokens/README.md +38 -0
  261. ccproxy/plugins/max_tokens/__init__.py +12 -0
  262. ccproxy/plugins/max_tokens/adapter.py +235 -0
  263. ccproxy/plugins/max_tokens/config.py +86 -0
  264. ccproxy/plugins/max_tokens/models.py +53 -0
  265. ccproxy/plugins/max_tokens/plugin.py +200 -0
  266. ccproxy/plugins/max_tokens/service.py +271 -0
  267. ccproxy/plugins/max_tokens/token_limits.json +54 -0
  268. ccproxy/plugins/metrics/README.md +35 -0
  269. ccproxy/plugins/metrics/__init__.py +10 -0
  270. ccproxy/{observability/metrics.py → plugins/metrics/collector.py} +20 -153
  271. ccproxy/plugins/metrics/config.py +85 -0
  272. ccproxy/plugins/metrics/grafana/dashboards/ccproxy-dashboard.json +1720 -0
  273. ccproxy/plugins/metrics/hook.py +403 -0
  274. ccproxy/plugins/metrics/plugin.py +268 -0
  275. ccproxy/{observability → plugins/metrics}/pushgateway.py +57 -59
  276. ccproxy/plugins/metrics/routes.py +107 -0
  277. ccproxy/plugins/metrics/tasks.py +117 -0
  278. ccproxy/plugins/oauth_claude/README.md +35 -0
  279. ccproxy/plugins/oauth_claude/__init__.py +14 -0
  280. ccproxy/plugins/oauth_claude/client.py +270 -0
  281. ccproxy/plugins/oauth_claude/config.py +84 -0
  282. ccproxy/plugins/oauth_claude/manager.py +482 -0
  283. ccproxy/plugins/oauth_claude/models.py +266 -0
  284. ccproxy/plugins/oauth_claude/plugin.py +149 -0
  285. ccproxy/plugins/oauth_claude/provider.py +571 -0
  286. ccproxy/plugins/oauth_claude/storage.py +212 -0
  287. ccproxy/plugins/oauth_codex/README.md +38 -0
  288. ccproxy/plugins/oauth_codex/__init__.py +14 -0
  289. ccproxy/plugins/oauth_codex/client.py +224 -0
  290. ccproxy/plugins/oauth_codex/config.py +95 -0
  291. ccproxy/plugins/oauth_codex/manager.py +256 -0
  292. ccproxy/plugins/oauth_codex/models.py +239 -0
  293. ccproxy/plugins/oauth_codex/plugin.py +146 -0
  294. ccproxy/plugins/oauth_codex/provider.py +574 -0
  295. ccproxy/plugins/oauth_codex/storage.py +92 -0
  296. ccproxy/plugins/permissions/README.md +28 -0
  297. ccproxy/plugins/permissions/__init__.py +22 -0
  298. ccproxy/plugins/permissions/config.py +28 -0
  299. ccproxy/{cli/commands/permission_handler.py → plugins/permissions/handlers/cli.py} +49 -25
  300. ccproxy/plugins/permissions/handlers/protocol.py +33 -0
  301. ccproxy/plugins/permissions/handlers/terminal.py +675 -0
  302. ccproxy/{api/routes → plugins/permissions}/mcp.py +34 -7
  303. ccproxy/{models/permissions.py → plugins/permissions/models.py} +65 -1
  304. ccproxy/plugins/permissions/plugin.py +153 -0
  305. ccproxy/{api/routes/permissions.py → plugins/permissions/routes.py} +20 -16
  306. ccproxy/{api/services/permission_service.py → plugins/permissions/service.py} +65 -11
  307. ccproxy/{api → plugins/permissions}/ui/permission_handler_protocol.py +1 -1
  308. ccproxy/{api → plugins/permissions}/ui/terminal_permission_handler.py +66 -10
  309. ccproxy/plugins/pricing/README.md +34 -0
  310. ccproxy/plugins/pricing/__init__.py +6 -0
  311. ccproxy/{pricing → plugins/pricing}/cache.py +7 -6
  312. ccproxy/{config/pricing.py → plugins/pricing/config.py} +32 -6
  313. ccproxy/plugins/pricing/exceptions.py +35 -0
  314. ccproxy/plugins/pricing/loader.py +440 -0
  315. ccproxy/{pricing → plugins/pricing}/models.py +13 -23
  316. ccproxy/plugins/pricing/plugin.py +169 -0
  317. ccproxy/plugins/pricing/service.py +191 -0
  318. ccproxy/plugins/pricing/tasks.py +300 -0
  319. ccproxy/{pricing → plugins/pricing}/updater.py +86 -72
  320. ccproxy/plugins/pricing/utils.py +99 -0
  321. ccproxy/plugins/request_tracer/README.md +40 -0
  322. ccproxy/plugins/request_tracer/__init__.py +7 -0
  323. ccproxy/plugins/request_tracer/config.py +120 -0
  324. ccproxy/plugins/request_tracer/hook.py +415 -0
  325. ccproxy/plugins/request_tracer/plugin.py +255 -0
  326. ccproxy/scheduler/__init__.py +2 -14
  327. ccproxy/scheduler/core.py +26 -41
  328. ccproxy/scheduler/manager.py +61 -105
  329. ccproxy/scheduler/registry.py +6 -32
  330. ccproxy/scheduler/tasks.py +268 -276
  331. ccproxy/services/__init__.py +0 -1
  332. ccproxy/services/adapters/__init__.py +11 -0
  333. ccproxy/services/adapters/base.py +123 -0
  334. ccproxy/services/adapters/chain_composer.py +88 -0
  335. ccproxy/services/adapters/chain_validation.py +44 -0
  336. ccproxy/services/adapters/chat_accumulator.py +200 -0
  337. ccproxy/services/adapters/delta_utils.py +142 -0
  338. ccproxy/services/adapters/format_adapter.py +136 -0
  339. ccproxy/services/adapters/format_context.py +11 -0
  340. ccproxy/services/adapters/format_registry.py +158 -0
  341. ccproxy/services/adapters/http_adapter.py +1045 -0
  342. ccproxy/services/adapters/mock_adapter.py +118 -0
  343. ccproxy/services/adapters/protocols.py +35 -0
  344. ccproxy/services/adapters/simple_converters.py +571 -0
  345. ccproxy/services/auth_registry.py +180 -0
  346. ccproxy/services/cache/__init__.py +6 -0
  347. ccproxy/services/cache/response_cache.py +261 -0
  348. ccproxy/services/cli_detection.py +437 -0
  349. ccproxy/services/config/__init__.py +6 -0
  350. ccproxy/services/config/proxy_configuration.py +111 -0
  351. ccproxy/services/container.py +256 -0
  352. ccproxy/services/factories.py +380 -0
  353. ccproxy/services/handler_config.py +76 -0
  354. ccproxy/services/interfaces.py +298 -0
  355. ccproxy/services/mocking/__init__.py +6 -0
  356. ccproxy/services/mocking/mock_handler.py +291 -0
  357. ccproxy/services/tracing/__init__.py +7 -0
  358. ccproxy/services/tracing/interfaces.py +61 -0
  359. ccproxy/services/tracing/null_tracer.py +57 -0
  360. ccproxy/streaming/__init__.py +23 -0
  361. ccproxy/streaming/buffer.py +1056 -0
  362. ccproxy/streaming/deferred.py +897 -0
  363. ccproxy/streaming/handler.py +117 -0
  364. ccproxy/streaming/interfaces.py +77 -0
  365. ccproxy/streaming/simple_adapter.py +39 -0
  366. ccproxy/streaming/sse.py +109 -0
  367. ccproxy/streaming/sse_parser.py +127 -0
  368. ccproxy/templates/__init__.py +6 -0
  369. ccproxy/templates/plugin_scaffold.py +695 -0
  370. ccproxy/testing/endpoints/__init__.py +33 -0
  371. ccproxy/testing/endpoints/cli.py +215 -0
  372. ccproxy/testing/endpoints/config.py +874 -0
  373. ccproxy/testing/endpoints/console.py +57 -0
  374. ccproxy/testing/endpoints/models.py +100 -0
  375. ccproxy/testing/endpoints/runner.py +1903 -0
  376. ccproxy/testing/endpoints/tools.py +308 -0
  377. ccproxy/testing/mock_responses.py +70 -1
  378. ccproxy/testing/response_handlers.py +20 -0
  379. ccproxy/utils/__init__.py +0 -6
  380. ccproxy/utils/binary_resolver.py +476 -0
  381. ccproxy/utils/caching.py +327 -0
  382. ccproxy/utils/cli_logging.py +101 -0
  383. ccproxy/utils/command_line.py +251 -0
  384. ccproxy/utils/headers.py +228 -0
  385. ccproxy/utils/model_mapper.py +120 -0
  386. ccproxy/utils/startup_helpers.py +68 -446
  387. ccproxy/utils/version_checker.py +273 -6
  388. ccproxy_api-0.2.0a4.dist-info/METADATA +212 -0
  389. ccproxy_api-0.2.0a4.dist-info/RECORD +417 -0
  390. {ccproxy_api-0.1.7.dist-info → ccproxy_api-0.2.0a4.dist-info}/WHEEL +1 -1
  391. ccproxy_api-0.2.0a4.dist-info/entry_points.txt +24 -0
  392. ccproxy/__init__.py +0 -4
  393. ccproxy/adapters/__init__.py +0 -11
  394. ccproxy/adapters/base.py +0 -80
  395. ccproxy/adapters/codex/__init__.py +0 -11
  396. ccproxy/adapters/openai/__init__.py +0 -42
  397. ccproxy/adapters/openai/adapter.py +0 -953
  398. ccproxy/adapters/openai/models.py +0 -412
  399. ccproxy/adapters/openai/response_adapter.py +0 -355
  400. ccproxy/adapters/openai/response_models.py +0 -178
  401. ccproxy/api/middleware/headers.py +0 -49
  402. ccproxy/api/middleware/logging.py +0 -180
  403. ccproxy/api/middleware/request_content_logging.py +0 -297
  404. ccproxy/api/middleware/server_header.py +0 -58
  405. ccproxy/api/responses.py +0 -89
  406. ccproxy/api/routes/claude.py +0 -371
  407. ccproxy/api/routes/codex.py +0 -1251
  408. ccproxy/api/routes/metrics.py +0 -1029
  409. ccproxy/api/routes/proxy.py +0 -211
  410. ccproxy/api/services/__init__.py +0 -6
  411. ccproxy/auth/conditional.py +0 -84
  412. ccproxy/auth/credentials_adapter.py +0 -93
  413. ccproxy/auth/models.py +0 -118
  414. ccproxy/auth/oauth/models.py +0 -48
  415. ccproxy/auth/openai/__init__.py +0 -13
  416. ccproxy/auth/openai/credentials.py +0 -166
  417. ccproxy/auth/openai/oauth_client.py +0 -334
  418. ccproxy/auth/openai/storage.py +0 -184
  419. ccproxy/auth/storage/json_file.py +0 -158
  420. ccproxy/auth/storage/keyring.py +0 -189
  421. ccproxy/claude_sdk/__init__.py +0 -18
  422. ccproxy/claude_sdk/options.py +0 -194
  423. ccproxy/claude_sdk/session_pool.py +0 -550
  424. ccproxy/cli/docker/__init__.py +0 -34
  425. ccproxy/cli/docker/adapter_factory.py +0 -157
  426. ccproxy/cli/docker/params.py +0 -274
  427. ccproxy/config/auth.py +0 -153
  428. ccproxy/config/claude.py +0 -348
  429. ccproxy/config/cors.py +0 -79
  430. ccproxy/config/discovery.py +0 -95
  431. ccproxy/config/docker_settings.py +0 -264
  432. ccproxy/config/observability.py +0 -158
  433. ccproxy/config/reverse_proxy.py +0 -31
  434. ccproxy/config/scheduler.py +0 -108
  435. ccproxy/config/server.py +0 -86
  436. ccproxy/config/validators.py +0 -231
  437. ccproxy/core/codex_transformers.py +0 -389
  438. ccproxy/core/http.py +0 -328
  439. ccproxy/core/http_transformers.py +0 -812
  440. ccproxy/core/proxy.py +0 -143
  441. ccproxy/core/validators.py +0 -288
  442. ccproxy/models/errors.py +0 -42
  443. ccproxy/models/messages.py +0 -269
  444. ccproxy/models/requests.py +0 -107
  445. ccproxy/models/responses.py +0 -270
  446. ccproxy/models/types.py +0 -102
  447. ccproxy/observability/__init__.py +0 -51
  448. ccproxy/observability/access_logger.py +0 -457
  449. ccproxy/observability/sse_events.py +0 -303
  450. ccproxy/observability/stats_printer.py +0 -753
  451. ccproxy/observability/storage/__init__.py +0 -1
  452. ccproxy/observability/storage/duckdb_simple.py +0 -677
  453. ccproxy/observability/storage/models.py +0 -70
  454. ccproxy/observability/streaming_response.py +0 -107
  455. ccproxy/pricing/__init__.py +0 -19
  456. ccproxy/pricing/loader.py +0 -251
  457. ccproxy/services/claude_detection_service.py +0 -243
  458. ccproxy/services/codex_detection_service.py +0 -252
  459. ccproxy/services/credentials/__init__.py +0 -55
  460. ccproxy/services/credentials/config.py +0 -105
  461. ccproxy/services/credentials/manager.py +0 -561
  462. ccproxy/services/credentials/oauth_client.py +0 -481
  463. ccproxy/services/proxy_service.py +0 -1827
  464. ccproxy/static/.keep +0 -0
  465. ccproxy/utils/cost_calculator.py +0 -210
  466. ccproxy/utils/disconnection_monitor.py +0 -83
  467. ccproxy/utils/model_mapping.py +0 -199
  468. ccproxy/utils/models_provider.py +0 -150
  469. ccproxy/utils/simple_request_logger.py +0 -284
  470. ccproxy/utils/streaming_metrics.py +0 -199
  471. ccproxy_api-0.1.7.dist-info/METADATA +0 -615
  472. ccproxy_api-0.1.7.dist-info/RECORD +0 -191
  473. ccproxy_api-0.1.7.dist-info/entry_points.txt +0 -4
  474. ccproxy/{api/middleware/auth.py → auth/models/__init__.py} +0 -0
  475. ccproxy/{claude_sdk → plugins/claude_sdk}/exceptions.py +0 -0
  476. ccproxy/{docker → plugins/docker}/models.py +0 -0
  477. ccproxy/{docker → plugins/docker}/protocol.py +0 -0
  478. ccproxy/{docker → plugins/docker}/validators.py +0 -0
  479. ccproxy/{auth/oauth/storage.py → plugins/permissions/handlers/__init__.py} +0 -0
  480. ccproxy/{api → plugins/permissions}/ui/__init__.py +0 -0
  481. {ccproxy_api-0.1.7.dist-info → ccproxy_api-0.2.0a4.dist-info}/licenses/LICENSE +0 -0
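
The dominant change in 0.2.0a4 is a restructuring into a plugin architecture: provider integrations move under ccproxy/plugins/ (claude_api, claude_sdk, codex, copilot, metrics, pricing, and others), a plugin framework appears under ccproxy/core/plugins/ (declaration, discovery, loading, hooks, factories), and entry_points.txt grows from 4 lines to 24. As a rough illustration only (the group name "ccproxy.plugins" and the return shape are assumptions, not this package's confirmed API), entry-point-based plugin discovery in Python typically looks like this:

    # Hypothetical sketch of entry-point plugin discovery; the group name
    # "ccproxy.plugins" is assumed, not taken from this package.
    from importlib.metadata import entry_points

    def load_plugins(group: str = "ccproxy.plugins") -> dict[str, object]:
        """Import every plugin registered under the given entry-point group."""
        plugins: dict[str, object] = {}
        for ep in entry_points(group=group):  # Python 3.10+ selection API
            plugins[ep.name] = ep.load()  # imports whatever the entry point references
        return plugins

The largest single removal is the monolithic proxy orchestrator, shown below.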
--- ccproxy/services/proxy_service.py
+++ /dev/null
@@ -1,1827 +0,0 @@
1
- """Proxy service for orchestrating Claude API requests with business logic."""
2
-
3
- import asyncio
4
- import json
5
- import os
6
- import random
7
- import time
8
- from collections.abc import AsyncGenerator
9
- from pathlib import Path
10
- from typing import TYPE_CHECKING, Any
11
-
12
- import httpx
13
- import structlog
14
- from fastapi import HTTPException, Request
15
- from fastapi.responses import StreamingResponse
16
- from starlette.responses import Response
17
- from typing_extensions import TypedDict
18
-
19
- from ccproxy.config.settings import Settings
20
- from ccproxy.core.codex_transformers import CodexRequestTransformer
21
- from ccproxy.core.http import BaseProxyClient
22
- from ccproxy.core.http_transformers import (
23
- HTTPRequestTransformer,
24
- HTTPResponseTransformer,
25
- )
26
- from ccproxy.observability import (
27
- PrometheusMetrics,
28
- get_metrics,
29
- request_context,
30
- timed_operation,
31
- )
32
- from ccproxy.observability.access_logger import log_request_access
33
- from ccproxy.observability.streaming_response import StreamingResponseWithLogging
34
- from ccproxy.services.credentials.manager import CredentialsManager
35
- from ccproxy.testing import RealisticMockResponseGenerator
36
- from ccproxy.utils.simple_request_logger import (
37
- append_streaming_log,
38
- write_request_log,
39
- )
40
-
41
-
42
- if TYPE_CHECKING:
43
- from ccproxy.observability.context import RequestContext
44
-
45
-
46
- class RequestData(TypedDict):
47
- """Typed structure for transformed request data."""
48
-
49
- method: str
50
- url: str
51
- headers: dict[str, str]
52
- body: bytes | None
53
-
54
-
55
- class ResponseData(TypedDict):
56
- """Typed structure for transformed response data."""
57
-
58
- status_code: int
59
- headers: dict[str, str]
60
- body: bytes
61
-
62
-
63
- logger = structlog.get_logger(__name__)
64
-
65
-
66
- class ProxyService:
67
- """Claude-specific proxy orchestration with business logic.
68
-
69
- This service orchestrates the complete proxy flow including:
70
- - Authentication management
71
- - Request/response transformations
72
- - Metrics collection (future)
73
- - Error handling and logging
74
-
75
- Pure HTTP forwarding is delegated to BaseProxyClient.
76
- """
77
-
78
- SENSITIVE_HEADERS = {"authorization", "x-api-key", "cookie", "set-cookie"}
79
-
80
- def __init__(
81
- self,
82
- proxy_client: BaseProxyClient,
83
- credentials_manager: CredentialsManager,
84
- settings: Settings,
85
- proxy_mode: str = "full",
86
- target_base_url: str = "https://api.anthropic.com",
87
- metrics: PrometheusMetrics | None = None,
88
- app_state: Any = None,
89
- ) -> None:
90
- """Initialize the proxy service.
91
-
92
- Args:
93
- proxy_client: HTTP client for pure forwarding
94
- credentials_manager: Authentication manager
95
- settings: Application settings
96
- proxy_mode: Transformation mode - "minimal" or "full"
97
- target_base_url: Base URL for the target API
98
- metrics: Prometheus metrics collector (optional)
99
- app_state: FastAPI app state for accessing detection data
100
- """
101
- self.proxy_client = proxy_client
102
- self.credentials_manager = credentials_manager
103
- self.settings = settings
104
- self.proxy_mode = proxy_mode
105
- self.target_base_url = target_base_url.rstrip("/")
106
- self.metrics = metrics or get_metrics()
107
- self.app_state = app_state
108
-
109
- # Create concrete transformers
110
- self.request_transformer = HTTPRequestTransformer()
111
- self.response_transformer = HTTPResponseTransformer()
112
- self.codex_transformer = CodexRequestTransformer()
113
-
114
- # Create OpenAI adapter for stream transformation
115
- from ccproxy.adapters.openai.adapter import OpenAIAdapter
116
-
117
- self.openai_adapter = OpenAIAdapter()
118
-
119
- # Create mock response generator for bypass mode
120
- self.mock_generator = RealisticMockResponseGenerator()
121
-
122
- # Cache environment-based configuration
123
- self._proxy_url = self._init_proxy_url()
124
- self._ssl_context = self._init_ssl_context()
125
- self._verbose_streaming = (
126
- os.environ.get("CCPROXY_VERBOSE_STREAMING", "false").lower() == "true"
127
- )
128
- self._verbose_api = (
129
- os.environ.get("CCPROXY_VERBOSE_API", "false").lower() == "true"
130
- )
131
-
132
- def _init_proxy_url(self) -> str | None:
133
- """Initialize proxy URL from environment variables."""
134
- # Check for standard proxy environment variables
135
- # For HTTPS requests, prioritize HTTPS_PROXY
136
- https_proxy = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy")
137
- all_proxy = os.environ.get("ALL_PROXY")
138
- http_proxy = os.environ.get("HTTP_PROXY") or os.environ.get("http_proxy")
139
-
140
- proxy_url = https_proxy or all_proxy or http_proxy
141
-
142
- if proxy_url:
143
- logger.debug("proxy_configured", proxy_url=proxy_url)
144
-
145
- return proxy_url
146
-
147
- def _init_ssl_context(self) -> str | bool:
148
- """Initialize SSL context configuration from environment variables."""
149
- # Check for custom CA bundle
150
- ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get(
151
- "SSL_CERT_FILE"
152
- )
153
-
154
- # Check if SSL verification should be disabled (NOT RECOMMENDED)
155
- ssl_verify = os.environ.get("SSL_VERIFY", "true").lower()
156
-
157
- if ca_bundle and Path(ca_bundle).exists():
158
- logger.info("ca_bundle_configured", ca_bundle=ca_bundle)
159
- return ca_bundle
160
- elif ssl_verify in ("false", "0", "no"):
161
- logger.warning("ssl_verification_disabled")
162
- return False
163
- else:
164
- logger.debug("ssl_verification_default")
165
- return True
166
-
167
- async def handle_request(
168
- self,
169
- method: str,
170
- path: str,
171
- headers: dict[str, str],
172
- body: bytes | None = None,
173
- query_params: dict[str, str | list[str]] | None = None,
174
- timeout: float = 240.0,
175
- request: Request | None = None, # Optional FastAPI Request object
176
- ) -> tuple[int, dict[str, str], bytes] | StreamingResponse:
177
- """Handle a proxy request with full business logic orchestration.
178
-
179
- Args:
180
- method: HTTP method
181
- path: Request path (without /unclaude prefix)
182
- headers: Request headers
183
- body: Request body
184
- query_params: Query parameters
185
- timeout: Request timeout in seconds
186
- request: Optional FastAPI Request object for accessing request context
187
-
188
- Returns:
189
- Tuple of (status_code, headers, body) or StreamingResponse for streaming
190
-
191
- Raises:
192
- HTTPException: If request fails
193
- """
194
- # Extract request metadata
195
- model, streaming = self._extract_request_metadata(body)
196
- endpoint = path.split("/")[-1] if path else "unknown"
197
-
198
- # Use existing context from request if available, otherwise create new one
199
- if request and hasattr(request, "state") and hasattr(request.state, "context"):
200
- # Use existing context from middleware
201
- ctx = request.state.context
202
- # Add service-specific metadata
203
- ctx.add_metadata(
204
- endpoint=endpoint,
205
- model=model,
206
- streaming=streaming,
207
- service_type="proxy_service",
208
- )
209
- # Create a context manager that preserves the existing context's lifecycle
210
- # This ensures __aexit__ is called for proper access logging
211
- from contextlib import asynccontextmanager
212
-
213
- @asynccontextmanager
214
- async def existing_context_manager() -> AsyncGenerator[Any, None]:
215
- try:
216
- yield ctx
217
- finally:
218
- # Let the existing context handle its own lifecycle
219
- # The middleware or parent context will call __aexit__
220
- pass
221
-
222
- context_manager: Any = existing_context_manager()
223
- else:
224
- # Create new context for observability
225
- context_manager = request_context(
226
- method=method,
227
- path=path,
228
- endpoint=endpoint,
229
- model=model,
230
- streaming=streaming,
231
- service_type="proxy_service",
232
- metrics=self.metrics,
233
- )
234
-
235
- async with context_manager as ctx:
236
- try:
237
- # 1. Authentication - get access token
238
- async with timed_operation("oauth_token", ctx.request_id):
239
- logger.debug("oauth_token_retrieval_start")
240
- access_token = await self._get_access_token()
241
-
242
- # 2. Request transformation
243
- async with timed_operation("request_transform", ctx.request_id):
244
- injection_mode = (
245
- self.settings.claude.system_prompt_injection_mode.value
246
- )
247
- logger.debug(
248
- "request_transform_start",
249
- system_prompt_injection_mode=injection_mode,
250
- )
251
- transformed_request = (
252
- await self.request_transformer.transform_proxy_request(
253
- method,
254
- path,
255
- headers,
256
- body,
257
- query_params,
258
- access_token,
259
- self.target_base_url,
260
- self.app_state,
261
- injection_mode,
262
- )
263
- )
264
-
265
- # 3. Check for bypass header to skip upstream forwarding
266
- bypass_upstream = (
267
- headers.get("X-CCProxy-Bypass-Upstream", "").lower() == "true"
268
- )
269
-
270
- if bypass_upstream:
271
- logger.debug("bypassing_upstream_forwarding_due_to_header")
272
- # Determine message type from request body for realistic response generation
273
- message_type = self._extract_message_type_from_body(body)
274
-
275
- # Check if this will be a streaming response
276
- should_stream = streaming or self._should_stream_response(
277
- transformed_request["headers"]
278
- )
279
-
280
- # Determine response format based on original request path
281
- is_openai_format = self.response_transformer._is_openai_request(
282
- path
283
- )
284
-
285
- if should_stream:
286
- return await self._generate_bypass_streaming_response(
287
- model, is_openai_format, ctx, message_type
288
- )
289
- else:
290
- return await self._generate_bypass_standard_response(
291
- model, is_openai_format, ctx, message_type
292
- )
293
-
294
- # 3. Forward request using proxy client
295
- logger.debug("request_forwarding_start", url=transformed_request["url"])
296
-
297
- # Check if this will be a streaming response
298
- should_stream = streaming or self._should_stream_response(
299
- transformed_request["headers"]
300
- )
301
-
302
- if should_stream:
303
- logger.debug("streaming_response_detected")
304
- return await self._handle_streaming_request(
305
- transformed_request, path, timeout, ctx
306
- )
307
- else:
308
- logger.debug("non_streaming_response_detected")
309
-
310
- # Log the outgoing request if verbose API logging is enabled
311
- await self._log_verbose_api_request(transformed_request, ctx)
312
-
313
- # Handle regular request
314
- async with timed_operation("api_call", ctx.request_id) as api_op:
315
- start_time = time.perf_counter()
316
-
317
- (
318
- status_code,
319
- response_headers,
320
- response_body,
321
- ) = await self.proxy_client.forward(
322
- method=transformed_request["method"],
323
- url=transformed_request["url"],
324
- headers=transformed_request["headers"],
325
- body=transformed_request["body"],
326
- timeout=timeout,
327
- )
328
-
329
- end_time = time.perf_counter()
330
- api_duration = end_time - start_time
331
- api_op["duration_seconds"] = api_duration
332
-
333
- # Log the received response if verbose API logging is enabled
334
- await self._log_verbose_api_response(
335
- status_code, response_headers, response_body, ctx
336
- )
337
-
338
- # 4. Response transformation
339
- async with timed_operation("response_transform", ctx.request_id):
340
- logger.debug("response_transform_start")
341
- # For error responses, transform to OpenAI format if needed
342
- transformed_response: ResponseData
343
- if status_code >= 400:
344
- logger.info(
345
- "upstream_error_received",
346
- status_code=status_code,
347
- has_body=bool(response_body),
348
- content_length=len(response_body) if response_body else 0,
349
- )
350
-
351
- # Use transformer to handle error transformation (including OpenAI format)
352
- transformed_response = (
353
- await self.response_transformer.transform_proxy_response(
354
- status_code,
355
- response_headers,
356
- response_body,
357
- path,
358
- self.proxy_mode,
359
- )
360
- )
361
- else:
362
- transformed_response = (
363
- await self.response_transformer.transform_proxy_response(
364
- status_code,
365
- response_headers,
366
- response_body,
367
- path,
368
- self.proxy_mode,
369
- )
370
- )
371
-
372
- # 5. Extract response metrics using direct JSON parsing
373
- tokens_input = tokens_output = cache_read_tokens = (
374
- cache_write_tokens
375
- ) = cost_usd = None
376
- if transformed_response["body"]:
377
- try:
378
- response_data = json.loads(
379
- transformed_response["body"].decode("utf-8")
380
- )
381
- usage = response_data.get("usage", {})
382
- tokens_input = usage.get("input_tokens")
383
- tokens_output = usage.get("output_tokens")
384
- cache_read_tokens = usage.get("cache_read_input_tokens")
385
- cache_write_tokens = usage.get("cache_creation_input_tokens")
386
-
387
- # Calculate cost including cache tokens if we have tokens and model
388
- from ccproxy.utils.cost_calculator import calculate_token_cost
389
-
390
- cost_usd = calculate_token_cost(
391
- tokens_input,
392
- tokens_output,
393
- model,
394
- cache_read_tokens,
395
- cache_write_tokens,
396
- )
397
- except (json.JSONDecodeError, UnicodeDecodeError):
398
- pass # Keep all values as None if parsing fails
399
-
400
- # 6. Update context with response data
401
- ctx.add_metadata(
402
- status_code=status_code,
403
- tokens_input=tokens_input,
404
- tokens_output=tokens_output,
405
- cache_read_tokens=cache_read_tokens,
406
- cache_write_tokens=cache_write_tokens,
407
- cost_usd=cost_usd,
408
- )
409
-
410
- return (
411
- transformed_response["status_code"],
412
- transformed_response["headers"],
413
- transformed_response["body"],
414
- )
415
-
416
- except Exception as e:
417
- ctx.add_metadata(error=e)
418
- raise
419
-
420
- async def handle_codex_request(
421
- self,
422
- method: str,
423
- path: str,
424
- session_id: str,
425
- access_token: str,
426
- request: Request,
427
- settings: Settings,
428
- ) -> StreamingResponse | Response:
429
- """Handle OpenAI Codex proxy request with request/response capture.
430
-
431
- Args:
432
- method: HTTP method
433
- path: Request path (e.g., "/responses" or "/{session_id}/responses")
434
- session_id: Resolved session ID
435
- access_token: OpenAI access token
436
- request: FastAPI request object
437
- settings: Application settings
438
-
439
- Returns:
440
- StreamingResponse or regular Response
441
- """
442
- try:
443
- # Read request body - check if already stored by middleware
444
- if hasattr(request.state, "body"):
445
- body = request.state.body
446
- else:
447
- body = await request.body()
448
-
449
- # Parse request data to capture the instructions field and other metadata
450
- request_data = None
451
- try:
452
- request_data = json.loads(body.decode("utf-8")) if body else {}
453
- except (json.JSONDecodeError, UnicodeDecodeError) as e:
454
- request_data = {}
455
- logger.warning(
456
- "codex_json_decode_failed",
457
- error=str(e),
458
- body_preview=body[:100].decode("utf-8", errors="replace")
459
- if body
460
- else None,
461
- body_length=len(body) if body else 0,
462
- )
463
-
464
- # Parse request to extract account_id from token if available
465
- import jwt
466
-
467
- account_id = "unknown"
468
- try:
469
- decoded = jwt.decode(access_token, options={"verify_signature": False})
470
- account_id = decoded.get(
471
- "org_id", decoded.get("sub", decoded.get("account_id", "unknown"))
472
- )
473
- except Exception:
474
- pass
475
-
476
- # Get Codex detection data from app state
477
- codex_detection_data = None
478
- if self.app_state and hasattr(self.app_state, "codex_detection_data"):
479
- codex_detection_data = self.app_state.codex_detection_data
480
-
481
- # Use CodexRequestTransformer to build request
482
- original_headers = dict(request.headers)
483
- transformed_request = await self.codex_transformer.transform_codex_request(
484
- method=method,
485
- path=path,
486
- headers=original_headers,
487
- body=body,
488
- access_token=access_token,
489
- session_id=session_id,
490
- account_id=account_id,
491
- codex_detection_data=codex_detection_data,
492
- target_base_url=settings.codex.base_url,
493
- )
494
-
495
- target_url = transformed_request["url"]
496
- headers = transformed_request["headers"]
497
- transformed_body = transformed_request["body"] or body
498
-
499
- # Parse transformed body for logging
500
- transformed_request_data = request_data
501
- if transformed_body and transformed_body != body:
502
- try:
503
- transformed_request_data = json.loads(
504
- transformed_body.decode("utf-8")
505
- )
506
- except (json.JSONDecodeError, UnicodeDecodeError):
507
- transformed_request_data = request_data
508
-
509
- # Generate request ID for logging
510
- from uuid import uuid4
511
-
512
- request_id = f"codex_{uuid4().hex[:8]}"
513
-
514
- # Log Codex request (including instructions field and headers)
515
- await self._log_codex_request(
516
- request_id=request_id,
517
- method=method,
518
- url=target_url,
519
- headers=headers,
520
- body_data=transformed_request_data,
521
- session_id=session_id,
522
- )
523
-
524
- # Check if user explicitly requested streaming (from original request)
525
- user_requested_streaming = self.codex_transformer._is_streaming_request(
526
- body
527
- )
528
-
529
- # Forward request to ChatGPT backend
530
- if user_requested_streaming:
531
- # Handle streaming request with proper context management
532
- # First, collect the response to check for errors
533
- collected_chunks = []
534
- chunk_count = 0
535
- total_bytes = 0
536
- response_status_code = 200
537
- response_headers = {}
538
-
539
- async def stream_codex_response() -> AsyncGenerator[bytes, None]:
540
- nonlocal \
541
- collected_chunks, \
542
- chunk_count, \
543
- total_bytes, \
544
- response_status_code, \
545
- response_headers
546
-
547
- logger.debug(
548
- "proxy_service_streaming_started",
549
- request_id=request_id,
550
- session_id=session_id,
551
- )
552
-
553
- async with (
554
- httpx.AsyncClient(timeout=240.0) as client,
555
- client.stream(
556
- method=method,
557
- url=target_url,
558
- headers=headers,
559
- content=transformed_body,
560
- ) as response,
561
- ):
562
- # Capture response info for error checking
563
- response_status_code = response.status_code
564
- response_headers = dict(response.headers)
565
-
566
- # Log response headers for streaming
567
- await self._log_codex_response_headers(
568
- request_id=request_id,
569
- status_code=response.status_code,
570
- headers=dict(response.headers),
571
- stream_type="codex_sse",
572
- )
573
-
574
- # Check if upstream actually returned streaming
575
- content_type = response.headers.get("content-type", "")
576
- is_streaming = "text/event-stream" in content_type
577
-
578
- if not is_streaming:
579
- logger.warning(
580
- "codex_expected_streaming_but_got_regular",
581
- content_type=content_type,
582
- status_code=response.status_code,
583
- )
584
-
585
- async for chunk in response.aiter_bytes():
586
- chunk_count += 1
587
- chunk_size = len(chunk)
588
- total_bytes += chunk_size
589
- collected_chunks.append(chunk)
590
-
591
- logger.debug(
592
- "proxy_service_streaming_chunk",
593
- request_id=request_id,
594
- chunk_number=chunk_count,
595
- chunk_size=chunk_size,
596
- total_bytes=total_bytes,
597
- )
598
-
599
- yield chunk
600
-
601
- logger.debug(
602
- "proxy_service_streaming_complete",
603
- request_id=request_id,
604
- total_chunks=chunk_count,
605
- total_bytes=total_bytes,
606
- )
607
-
608
- # Log the complete stream data after streaming finishes
609
- await self._log_codex_streaming_complete(
610
- request_id=request_id,
611
- chunks=collected_chunks,
612
- )
613
-
614
- # Execute the stream generator to collect the response
615
- generator_chunks = []
616
- async for chunk in stream_codex_response():
617
- generator_chunks.append(chunk)
618
-
619
- # Now check if this should be an error response
620
- content_type = response_headers.get("content-type", "")
621
- if (
622
- response_status_code >= 400
623
- and "text/event-stream" not in content_type
624
- ):
625
- # Return error as regular Response with proper status code
626
- error_content = b"".join(collected_chunks)
627
- logger.warning(
628
- "codex_returning_error_as_regular_response",
629
- status_code=response_status_code,
630
- content_type=content_type,
631
- content_preview=error_content[:200].decode(
632
- "utf-8", errors="replace"
633
- ),
634
- )
635
- return Response(
636
- content=error_content,
637
- status_code=response_status_code,
638
- headers=response_headers,
639
- )
640
-
641
- # Return normal streaming response
642
- async def replay_stream() -> AsyncGenerator[bytes, None]:
643
- for chunk in generator_chunks:
644
- yield chunk
645
-
646
- # Forward upstream headers but filter out incompatible ones for streaming
647
- streaming_headers = dict(response_headers)
648
- # Remove headers that conflict with streaming responses
649
- streaming_headers.pop("content-length", None)
650
- streaming_headers.pop("content-encoding", None)
651
- streaming_headers.pop("date", None)
652
- # Set streaming-specific headers
653
- streaming_headers.update(
654
- {
655
- "content-type": "text/event-stream",
656
- "cache-control": "no-cache",
657
- "connection": "keep-alive",
658
- }
659
- )
660
-
661
- return StreamingResponse(
662
- replay_stream(),
663
- media_type="text/event-stream",
664
- headers=streaming_headers,
665
- )
666
- else:
667
- # Handle non-streaming request
668
- async with httpx.AsyncClient(timeout=240.0) as client:
669
- response = await client.request(
670
- method=method,
671
- url=target_url,
672
- headers=headers,
673
- content=transformed_body,
674
- )
675
-
676
- # Check if upstream response is streaming (shouldn't happen)
677
- content_type = response.headers.get("content-type", "")
678
- transfer_encoding = response.headers.get("transfer-encoding", "")
679
- upstream_is_streaming = "text/event-stream" in content_type or (
680
- transfer_encoding == "chunked" and content_type == ""
681
- )
682
-
683
- logger.debug(
684
- "codex_response_non_streaming",
685
- content_type=content_type,
686
- user_requested_streaming=user_requested_streaming,
687
- upstream_is_streaming=upstream_is_streaming,
688
- transfer_encoding=transfer_encoding,
689
- )
690
-
691
- if upstream_is_streaming:
692
- # Upstream is streaming but user didn't request streaming
693
- # Collect all streaming data and return as JSON
694
- logger.debug(
695
- "converting_upstream_stream_to_json", request_id=request_id
696
- )
697
-
698
- collected_chunks = []
699
- async for chunk in response.aiter_bytes():
700
- collected_chunks.append(chunk)
701
-
702
- # Combine all chunks
703
- full_content = b"".join(collected_chunks)
704
-
705
- # Try to parse the streaming data and extract the final response
706
- try:
707
- # Parse SSE data to extract JSON response
708
- content_str = full_content.decode("utf-8")
709
- lines = content_str.strip().split("\n")
710
-
711
- # Look for the last data line with JSON content
712
- final_json = None
713
- for line in reversed(lines):
714
- if line.startswith("data: ") and not line.endswith(
715
- "[DONE]"
716
- ):
717
- try:
718
- json_str = line[6:] # Remove "data: " prefix
719
- final_json = json.loads(json_str)
720
- break
721
- except json.JSONDecodeError:
722
- continue
723
-
724
- if final_json:
725
- response_content = json.dumps(final_json).encode(
726
- "utf-8"
727
- )
728
- else:
729
- # Fallback: return the raw content
730
- response_content = full_content
731
-
732
- except (UnicodeDecodeError, json.JSONDecodeError):
733
- # Fallback: return raw content
734
- response_content = full_content
735
-
736
- # Log the complete response
737
- try:
738
- response_data = json.loads(response_content.decode("utf-8"))
739
- except (json.JSONDecodeError, UnicodeDecodeError):
740
- response_data = {
741
- "raw_content": response_content.decode(
742
- "utf-8", errors="replace"
743
- )
744
- }
745
-
746
- await self._log_codex_response(
747
- request_id=request_id,
748
- status_code=response.status_code,
749
- headers=dict(response.headers),
750
- body_data=response_data,
751
- )
752
-
753
- # Return as JSON response
754
- return Response(
755
- content=response_content,
756
- status_code=response.status_code,
757
- headers={
758
- "content-type": "application/json",
759
- "content-length": str(len(response_content)),
760
- },
761
- media_type="application/json",
762
- )
763
- else:
764
- # For regular non-streaming responses
765
- response_data = None
766
- try:
767
- response_data = (
768
- json.loads(response.content.decode("utf-8"))
769
- if response.content
770
- else {}
771
- )
772
- except (json.JSONDecodeError, UnicodeDecodeError):
773
- response_data = {
774
- "raw_content": response.content.decode(
775
- "utf-8", errors="replace"
776
- )
777
- }
778
-
779
- await self._log_codex_response(
780
- request_id=request_id,
781
- status_code=response.status_code,
782
- headers=dict(response.headers),
783
- body_data=response_data,
784
- )
785
-
786
- # Return regular response
787
- return Response(
788
- content=response.content,
789
- status_code=response.status_code,
790
- headers=dict(response.headers),
791
- media_type=response.headers.get("content-type"),
792
- )
793
-
794
- except Exception as e:
795
- logger.error("Codex request failed", error=str(e), session_id=session_id)
796
- raise
797
-
798
- async def _get_access_token(self) -> str:
799
- """Get access token for upstream authentication.
800
-
801
- Uses OAuth credentials from Claude CLI for upstream authentication.
802
-
803
- NOTE: The SECURITY__AUTH_TOKEN is only for authenticating incoming requests,
804
- not for upstream authentication.
805
-
806
- Returns:
807
- Valid access token
808
-
809
- Raises:
810
- HTTPException: If no valid token is available
811
- """
812
- # Always use OAuth credentials for upstream authentication
813
- # The SECURITY__AUTH_TOKEN is only for client authentication, not upstream
814
- try:
815
- access_token = await self.credentials_manager.get_access_token()
816
- if not access_token:
817
- logger.error("oauth_token_unavailable")
818
-
819
- # Try to get more details about credential status
820
- try:
821
- validation = await self.credentials_manager.validate()
822
-
823
- if (
824
- validation.valid
825
- and validation.expired
826
- and validation.credentials
827
- ):
828
- logger.debug(
829
- "oauth_token_expired",
830
- expired_at=str(
831
- validation.credentials.claude_ai_oauth.expires_at
832
- ),
833
- )
834
- except Exception as e:
835
- logger.debug(
836
- "credential_check_failed",
837
- error=str(e),
838
- exc_info=True,
839
- )
840
-
841
- raise HTTPException(
842
- status_code=401,
843
- detail="No valid OAuth credentials found. Please run 'ccproxy auth login'.",
844
- )
845
-
846
- logger.debug("oauth_token_retrieved")
847
- return access_token
848
-
849
- except HTTPException:
850
- raise
851
- except Exception as e:
852
- logger.error("oauth_token_retrieval_failed", error=str(e), exc_info=True)
853
- raise HTTPException(
854
- status_code=401,
855
- detail="Authentication failed",
856
- ) from e
857
-
858
- def _redact_headers(self, headers: dict[str, str]) -> dict[str, str]:
859
- """Redact sensitive information from headers for safe logging."""
860
- return {
861
- k: "[REDACTED]" if k.lower() in self.SENSITIVE_HEADERS else v
862
- for k, v in headers.items()
863
- }
864
-
865
- async def _log_verbose_api_request(
866
- self, request_data: RequestData, ctx: "RequestContext"
867
- ) -> None:
868
- """Log details of an outgoing API request if verbose logging is enabled."""
869
- if not self._verbose_api:
870
- return
871
-
872
- body = request_data.get("body")
873
- body_preview = ""
874
- full_body = None
875
- if body:
876
- try:
877
- full_body = body.decode("utf-8", errors="replace")
878
- # Truncate at 1024 chars for readability
879
- body_preview = full_body[:1024]
880
- # Try to parse as JSON for better formatting
881
- try:
882
- import json
883
-
884
- full_body = json.loads(full_body)
885
- except json.JSONDecodeError:
886
- pass # Keep as string
887
- except Exception:
888
- body_preview = f"<binary data of length {len(body)}>"
889
-
890
- logger.info(
891
- "verbose_api_request",
892
- method=request_data["method"],
893
- url=request_data["url"],
894
- headers=self._redact_headers(request_data["headers"]),
895
- body_size=len(body) if body else 0,
896
- body_preview=body_preview,
897
- )
898
-
899
- # Use new request logging system
900
- request_id = ctx.request_id
901
- timestamp = ctx.get_log_timestamp_prefix()
902
- await write_request_log(
903
- request_id=request_id,
904
- log_type="upstream_request",
905
- data={
906
- "method": request_data["method"],
907
- "url": request_data["url"],
908
- "headers": dict(request_data["headers"]), # Don't redact in file
909
- "body": full_body,
910
- },
911
- timestamp=timestamp,
912
- )
913
-
914
- async def _log_verbose_api_response(
915
- self,
916
- status_code: int,
917
- headers: dict[str, str],
918
- body: bytes,
919
- ctx: "RequestContext",
920
- ) -> None:
921
- """Log details of a received API response if verbose logging is enabled."""
922
- if not self._verbose_api:
923
- return
924
-
925
- body_preview = ""
926
- if body:
927
- try:
928
- # Truncate at 1024 chars for readability
929
- body_preview = body.decode("utf-8", errors="replace")[:1024]
930
- except Exception:
931
- body_preview = f"<binary data of length {len(body)}>"
932
-
933
- logger.info(
934
- "verbose_api_response",
935
- status_code=status_code,
936
- headers=self._redact_headers(headers),
937
- body_size=len(body),
938
- body_preview=body_preview,
939
- )
940
-
941
- # Use new request logging system
942
- full_body = None
943
- if body:
944
- try:
945
- full_body_str = body.decode("utf-8", errors="replace")
946
- # Try to parse as JSON for better formatting
947
- try:
948
- full_body = json.loads(full_body_str)
949
- except json.JSONDecodeError:
950
- full_body = full_body_str
951
- except Exception:
952
- full_body = f"<binary data of length {len(body)}>"
953
-
954
- # Use new request logging system
955
- request_id = ctx.request_id
956
- timestamp = ctx.get_log_timestamp_prefix()
957
- await write_request_log(
958
- request_id=request_id,
959
- log_type="upstream_response",
960
- data={
961
- "status_code": status_code,
962
- "headers": dict(headers), # Don't redact in file
963
- "body": full_body,
964
- },
965
- timestamp=timestamp,
966
- )
967
-
968
-     async def _log_codex_request(
-         self,
-         request_id: str,
-         method: str,
-         url: str,
-         headers: dict[str, str],
-         body_data: dict[str, Any] | None,
-         session_id: str,
-     ) -> None:
-         """Log outgoing Codex request preserving instructions field exactly."""
-         if not self._verbose_api:
-             return
-
-         # Log to console with redacted headers
-         logger.info(
-             "verbose_codex_request",
-             request_id=request_id,
-             method=method,
-             url=url,
-             headers=self._redact_headers(headers),
-             session_id=session_id,
-             instructions_preview=(
-                 body_data.get("instructions", "")[:100] + "..."
-                 if body_data and body_data.get("instructions")
-                 else None
-             ),
-         )
-
-         # Save complete request to file (without redaction)
-         timestamp = time.strftime("%Y%m%d_%H%M%S")
-         await write_request_log(
-             request_id=request_id,
-             log_type="codex_request",
-             data={
-                 "method": method,
-                 "url": url,
-                 "headers": dict(headers),
-                 "body": body_data,
-                 "session_id": session_id,
-             },
-             timestamp=timestamp,
-         )
-
-     async def _log_codex_response(
-         self,
-         request_id: str,
-         status_code: int,
-         headers: dict[str, str],
-         body_data: dict[str, Any] | None,
-     ) -> None:
-         """Log complete non-streaming Codex response."""
-         if not self._verbose_api:
-             return
-
-         # Log to console with redacted headers
-         logger.info(
-             "verbose_codex_response",
-             request_id=request_id,
-             status_code=status_code,
-             headers=self._redact_headers(headers),
-             response_type="non_streaming",
-         )
-
-         # Save complete response to file
-         timestamp = time.strftime("%Y%m%d_%H%M%S")
-         await write_request_log(
-             request_id=request_id,
-             log_type="codex_response",
-             data={
-                 "status_code": status_code,
-                 "headers": dict(headers),
-                 "body": body_data,
-             },
-             timestamp=timestamp,
-         )
-
-     async def _log_codex_response_headers(
-         self,
-         request_id: str,
-         status_code: int,
-         headers: dict[str, str],
-         stream_type: str,
-     ) -> None:
-         """Log streaming Codex response headers."""
-         if not self._verbose_api:
-             return
-
-         # Log to console with redacted headers
-         logger.info(
-             "verbose_codex_response_headers",
-             request_id=request_id,
-             status_code=status_code,
-             headers=self._redact_headers(headers),
-             stream_type=stream_type,
-         )
-
-         # Save response headers to file
-         timestamp = time.strftime("%Y%m%d_%H%M%S")
-         await write_request_log(
-             request_id=request_id,
-             log_type="codex_response_headers",
-             data={
-                 "status_code": status_code,
-                 "headers": dict(headers),
-                 "stream_type": stream_type,
-             },
-             timestamp=timestamp,
-         )
-
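Note the timestamp conventions: the generic verbose loggers reuse ctx.get_log_timestamp_prefix() so a request and its response share one file prefix, while the three Codex helpers mint a fresh wall-clock prefix per call. The strftime format used above produces values such as:

    import time

    # Format used by the Codex log helpers above.
    timestamp = time.strftime("%Y%m%d_%H%M%S")  # e.g. "20240131_142530"
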
-     async def _log_codex_streaming_complete(
-         self,
-         request_id: str,
-         chunks: list[bytes],
-     ) -> None:
-         """Log complete streaming data after stream finishes."""
-         if not self._verbose_api:
-             return
-
-         # Combine chunks and decode for analysis
-         complete_data = b"".join(chunks)
-         try:
-             decoded_data = complete_data.decode("utf-8", errors="replace")
-         except Exception:
-             decoded_data = f"<binary data of length {len(complete_data)}>"
-
-         # Log to console with preview
-         logger.info(
-             "verbose_codex_streaming_complete",
-             request_id=request_id,
-             total_bytes=len(complete_data),
-             chunk_count=len(chunks),
-             data_preview=decoded_data[:200] + "..."
-             if len(decoded_data) > 200
-             else decoded_data,
-         )
-
-         # Save complete streaming data to file
-         timestamp = time.strftime("%Y%m%d_%H%M%S")
-         await write_request_log(
-             request_id=request_id,
-             log_type="codex_streaming_complete",
-             data={
-                 "total_bytes": len(complete_data),
-                 "chunk_count": len(chunks),
-                 "complete_data": decoded_data,
-             },
-             timestamp=timestamp,
-         )
-
-     def _should_stream_response(self, headers: dict[str, str]) -> bool:
-         """Check if response should be streamed based on request headers.
-
-         Args:
-             headers: Request headers
-
-         Returns:
-             True if response should be streamed
-         """
-         # Check if client requested streaming
-         accept_header = headers.get("accept", "").lower()
-         should_stream = (
-             "text/event-stream" in accept_header or "stream" in accept_header
-         )
-         logger.debug(
-             "stream_check_completed",
-             accept_header=accept_header,
-             should_stream=should_stream,
-         )
-         return should_stream
-
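The streaming decision above is a plain substring match on the Accept header; note that the broad "stream" check already subsumes the "text/event-stream" check. Illustrative inputs:

    # Illustrative Accept-header values and the resulting decision.
    for accept, expected in [
        ("text/event-stream", True),   # explicit SSE request
        ("application/json", False),   # buffered response
        ("", False),                   # missing header defaults to buffered
    ]:
        assert (("text/event-stream" in accept) or ("stream" in accept)) == expected
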
-     def _extract_request_metadata(self, body: bytes | None) -> tuple[str | None, bool]:
-         """Extract model and streaming flag from request body.
-
-         Args:
-             body: Request body
-
-         Returns:
-             Tuple of (model, streaming)
-         """
-         if not body:
-             return None, False
-
-         try:
-             body_data = json.loads(body.decode("utf-8"))
-             model = body_data.get("model")
-             streaming = body_data.get("stream", False)
-             return model, streaming
-         except (json.JSONDecodeError, UnicodeDecodeError):
-             return None, False
-
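A worked example of the metadata extraction above (values are illustrative):

    import json

    body = json.dumps({"model": "claude-3-5-sonnet-20241022", "stream": True}).encode()
    data = json.loads(body.decode("utf-8"))
    assert (data.get("model"), data.get("stream", False)) == (
        "claude-3-5-sonnet-20241022",
        True,
    )
    # An empty or non-JSON body falls back to (None, False).
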
-     async def _handle_streaming_request(
-         self,
-         request_data: RequestData,
-         original_path: str,
-         timeout: float,
-         ctx: "RequestContext",
-     ) -> StreamingResponse | tuple[int, dict[str, str], bytes]:
-         """Handle streaming request with transformation.
-
-         Args:
-             request_data: Transformed request data
-             original_path: Original request path for context
-             timeout: Request timeout
-             ctx: Request context for observability
-
-         Returns:
-             StreamingResponse or error response tuple
-         """
-         # Log the outgoing request if verbose API logging is enabled
-         await self._log_verbose_api_request(request_data, ctx)
-
-         # Make the request once up front to check for errors and capture
-         # the upstream status and headers before streaming
-         proxy_url = self._proxy_url
-         verify = self._ssl_context
-         response_status = 200
-         response_headers: dict[str, str] = {}
-
-         async with httpx.AsyncClient(
-             timeout=timeout, proxy=proxy_url, verify=verify
-         ) as client:
-             # Start the request to get headers
-             response = await client.send(
-                 client.build_request(
-                     method=request_data["method"],
-                     url=request_data["url"],
-                     headers=request_data["headers"],
-                     content=request_data["body"],
-                 ),
-                 stream=True,
-             )
-
-             # Check for errors before starting to stream
-             if response.status_code >= 400:
-                 error_content = await response.aread()
-
-                 # Log the full error response body
-                 await self._log_verbose_api_response(
-                     response.status_code, dict(response.headers), error_content, ctx
-                 )
-
-                 logger.info(
-                     "streaming_error_received",
-                     status_code=response.status_code,
-                     error_detail=error_content.decode("utf-8", errors="replace"),
-                 )
-
-                 # Use transformer to handle error transformation (including OpenAI format)
-                 transformed_error_response = (
-                     await self.response_transformer.transform_proxy_response(
-                         response.status_code,
-                         dict(response.headers),
-                         error_content,
-                         original_path,
-                         self.proxy_mode,
-                     )
-                 )
-                 transformed_error_body = transformed_error_response["body"]
-
-                 # Update context with error status
-                 ctx.add_metadata(status_code=response.status_code)
-
-                 # Log access log for error
-                 from ccproxy.observability.access_logger import log_request_access
-
-                 await log_request_access(
-                     context=ctx,
-                     status_code=response.status_code,
-                     method=request_data["method"],
-                     metrics=self.metrics,
-                 )
-
-                 # Return error as regular response
-                 return (
-                     response.status_code,
-                     dict(response.headers),
-                     transformed_error_body,
-                 )
-
-             # No error: capture status and headers from this probe response,
-             # then close it; the generator below re-issues the request to stream.
-             response_status = response.status_code
-             response_headers = dict(response.headers)
-             await response.aclose()
-
-         # Initialize streaming metrics collector
-         from ccproxy.utils.streaming_metrics import StreamingMetricsCollector
-
-         metrics_collector = StreamingMetricsCollector(request_id=ctx.request_id)
-
-         async def stream_generator() -> AsyncGenerator[bytes, None]:
-             try:
-                 logger.debug(
-                     "stream_generator_start",
-                     method=request_data["method"],
-                     url=request_data["url"],
-                     headers=request_data["headers"],
-                 )
-
-                 # Use httpx directly for streaming since we need the stream context manager
-                 start_time = time.perf_counter()
-                 async with (
-                     httpx.AsyncClient(
-                         timeout=timeout, proxy=proxy_url, verify=verify
-                     ) as client,
-                     client.stream(
-                         method=request_data["method"],
-                         url=request_data["url"],
-                         headers=request_data["headers"],
-                         content=request_data["body"],
-                     ) as response,
-                 ):
-                     proxy_api_call_ms = (time.perf_counter() - start_time) * 1000
-                     logger.debug(
-                         "stream_response_received",
-                         status_code=response.status_code,
-                         duration_ms=proxy_api_call_ms,
-                         headers=dict(response.headers),
-                     )
-
-                     # Log initial stream response headers if verbose
-                     if self._verbose_api:
-                         logger.info(
-                             "verbose_api_stream_response_start",
-                             status_code=response.status_code,
-                             headers=self._redact_headers(dict(response.headers)),
-                         )
-
-                     # Keep local state in sync with the streaming response
-                     # (note: the status and headers sent to the client come from
-                     # the probe request above; the response has already started)
-                     nonlocal response_status, response_headers
-                     response_status = response.status_code
-                     response_headers = dict(response.headers)
-
-                     # Log upstream response headers for streaming
-                     if self._verbose_api:
-                         request_id = ctx.request_id
-                         timestamp = ctx.get_log_timestamp_prefix()
-                         await write_request_log(
-                             request_id=request_id,
-                             log_type="upstream_response_headers",
-                             data={
-                                 "status_code": response.status_code,
-                                 "headers": dict(response.headers),
-                                 "stream_type": (
-                                     "anthropic_sse"
-                                     if not self.response_transformer._is_openai_request(original_path)
-                                     else "openai_sse"
-                                 ),
-                             },
-                             timestamp=timestamp,
-                         )
-
-                     # Transform streaming response
-                     is_openai = self.response_transformer._is_openai_request(original_path)
-                     logger.debug(
-                         "openai_format_check", is_openai=is_openai, path=original_path
-                     )
-
-                     if is_openai:
-                         # Transform Anthropic SSE to OpenAI SSE format using adapter
-                         logger.debug("sse_transform_start", path=original_path)
-
-                         # Get timestamp once for all streaming chunks
-                         request_id = ctx.request_id
-                         timestamp = ctx.get_log_timestamp_prefix()
-
-                         async for transformed_chunk in self._transform_anthropic_to_openai_stream(
-                             response, original_path
-                         ):
-                             # Log transformed streaming chunk
-                             await append_streaming_log(
-                                 request_id=request_id,
-                                 log_type="upstream_streaming",
-                                 data=transformed_chunk,
-                                 timestamp=timestamp,
-                             )
-
-                             logger.debug(
-                                 "transformed_chunk_yielded",
-                                 chunk_size=len(transformed_chunk),
-                             )
-                             yield transformed_chunk
-                     else:
-                         # Stream as-is for Anthropic endpoints
-                         logger.debug("anthropic_streaming_start")
-                         chunk_count = 0
-                         content_block_delta_count = 0
-
-                         # Use cached verbose streaming configuration
-                         verbose_streaming = self._verbose_streaming
-
-                         # Get timestamp once for all streaming chunks
-                         request_id = ctx.request_id
-                         timestamp = ctx.get_log_timestamp_prefix()
-
-                         async for chunk in response.aiter_bytes():
-                             if chunk:
-                                 chunk_count += 1
-
-                                 # Log raw streaming chunk
-                                 await append_streaming_log(
-                                     request_id=request_id,
-                                     log_type="upstream_streaming",
-                                     data=chunk,
-                                     timestamp=timestamp,
-                                 )
-
-                                 chunk_str = chunk.decode("utf-8", errors="replace")
-
-                                 # Extract token metrics from streaming events
-                                 is_final = metrics_collector.process_chunk(chunk_str)
-
-                                 # On the final chunk with complete metrics, update context and record metrics
-                                 if is_final:
-                                     model = ctx.metadata.get("model")
-                                     cost_usd = metrics_collector.calculate_final_cost(model)
-                                     final_metrics = metrics_collector.get_metrics()
-
-                                     # Update context with final metrics
-                                     ctx.add_metadata(
-                                         status_code=response_status,
-                                         tokens_input=final_metrics["tokens_input"],
-                                         tokens_output=final_metrics["tokens_output"],
-                                         cache_read_tokens=final_metrics["cache_read_tokens"],
-                                         cache_write_tokens=final_metrics["cache_write_tokens"],
-                                         cost_usd=cost_usd,
-                                     )
-
-                                     # Access logging is handled by StreamingResponseWithLogging
-
-                                 # Compact logging for content_block_delta events
-                                 if (
-                                     "content_block_delta" in chunk_str
-                                     and not verbose_streaming
-                                 ):
-                                     content_block_delta_count += 1
-                                     # Only log every 10th content_block_delta, plus start and end
-                                     if content_block_delta_count == 1:
-                                         logger.debug("content_block_delta_start")
-                                     elif content_block_delta_count % 10 == 0:
-                                         logger.debug(
-                                             "content_block_delta_progress",
-                                             count=content_block_delta_count,
-                                         )
-                                 elif (
-                                     verbose_streaming
-                                     or "content_block_delta" not in chunk_str
-                                 ):
-                                     # Log non-content_block_delta events normally, or everything in verbose mode
-                                     logger.debug(
-                                         "chunk_yielded",
-                                         chunk_number=chunk_count,
-                                         chunk_size=len(chunk),
-                                         chunk_preview=chunk[:100].decode(
-                                             "utf-8", errors="replace"
-                                         ),
-                                     )
-
-                                 yield chunk
-
-                         # Final summary for content_block_delta events
-                         if content_block_delta_count > 0 and not verbose_streaming:
-                             logger.debug(
-                                 "content_block_delta_completed",
-                                 total_count=content_block_delta_count,
-                             )
-
-             except Exception as e:
-                 logger.exception("streaming_error", error=str(e))
-                 # json.dumps keeps the SSE error event valid JSON even if the
-                 # exception text contains quotes
-                 error_payload = json.dumps({"error": f"Streaming error: {e}"})
-                 yield f"data: {error_payload}\n\n".encode("utf-8")
-
-         # Always use upstream headers as base
-         final_headers = response_headers.copy()
-
-         # Remove the upstream date header to avoid conflicts
-         final_headers.pop("date", None)
-
-         # Ensure critical headers for streaming
-         final_headers["Cache-Control"] = "no-cache"
-         final_headers["Connection"] = "keep-alive"
-
-         # Set content-type if not already set by upstream
-         if "content-type" not in final_headers:
-             final_headers["content-type"] = "text/event-stream"
-
-         return StreamingResponseWithLogging(
-             content=stream_generator(),
-             request_context=ctx,
-             metrics=self.metrics,
-             status_code=response_status,
-             headers=final_headers,
-         )
-
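One thing worth flagging in the handler above: it sends the upstream request once to probe for errors and capture headers, then sends it again inside the generator, so every successful stream costs two upstream calls. A sketch of a single-request alternative, assuming error handling can happen before the first yielded byte (the names here are illustrative, not part of the package):

    import httpx
    from collections.abc import AsyncGenerator

    async def stream_once(
        client: httpx.AsyncClient, request: httpx.Request
    ) -> AsyncGenerator[bytes, None]:
        # Send exactly one upstream request and stream its body.
        response = await client.send(request, stream=True)
        try:
            response.raise_for_status()  # surface 4xx/5xx before streaming
            async for chunk in response.aiter_bytes():
                yield chunk
        finally:
            await response.aclose()
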
-     async def _transform_anthropic_to_openai_stream(
-         self, response: httpx.Response, original_path: str
-     ) -> AsyncGenerator[bytes, None]:
-         """Transform Anthropic SSE stream to OpenAI SSE format using adapter.
-
-         Args:
-             response: Streaming response from Anthropic
-             original_path: Original request path for context
-
-         Yields:
-             Transformed OpenAI SSE format chunks
-         """
-
-         # Parse SSE chunks from response into dict stream
-         async def sse_to_dict_stream() -> AsyncGenerator[dict[str, object], None]:
-             chunk_count = 0
-             async for line in response.aiter_lines():
-                 if line.startswith("data: "):
-                     data_str = line[6:].strip()
-                     if data_str and data_str != "[DONE]":
-                         try:
-                             chunk_data = json.loads(data_str)
-                             chunk_count += 1
-                             logger.debug(
-                                 "proxy_anthropic_chunk_received",
-                                 chunk_count=chunk_count,
-                                 chunk_type=chunk_data.get("type"),
-                                 chunk=chunk_data,
-                             )
-                             yield chunk_data
-                         except json.JSONDecodeError:
-                             logger.warning("sse_parse_failed", data=data_str)
-                             continue
-
-         # Transform using OpenAI adapter and format back to SSE
-         async for openai_chunk in self.openai_adapter.adapt_stream(
-             sse_to_dict_stream()
-         ):
-             sse_line = f"data: {json.dumps(openai_chunk)}\n\n"
-             yield sse_line.encode("utf-8")
-
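For reference, the SSE framing consumed by sse_to_dict_stream above looks like this (a minimal standalone illustration):

    import json

    line = 'data: {"type": "content_block_delta", "delta": {"text": "Hi"}}'
    if line.startswith("data: "):
        payload = line[6:].strip()
        if payload and payload != "[DONE]":  # "[DONE]" terminates the stream
            event = json.loads(payload)
            assert event["type"] == "content_block_delta"
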
-     def _extract_message_type_from_body(self, body: bytes | None) -> str:
-         """Extract message type from request body for realistic response generation."""
-         if not body:
-             return "short"
-
-         try:
-             body_data = json.loads(body.decode("utf-8"))
-             # Check if tools are present - indicates tool use
-             if body_data.get("tools"):
-                 return "tool_use"
-
-             # Check message content length to determine type
-             messages = body_data.get("messages", [])
-             if messages:
-                 content = str(messages[-1].get("content", ""))
-                 if len(content) > 200:
-                     return "long"
-                 elif len(content) < 50:
-                     return "short"
-                 else:
-                     return "medium"
-         except (json.JSONDecodeError, UnicodeDecodeError):
-             pass
-
-         return "short"
-
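In short, the classifier above buckets requests as follows (thresholds taken from the code; the sample body is illustrative):

    import json

    body = json.dumps({"messages": [{"role": "user", "content": "hi"}]}).encode()
    # tools present               -> "tool_use"
    # last content > 200 chars    -> "long"
    # last content < 50 chars     -> "short"   (the body above lands here)
    # otherwise                   -> "medium"
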
-     async def _generate_bypass_standard_response(
-         self,
-         model: str | None,
-         is_openai_format: bool,
-         ctx: "RequestContext",
-         message_type: str = "short",
-     ) -> tuple[int, dict[str, str], bytes]:
-         """Generate realistic mock standard response."""
-
-         # Check if we should simulate an error
-         if self.mock_generator.should_simulate_error():
-             error_response, status_code = self.mock_generator.generate_error_response(
-                 "openai" if is_openai_format else "anthropic"
-             )
-             response_body = json.dumps(error_response).encode()
-             return status_code, {"content-type": "application/json"}, response_body
-
-         # Generate realistic content and token counts
-         content, input_tokens, output_tokens = (
-             self.mock_generator.generate_response_content(
-                 message_type, model or "claude-3-5-sonnet-20241022"
-             )
-         )
-         cache_read_tokens, cache_write_tokens = (
-             self.mock_generator.generate_cache_tokens()
-         )
-
-         # Simulate realistic latency
-         latency_ms = random.randint(*self.mock_generator.config.base_latency_ms)
-         await asyncio.sleep(latency_ms / 1000.0)
-
-         # Always start with Anthropic format
-         request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
-         content_list: list[dict[str, Any]] = [{"type": "text", "text": content}]
-         anthropic_response = {
-             "id": request_id,
-             "type": "message",
-             "role": "assistant",
-             "content": content_list,
-             "model": model or "claude-3-5-sonnet-20241022",
-             "stop_reason": "end_turn",
-             "stop_sequence": None,
-             "usage": {
-                 "input_tokens": input_tokens,
-                 "output_tokens": output_tokens,
-                 "cache_creation_input_tokens": cache_write_tokens,
-                 "cache_read_input_tokens": cache_read_tokens,
-             },
-         }
-
-         # Add tool use if appropriate (content_list is the same list object
-         # referenced by anthropic_response, so the insert is reflected in the
-         # serialized body below)
-         if message_type == "tool_use":
-             content_list.insert(
-                 0,
-                 {
-                     "type": "tool_use",
-                     "id": f"toolu_{random.randint(10000, 99999)}",
-                     "name": "calculator",
-                     "input": {"expression": "23 * 45"},
-                 },
-             )
-
-         if is_openai_format:
-             # Transform to OpenAI format using existing adapter
-             openai_response = self.openai_adapter.adapt_response(anthropic_response)
-             response_body = json.dumps(openai_response).encode()
-         else:
-             response_body = json.dumps(anthropic_response).encode()
-
-         headers = {
-             "content-type": "application/json",
-             "content-length": str(len(response_body)),
-         }
-
-         # Update context with realistic metrics
-         cost_usd = self.mock_generator.calculate_realistic_cost(
-             input_tokens,
-             output_tokens,
-             model or "claude-3-5-sonnet-20241022",
-             cache_read_tokens,
-             cache_write_tokens,
-         )
-
-         ctx.add_metadata(
-             status_code=200,
-             tokens_input=input_tokens,
-             tokens_output=output_tokens,
-             cache_read_tokens=cache_read_tokens,
-             cache_write_tokens=cache_write_tokens,
-             cost_usd=cost_usd,
-         )
-
-         # Log comprehensive access log (includes Prometheus metrics)
-         await log_request_access(
-             context=ctx,
-             status_code=200,
-             method="POST",
-             metrics=self.metrics,
-         )
-
-         return 200, headers, response_body
-
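calculate_realistic_cost belongs to the mock generator and is not shown in this diff; mock cost accounting of this shape is typically a per-token price sheet. A hypothetical sketch with invented placeholder rates:

    # Hypothetical sketch; the real mock_generator.calculate_realistic_cost is
    # defined elsewhere, and these per-million-token rates are placeholders.
    def calculate_cost_usd(
        input_tokens: int,
        output_tokens: int,
        cache_read: int = 0,
        cache_write: int = 0,
        in_rate: float = 3.0,    # USD per 1M input tokens (placeholder)
        out_rate: float = 15.0,  # USD per 1M output tokens (placeholder)
    ) -> float:
        per = 1_000_000
        return (
            input_tokens * in_rate / per
            + output_tokens * out_rate / per
            + cache_read * in_rate * 0.1 / per    # cache reads assumed discounted
            + cache_write * in_rate * 1.25 / per  # cache writes assumed at a premium
        )
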
-     async def _generate_bypass_streaming_response(
-         self,
-         model: str | None,
-         is_openai_format: bool,
-         ctx: "RequestContext",
-         message_type: str = "short",
-     ) -> StreamingResponse:
-         """Generate realistic mock streaming response."""
-
-         # Generate content and tokens
-         content, input_tokens, output_tokens = (
-             self.mock_generator.generate_response_content(
-                 message_type, model or "claude-3-5-sonnet-20241022"
-             )
-         )
-         cache_read_tokens, cache_write_tokens = (
-             self.mock_generator.generate_cache_tokens()
-         )
-
-         async def realistic_mock_stream_generator() -> AsyncGenerator[bytes, None]:
-             request_id = f"msg_test_{ctx.request_id}_{random.randint(1000, 9999)}"
-
-             if is_openai_format:
-                 # Generate OpenAI-style streaming
-                 chunks = await self._generate_realistic_openai_stream(
-                     request_id,
-                     model or "claude-3-5-sonnet-20241022",
-                     content,
-                     input_tokens,
-                     output_tokens,
-                 )
-             else:
-                 # Generate Anthropic-style streaming
-                 chunks = self.mock_generator.generate_realistic_anthropic_stream(
-                     request_id,
-                     model or "claude-3-5-sonnet-20241022",
-                     content,
-                     input_tokens,
-                     output_tokens,
-                     cache_read_tokens,
-                     cache_write_tokens,
-                 )
-
-             # Simulate realistic token generation rate
-             tokens_per_second = self.mock_generator.config.token_generation_rate
-
-             for i, chunk in enumerate(chunks):
-                 # Realistic delay based on token generation rate
-                 if i > 0:  # Don't delay the first chunk
-                     # Estimate tokens in this chunk and calculate delay
-                     chunk_tokens = len(str(chunk)) // 4  # Rough estimate
-                     delay_seconds = chunk_tokens / tokens_per_second
-                     # Add some randomness
-                     delay_seconds *= random.uniform(0.5, 1.5)
-                     await asyncio.sleep(max(0.01, delay_seconds))
-
-                 yield f"data: {json.dumps(chunk)}\n\n".encode()
-
-             yield b"data: [DONE]\n\n"
-
-         headers = {
-             "content-type": "text/event-stream",
-             "cache-control": "no-cache",
-             "connection": "keep-alive",
-         }
-
-         # Update context with realistic metrics
-         cost_usd = self.mock_generator.calculate_realistic_cost(
-             input_tokens,
-             output_tokens,
-             model or "claude-3-5-sonnet-20241022",
-             cache_read_tokens,
-             cache_write_tokens,
-         )
-
-         ctx.add_metadata(
-             status_code=200,
-             tokens_input=input_tokens,
-             tokens_output=output_tokens,
-             cache_read_tokens=cache_read_tokens,
-             cache_write_tokens=cache_write_tokens,
-             cost_usd=cost_usd,
-         )
-
-         return StreamingResponseWithLogging(
-             content=realistic_mock_stream_generator(),
-             request_context=ctx,
-             metrics=self.metrics,
-             headers=headers,
-         )
-
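The pacing loop above sleeps roughly chunk_tokens / token_generation_rate seconds between chunks, jittered by a factor of 0.5 to 1.5 and floored at 10 ms. For example:

    chunk = {"delta": {"text": "some mock text"}}
    chunk_tokens = len(str(chunk)) // 4  # same rough 4-chars-per-token estimate
    delay_seconds = chunk_tokens / 50.0  # assuming a 50 tokens/second config
    # -> about 0.18s for this chunk before jitter; max(0.01, ...) enforces the floor
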
-     async def _generate_realistic_openai_stream(
-         self,
-         request_id: str,
-         model: str,
-         content: str,
-         input_tokens: int,
-         output_tokens: int,
-     ) -> list[dict[str, Any]]:
-         """Generate realistic OpenAI streaming chunks by converting Anthropic format."""
-
-         # Generate Anthropic chunks first
-         anthropic_chunks = self.mock_generator.generate_realistic_anthropic_stream(
-             request_id, model, content, input_tokens, output_tokens, 0, 0
-         )
-
-         # Convert to OpenAI format chunk by chunk
-         openai_chunks = []
-         for chunk in anthropic_chunks:
-             # Hand-rolled, simplified conversion covering three chunk types;
-             # in practice a full streaming adapter would be needed
-             if chunk.get("type") == "message_start":
-                 openai_chunks.append(
-                     {
-                         "id": f"chatcmpl-{request_id}",
-                         "object": "chat.completion.chunk",
-                         "created": int(time.time()),
-                         "model": model,
-                         "choices": [
-                             {
-                                 "index": 0,
-                                 "delta": {"role": "assistant", "content": ""},
-                                 "finish_reason": None,
-                             }
-                         ],
-                     }
-                 )
-             elif chunk.get("type") == "content_block_delta":
-                 delta_text = chunk.get("delta", {}).get("text", "")
-                 openai_chunks.append(
-                     {
-                         "id": f"chatcmpl-{request_id}",
-                         "object": "chat.completion.chunk",
-                         "created": int(time.time()),
-                         "model": model,
-                         "choices": [
-                             {
-                                 "index": 0,
-                                 "delta": {"content": delta_text},
-                                 "finish_reason": None,
-                             }
-                         ],
-                     }
-                 )
-             elif chunk.get("type") == "message_stop":
-                 openai_chunks.append(
-                     {
-                         "id": f"chatcmpl-{request_id}",
-                         "object": "chat.completion.chunk",
-                         "created": int(time.time()),
-                         "model": model,
-                         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
-                     }
-                 )
-
-         return openai_chunks
-
-     async def close(self) -> None:
-         """Close any resources held by the proxy service."""
-         if self.proxy_client:
-             await self.proxy_client.close()
-         if self.credentials_manager:
-             await self.credentials_manager.__aexit__(None, None, None)
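
Callers are expected to dispose of the service explicitly. A hypothetical caller-side shutdown pattern, using a stub in place of the real proxy object:

    import asyncio

    class _StubService:
        # Stand-in with the same close() contract as the method above.
        async def close(self) -> None:
            print("resources released")

    async def main() -> None:
        service = _StubService()
        try:
            pass  # serve traffic here
        finally:
            await service.close()

    asyncio.run(main())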