sf-veritas 0.11.10__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. sf_veritas/__init__.py +46 -0
  2. sf_veritas/_auto_preload.py +73 -0
  3. sf_veritas/_sfconfig.c +162 -0
  4. sf_veritas/_sfconfig.cpython-314-x86_64-linux-gnu.so +0 -0
  5. sf_veritas/_sfcrashhandler.c +267 -0
  6. sf_veritas/_sfcrashhandler.cpython-314-x86_64-linux-gnu.so +0 -0
  7. sf_veritas/_sffastlog.c +953 -0
  8. sf_veritas/_sffastlog.cpython-314-x86_64-linux-gnu.so +0 -0
  9. sf_veritas/_sffastnet.c +994 -0
  10. sf_veritas/_sffastnet.cpython-314-x86_64-linux-gnu.so +0 -0
  11. sf_veritas/_sffastnetworkrequest.c +727 -0
  12. sf_veritas/_sffastnetworkrequest.cpython-314-x86_64-linux-gnu.so +0 -0
  13. sf_veritas/_sffuncspan.c +2791 -0
  14. sf_veritas/_sffuncspan.cpython-314-x86_64-linux-gnu.so +0 -0
  15. sf_veritas/_sffuncspan_config.c +730 -0
  16. sf_veritas/_sffuncspan_config.cpython-314-x86_64-linux-gnu.so +0 -0
  17. sf_veritas/_sfheadercheck.c +341 -0
  18. sf_veritas/_sfheadercheck.cpython-314-x86_64-linux-gnu.so +0 -0
  19. sf_veritas/_sfnetworkhop.c +1454 -0
  20. sf_veritas/_sfnetworkhop.cpython-314-x86_64-linux-gnu.so +0 -0
  21. sf_veritas/_sfservice.c +1223 -0
  22. sf_veritas/_sfservice.cpython-314-x86_64-linux-gnu.so +0 -0
  23. sf_veritas/_sfteepreload.c +6227 -0
  24. sf_veritas/app_config.py +57 -0
  25. sf_veritas/cli.py +336 -0
  26. sf_veritas/constants.py +10 -0
  27. sf_veritas/custom_excepthook.py +304 -0
  28. sf_veritas/custom_log_handler.py +146 -0
  29. sf_veritas/custom_output_wrapper.py +153 -0
  30. sf_veritas/custom_print.py +153 -0
  31. sf_veritas/django_app.py +5 -0
  32. sf_veritas/env_vars.py +186 -0
  33. sf_veritas/exception_handling_middleware.py +18 -0
  34. sf_veritas/exception_metaclass.py +69 -0
  35. sf_veritas/fast_frame_info.py +116 -0
  36. sf_veritas/fast_network_hop.py +293 -0
  37. sf_veritas/frame_tools.py +112 -0
  38. sf_veritas/funcspan_config_loader.py +693 -0
  39. sf_veritas/function_span_profiler.py +1313 -0
  40. sf_veritas/get_preload_path.py +34 -0
  41. sf_veritas/import_hook.py +62 -0
  42. sf_veritas/infra_details/__init__.py +3 -0
  43. sf_veritas/infra_details/get_infra_details.py +24 -0
  44. sf_veritas/infra_details/kubernetes/__init__.py +3 -0
  45. sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
  46. sf_veritas/infra_details/kubernetes/get_details.py +7 -0
  47. sf_veritas/infra_details/running_on/__init__.py +17 -0
  48. sf_veritas/infra_details/running_on/kubernetes.py +11 -0
  49. sf_veritas/interceptors.py +543 -0
  50. sf_veritas/libsfnettee.so +0 -0
  51. sf_veritas/local_env_detect.py +118 -0
  52. sf_veritas/package_metadata.py +6 -0
  53. sf_veritas/patches/__init__.py +0 -0
  54. sf_veritas/patches/_patch_tracker.py +74 -0
  55. sf_veritas/patches/concurrent_futures.py +19 -0
  56. sf_veritas/patches/constants.py +1 -0
  57. sf_veritas/patches/exceptions.py +82 -0
  58. sf_veritas/patches/multiprocessing.py +32 -0
  59. sf_veritas/patches/network_libraries/__init__.py +99 -0
  60. sf_veritas/patches/network_libraries/aiohttp.py +294 -0
  61. sf_veritas/patches/network_libraries/curl_cffi.py +363 -0
  62. sf_veritas/patches/network_libraries/http_client.py +670 -0
  63. sf_veritas/patches/network_libraries/httpcore.py +580 -0
  64. sf_veritas/patches/network_libraries/httplib2.py +315 -0
  65. sf_veritas/patches/network_libraries/httpx.py +557 -0
  66. sf_veritas/patches/network_libraries/niquests.py +218 -0
  67. sf_veritas/patches/network_libraries/pycurl.py +399 -0
  68. sf_veritas/patches/network_libraries/requests.py +595 -0
  69. sf_veritas/patches/network_libraries/ssl_socket.py +822 -0
  70. sf_veritas/patches/network_libraries/tornado.py +360 -0
  71. sf_veritas/patches/network_libraries/treq.py +270 -0
  72. sf_veritas/patches/network_libraries/urllib_request.py +483 -0
  73. sf_veritas/patches/network_libraries/utils.py +598 -0
  74. sf_veritas/patches/os.py +17 -0
  75. sf_veritas/patches/threading.py +231 -0
  76. sf_veritas/patches/web_frameworks/__init__.py +54 -0
  77. sf_veritas/patches/web_frameworks/aiohttp.py +798 -0
  78. sf_veritas/patches/web_frameworks/async_websocket_consumer.py +337 -0
  79. sf_veritas/patches/web_frameworks/blacksheep.py +532 -0
  80. sf_veritas/patches/web_frameworks/bottle.py +513 -0
  81. sf_veritas/patches/web_frameworks/cherrypy.py +683 -0
  82. sf_veritas/patches/web_frameworks/cors_utils.py +122 -0
  83. sf_veritas/patches/web_frameworks/django.py +963 -0
  84. sf_veritas/patches/web_frameworks/eve.py +401 -0
  85. sf_veritas/patches/web_frameworks/falcon.py +931 -0
  86. sf_veritas/patches/web_frameworks/fastapi.py +738 -0
  87. sf_veritas/patches/web_frameworks/flask.py +526 -0
  88. sf_veritas/patches/web_frameworks/klein.py +501 -0
  89. sf_veritas/patches/web_frameworks/litestar.py +616 -0
  90. sf_veritas/patches/web_frameworks/pyramid.py +440 -0
  91. sf_veritas/patches/web_frameworks/quart.py +841 -0
  92. sf_veritas/patches/web_frameworks/robyn.py +708 -0
  93. sf_veritas/patches/web_frameworks/sanic.py +874 -0
  94. sf_veritas/patches/web_frameworks/starlette.py +742 -0
  95. sf_veritas/patches/web_frameworks/strawberry.py +1446 -0
  96. sf_veritas/patches/web_frameworks/tornado.py +485 -0
  97. sf_veritas/patches/web_frameworks/utils.py +170 -0
  98. sf_veritas/print_override.py +13 -0
  99. sf_veritas/regular_data_transmitter.py +444 -0
  100. sf_veritas/request_interceptor.py +401 -0
  101. sf_veritas/request_utils.py +550 -0
  102. sf_veritas/segfault_handler.py +116 -0
  103. sf_veritas/server_status.py +1 -0
  104. sf_veritas/shutdown_flag.py +11 -0
  105. sf_veritas/subprocess_startup.py +3 -0
  106. sf_veritas/test_cli.py +145 -0
  107. sf_veritas/thread_local.py +1319 -0
  108. sf_veritas/timeutil.py +114 -0
  109. sf_veritas/transmit_exception_to_sailfish.py +28 -0
  110. sf_veritas/transmitter.py +132 -0
  111. sf_veritas/types.py +47 -0
  112. sf_veritas/unified_interceptor.py +1678 -0
  113. sf_veritas/utils.py +39 -0
  114. sf_veritas-0.11.10.dist-info/METADATA +97 -0
  115. sf_veritas-0.11.10.dist-info/RECORD +141 -0
  116. sf_veritas-0.11.10.dist-info/WHEEL +5 -0
  117. sf_veritas-0.11.10.dist-info/entry_points.txt +2 -0
  118. sf_veritas-0.11.10.dist-info/top_level.txt +1 -0
  119. sf_veritas.libs/libbrotlicommon-6ce2a53c.so.1.0.6 +0 -0
  120. sf_veritas.libs/libbrotlidec-811d1be3.so.1.0.6 +0 -0
  121. sf_veritas.libs/libcom_err-730ca923.so.2.1 +0 -0
  122. sf_veritas.libs/libcrypt-52aca757.so.1.1.0 +0 -0
  123. sf_veritas.libs/libcrypto-bdaed0ea.so.1.1.1k +0 -0
  124. sf_veritas.libs/libcurl-eaa3cf66.so.4.5.0 +0 -0
  125. sf_veritas.libs/libgssapi_krb5-323bbd21.so.2.2 +0 -0
  126. sf_veritas.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
  127. sf_veritas.libs/libk5crypto-9a74ff38.so.3.1 +0 -0
  128. sf_veritas.libs/libkeyutils-2777d33d.so.1.6 +0 -0
  129. sf_veritas.libs/libkrb5-a55300e8.so.3.3 +0 -0
  130. sf_veritas.libs/libkrb5support-e6594cfc.so.0.1 +0 -0
  131. sf_veritas.libs/liblber-2-d20824ef.4.so.2.10.9 +0 -0
  132. sf_veritas.libs/libldap-2-cea2a960.4.so.2.10.9 +0 -0
  133. sf_veritas.libs/libnghttp2-39367a22.so.14.17.0 +0 -0
  134. sf_veritas.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
  135. sf_veritas.libs/libpsl-99becdd3.so.5.3.1 +0 -0
  136. sf_veritas.libs/libsasl2-7de4d792.so.3.0.0 +0 -0
  137. sf_veritas.libs/libselinux-d0805dcb.so.1 +0 -0
  138. sf_veritas.libs/libssh-c11d285b.so.4.8.7 +0 -0
  139. sf_veritas.libs/libssl-60250281.so.1.1.1k +0 -0
  140. sf_veritas.libs/libunistring-05abdd40.so.2.1.0 +0 -0
  141. sf_veritas.libs/libuuid-95b83d40.so.1.3.0 +0 -0
@@ -0,0 +1,218 @@
1
+ """
2
+ Monkey-patch Niquests using EVENT HOOKS for clean, maintainable instrumentation.
3
+
4
+ Uses niquests' built-in event hook system:
5
+ - pre_send: Inject headers before request transmission
6
+ - response: Consume body and track results after response
7
+
8
+ This approach is cleaner than wrapping every method and automatically
9
+ handles all sync request types (streaming/non-streaming).
10
+
11
+ NOTE: Async support is disabled for niquests.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import time
18
+ from typing import List, Optional
19
+
20
+ from ...thread_local import trace_id_ctx
21
+ from ..constants import supported_network_verbs as verbs
22
+ from .utils import (
23
+ init_fast_header_check,
24
+ inject_headers_ultrafast,
25
+ is_ssl_socket_active,
26
+ record_network_request,
27
+ track_request_result,
28
+ )
29
+
30
+ # JSON serialization - try fast orjson first, fallback to stdlib json
31
+ try:
32
+ import orjson
33
+
34
+ HAS_ORJSON = True
35
+ except ImportError:
36
+ import json
37
+
38
+ HAS_ORJSON = False
39
+
40
+
41
+ def _tee_preload_active() -> bool:
42
+ """Detect if LD_PRELOAD tee is active."""
43
+ if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
44
+ return True
45
+ ld = os.getenv("LD_PRELOAD", "")
46
+ return "libsfnettee.so" in ld or "_sfteepreload" in ld
47
+
48
+
49
def patch_niquests(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
    """
    Patch niquests using event hooks for clean instrumentation.

    Registers hooks on Session (sync only) to:
    1. Inject tracing headers (pre_send hook)
    2. Consume response body and track results (response hook)

    When the LD_PRELOAD tee is active only header injection and success/failure
    tracking happen here. Otherwise the response hook also records the request
    and response bodies and headers through ``record_network_request``.

    The function is a no-op when niquests is not importable, and when it has
    already patched ``niquests.Session`` in this process.

    Args:
        domains_to_not_propagate_headers_to: Domains passed through to the
            header-injection helpers as the skip list. ``None`` means no
            domains are skipped.

    NOTE: AsyncSession is not patched - async support is disabled.
    """
    import functools

    try:
        import niquests  # type: ignore
    except ImportError:
        return

    SessionCls = niquests.Session

    # Idempotency guard: a second call would otherwise wrap __init__ again and
    # register every hook twice on each new Session.
    if getattr(SessionCls.__init__, "_sf_niquests_patched", False):
        return

    skip = domains_to_not_propagate_headers_to or []
    preload_active = _tee_preload_active()

    # Initialize C extension for ultra-fast header checking (if available)
    if preload_active:
        init_fast_header_check(skip)

    def _now_ms() -> int:
        """Current wall-clock time in integer milliseconds."""
        return int(time.time() * 1_000)

    def _dump_headers(headers) -> bytes:
        """Serialize a header mapping to JSON bytes (orjson when available)."""
        flat = {str(k): str(v) for k, v in headers.items()}
        if HAS_ORJSON:
            return orjson.dumps(flat)
        return json.dumps(flat).encode("utf-8")

    # ========================================================================
    # SYNC HOOKS (for Session)
    # ========================================================================

    def pre_send_hook(req, **kwargs):
        """
        Inject tracing headers before request transmission (sync).

        Modifies req.headers in-place and returns the same request object.
        """
        if not preload_active:
            # Remember when the request was about to be sent so the response
            # hook can report a start time that precedes the network round
            # trip. Best-effort: a request object that rejects new attributes
            # simply falls back to the response-hook timestamp.
            try:
                req._sf_start_ms = _now_ms()
            except (AttributeError, TypeError):
                pass
        url = str(req.url)
        headers = req.headers  # MutableMapping - modify in-place
        inject_headers_ultrafast(headers, url, skip)
        return req

    if preload_active:
        # LD_PRELOAD mode: Only inject headers, C extension handles capture
        def response_hook(resp, **kwargs):
            """Track request success/failure (LD_PRELOAD mode - sync)."""
            url = str(resp.url)
            try:
                # Reading .content consumes the body so the connection can be
                # released.
                _ = resp.content
                track_request_result(success=True, url=url)
            except Exception as e:
                track_request_result(success=False, error=e, url=url)
            return resp

    else:
        # Python-only mode: Full capture (body + headers)
        def response_hook(resp, **kwargs):
            """Capture and record request (Python-only mode - sync)."""
            url = str(resp.url)

            # Skip capture for HTTPS when ssl_socket.py is active (avoids double-capture)
            if url.startswith("https://") and is_ssl_socket_active():
                return resp

            request = resp.request
            method = request.method
            # Prefer the timestamp stamped by pre_send_hook; fall back to "now"
            # when the request never passed through it (e.g. a copied request).
            t0 = getattr(request, "_sf_start_ms", None) or _now_ms()

            trace_id = trace_id_ctx.get(None) or ""

            status = getattr(resp, "status_code", 0)
            success = False
            err = None
            req_data = b""
            resp_data = b""
            req_headers = b""
            resp_headers = b""

            try:
                # Capture response data (also consumes the body)
                resp_data = resp.content
                success = True

                # Capture headers
                req_headers = _dump_headers(request.headers)
                resp_headers = _dump_headers(resp.headers)

                # Capture request body if available (only bytes/str bodies)
                body = getattr(request, "body", None)
                if body:
                    if isinstance(body, bytes):
                        req_data = body
                    elif isinstance(body, str):
                        req_data = body.encode("utf-8")

                track_request_result(success=True, url=url)
            except Exception as exc:
                err = str(exc)[:255]
                track_request_result(success=False, error=exc, url=url)
            finally:
                record_network_request(
                    trace_id,
                    url,
                    method,
                    status,
                    success,
                    err,
                    timestamp_start=t0,
                    timestamp_end=_now_ms(),
                    request_data=req_data,
                    response_data=resp_data,
                    request_headers=req_headers,
                    response_headers=resp_headers,
                )

            return resp

    # ========================================================================
    # PATCH Session.__init__ to register hooks
    # ========================================================================

    _original_session_init = SessionCls.__init__

    @functools.wraps(_original_session_init)
    def patched_session_init(self, *args, **kwargs):
        _original_session_init(self, *args, **kwargs)
        # Register hooks on the session instance. setdefault avoids a KeyError
        # if the session's hook table has no entry for an event yet.
        self.hooks.setdefault("pre_send", []).append(pre_send_hook)
        self.hooks.setdefault("response", []).append(response_hook)

    patched_session_init._sf_niquests_patched = True
    SessionCls.__init__ = patched_session_init

    # ========================================================================
    # PATCH module-level functions to pass hooks
    # ========================================================================

    # Module-level sync functions (niquests.get, niquests.post, etc.) are
    # wrapped so the hooks are passed explicitly with each call.
    # NOTE(review): sessions created inside those functions also go through the
    # patched Session.__init__; confirm how niquests merges request-level and
    # session-level hooks so the hooks are not dispatched twice.

    _sync_hooks = {
        "pre_send": [pre_send_hook],
        "response": [response_hook],
    }

    def _as_hook_list(value):
        """Normalize a user hook value (None, one callable, or an iterable)."""
        if value is None:
            return []
        if callable(value):
            return [value]
        return list(value)

    def _wrap_module_sync(original_fn):
        """Wrap module-level sync functions to pass hooks."""

        @functools.wraps(original_fn)
        def wrapper(*args, **kwargs):
            user_hooks = kwargs.get("hooks") or {}
            # Start from the caller's mapping so hooks for events we do not
            # instrument are passed through untouched.
            merged_hooks = dict(user_hooks)
            for hook_type, ours in _sync_hooks.items():
                merged_hooks[hook_type] = ours + _as_hook_list(
                    user_hooks.get(hook_type)
                )
            kwargs["hooks"] = merged_hooks
            return original_fn(*args, **kwargs)

        return wrapper

    # Patch module-level sync functions
    niquests.request = _wrap_module_sync(niquests.request)
    for verb in verbs:
        if hasattr(niquests, verb):
            setattr(niquests, verb, _wrap_module_sync(getattr(niquests, verb)))
@@ -0,0 +1,399 @@
1
+ import os
2
+ import threading
3
+ import time
4
+ from typing import List, Optional, Sequence, Tuple, Union
5
+
6
+ from ...thread_local import outbound_header_base_ctx, trace_id_ctx
7
+ from .utils import (
8
+ init_fast_header_check,
9
+ inject_headers_ultrafast,
10
+ is_ssl_socket_active,
11
+ record_network_request,
12
+ )
13
+ from .._patch_tracker import is_already_patched, mark_as_patched
14
+
15
+ # JSON serialization - try fast orjson first, fallback to stdlib json
16
+ try:
17
+ import orjson
18
+ HAS_ORJSON = True
19
+ except ImportError:
20
+ import json
21
+ HAS_ORJSON = False
22
+
23
+
24
+ def _tee_preload_active() -> bool:
25
+ """Detect if LD_PRELOAD tee is active (same logic as http_client.py)."""
26
+ if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
27
+ return True
28
+ ld = os.getenv("LD_PRELOAD", "")
29
+ return "libsfnettee.so" in ld or "_sfteepreload" in ld
30
+
31
+
32
+ # ----------------------- Type/encoding helpers (FIX) -----------------------
33
+ def _normalize_url_to_str(val: Union[str, bytes, None]) -> Optional[str]:
34
+ """Return URL as str for internal logic (trace/allow), safe-decoding bytes."""
35
+ if val is None:
36
+ return None
37
+ if isinstance(val, bytes):
38
+ try:
39
+ return val.decode("utf-8", "replace")
40
+ except Exception:
41
+ return val.decode("latin1", "replace")
42
+ return val # already str
43
+
44
+
45
+ def _ensure_bytes(s: str) -> bytes:
46
+ """Encode a str header safely to bytes (UTF-8)."""
47
+ return s.encode("utf-8")
48
+
49
+
50
+ def _patch_pycurl_minimal_header_injection(pycurl_module):
51
+ """
52
+ MINIMAL header injection for pycurl in LD_PRELOAD mode.
53
+
54
+ CRITICAL: We must patch pycurl even in LD_PRELOAD mode because pycurl
55
+ builds the HTTP request in C before calling send(). Headers must be
56
+ injected BEFORE serialization, not after.
57
+
58
+ Uses a lightweight wrapper that only intercepts setopt/perform.
59
+ All other methods delegate directly to wrapped C object.
60
+ Target overhead: <20μs.
61
+ """
62
+ _OrigCurl = pycurl_module.Curl
63
+ HTTPHEADER = pycurl_module.HTTPHEADER
64
+
65
+ class CurlWrapper:
66
+ """Lightweight wrapper - only intercepts setopt(HTTPHEADER) and perform()."""
67
+ __slots__ = ('_curl', '_user_headers')
68
+
69
+ def __init__(self, *args, **kwargs):
70
+ # Create real Curl instance
71
+ self._curl = _OrigCurl(*args, **kwargs)
72
+ self._user_headers = None
73
+
74
+ def setopt(self, opt, val):
75
+ """Intercept HTTPHEADER, pass everything else through."""
76
+ if opt == HTTPHEADER:
77
+ # Store but don't call setopt yet (avoid double call)
78
+ self._user_headers = val
79
+ return
80
+ # Direct delegation to C method
81
+ return self._curl.setopt(opt, val)
82
+
83
+ def perform(self):
84
+ """Inject headers before perform."""
85
+ base_dict = outbound_header_base_ctx.get()
86
+
87
+ # Fast path: no sf-veritas headers
88
+ if not base_dict:
89
+ if self._user_headers is not None:
90
+ self._curl.setopt(HTTPHEADER, self._user_headers)
91
+ return self._curl.perform()
92
+
93
+ # Fast path: no user headers
94
+ if self._user_headers is None:
95
+ pycurl_headers = base_dict.get("_pycurl_headers")
96
+ if pycurl_headers is None:
97
+ cached_headers = base_dict.get("_cached_headers")
98
+ if not cached_headers:
99
+ return self._curl.perform()
100
+ pycurl_headers = [f"{k}: {v}" for k, v in cached_headers.items()]
101
+ base_dict["_pycurl_headers"] = pycurl_headers
102
+
103
+ self._curl.setopt(HTTPHEADER, pycurl_headers)
104
+ return self._curl.perform()
105
+
106
+ # Merge path
107
+ cached_headers = base_dict.get("_cached_headers")
108
+ if not cached_headers:
109
+ self._curl.setopt(HTTPHEADER, self._user_headers)
110
+ return self._curl.perform()
111
+
112
+ if isinstance(self._user_headers[0], bytes):
113
+ pycurl_headers_bytes = base_dict.get("_pycurl_headers_bytes")
114
+ if pycurl_headers_bytes is None:
115
+ pycurl_headers_bytes = [f"{k}: {v}".encode('utf-8') for k, v in cached_headers.items()]
116
+ base_dict["_pycurl_headers_bytes"] = pycurl_headers_bytes
117
+ merged = self._user_headers + pycurl_headers_bytes
118
+ else:
119
+ pycurl_headers = base_dict.get("_pycurl_headers")
120
+ if pycurl_headers is None:
121
+ pycurl_headers = [f"{k}: {v}" for k, v in cached_headers.items()]
122
+ base_dict["_pycurl_headers"] = pycurl_headers
123
+ merged = self._user_headers + pycurl_headers
124
+
125
+ self._curl.setopt(HTTPHEADER, merged)
126
+ return self._curl.perform()
127
+
128
+ def __getattr__(self, name):
129
+ """Delegate all other methods directly to wrapped Curl object."""
130
+ return getattr(self._curl, name)
131
+
132
+ # Replace Curl with wrapper factory
133
+ pycurl_module.Curl = CurlWrapper
134
+
135
+
136
+ def _normalize_headers(
137
+ base: Sequence[Union[str, bytes]],
138
+ injected: Sequence[str],
139
+ ) -> Tuple[Sequence[Union[str, bytes]], bool]:
140
+ """
141
+ Ensure we return a list where *all* items are the same type.
142
+ - If user provided bytes headers, return bytes for *everything* (including injected).
143
+ - If user provided str headers, return str for everything.
144
+ - If no base headers, default to str.
145
+ Returns: (normalized_headers, are_bytes)
146
+ """
147
+ are_bytes = False
148
+ for h in base:
149
+ if isinstance(h, bytes):
150
+ are_bytes = True
151
+ break
152
+
153
+ if are_bytes:
154
+ merged: List[bytes] = []
155
+ # Copy original as bytes (already bytes)
156
+ merged.extend([h if isinstance(h, bytes) else _ensure_bytes(h) for h in base])
157
+ # Append injected as bytes
158
+ merged.extend(_ensure_bytes(h) for h in injected)
159
+ return merged, True
160
+ else:
161
+ merged2: List[str] = []
162
+ # Copy original as str (decode if user passed bytes)
163
+ for h in base:
164
+ if isinstance(h, bytes):
165
+ merged2.append(h.decode("utf-8", "replace"))
166
+ else:
167
+ merged2.append(h)
168
+ # Append injected as str
169
+ merged2.extend(injected)
170
+ return merged2, False
171
+
172
+
173
def patch_pycurl(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
    """Patch ``pycurl.Curl`` to inject tracing headers and record requests.

    When the LD_PRELOAD tee is active, only the minimal header-injection
    wrapper is installed. Otherwise ``pycurl.Curl`` is replaced by a subclass
    that injects headers and reports each ``perform()`` through
    ``record_network_request``.

    The function is a no-op when pycurl has already been patched or is not
    importable.

    Args:
        domains_to_not_propagate_headers_to: Domains passed to the
            header-injection helpers as the skip list. ``None`` means no
            domains are skipped.
    """
    # Idempotency guard: prevent double-patching (handles forks, reloading)
    if is_already_patched("pycurl"):
        return
    mark_as_patched("pycurl")

    try:
        import pycurl
    except ImportError:
        return

    skip = domains_to_not_propagate_headers_to or []

    if _tee_preload_active():
        # Initialize C extension for ultra-fast header checking (if available)
        init_fast_header_check(skip)
        # Even in LD_PRELOAD mode pycurl must be patched: headers have to be
        # added before pycurl serializes the request. No capture, no tracking.
        return _patch_pycurl_minimal_header_injection(pycurl)

    _OrigCurl = pycurl.Curl

    # Options that select the HTTP method when CUSTOMREQUEST is not used
    # (per the libcurl option documentation).
    _implied_methods = {
        pycurl.POST: "POST",
        pycurl.NOBODY: "HEAD",
        pycurl.UPLOAD: "PUT",
        pycurl.HTTPGET: "GET",
    }

    class WrappedCurl(_OrigCurl):
        """Curl handle that tracks its options so requests can be recorded."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._sf_reset_tracking()
            self._sf_in_header_injection: bool = False  # Flag to prevent recursion

        def _sf_reset_tracking(self):
            """(Re)initialise everything tracked about the pending request."""
            self._sf_url: Union[str, bytes, None] = None
            self._sf_method: Union[str, bytes, None] = None
            self._sf_implied_method: Optional[str] = None
            self._sf_headers: List[Union[str, bytes]] = []
            self._sf_request_body: bytes = b""
            self._sf_response_buffer: List[bytes] = []
            self._sf_original_writefunction = None
            # What we last pushed to the handle (normalized)
            self._sf_injected_headers: List[Union[str, bytes]] = []

        def reset(self):
            """Reset the handle and the tracked state together."""
            # Otherwise stale URL/headers/body would be re-applied and
            # recorded for the next request made with this handle.
            self._sf_reset_tracking()
            return super().reset()

        # --- intercept option setting -------------------------------------------------
        def setopt(self, opt, val):
            if opt == pycurl.URL:
                self._sf_url = val  # may be bytes or str
                # Inject headers immediately after URL is set (needed for CurlMulti)
                self._inject_and_set_headers()
            elif opt == pycurl.CUSTOMREQUEST:
                self._sf_method = val  # may be bytes or str, normalized in perform()
            elif opt == pycurl.HTTPHEADER:
                # If we're already inside _inject_and_set_headers, just pass through
                if self._sf_in_header_injection:
                    return super().setopt(opt, val)

                # val should be a sequence of str or bytes
                self._sf_headers = list(val)
                self._inject_and_set_headers()
                return  # Don't call super() - headers were already set
            elif opt == pycurl.POSTFIELDS:
                # Capture request body; POSTFIELDS also implies a POST request
                self._sf_implied_method = "POST"
                if isinstance(val, bytes):
                    self._sf_request_body = val
                elif isinstance(val, str):
                    self._sf_request_body = val.encode("utf-8")
            elif opt == pycurl.WRITEFUNCTION:
                # Remember the user's callback so perform() can chain to it
                self._sf_original_writefunction = val
            elif opt == pycurl.WRITEDATA:
                # perform() installs its own WRITEFUNCTION, which would replace
                # this sink; remember its write method so data is still
                # forwarded to the user's object.
                self._sf_original_writefunction = getattr(val, "write", None)
            elif opt in _implied_methods and val:
                self._sf_implied_method = _implied_methods[opt]
            return super().setopt(opt, val)

        def _sf_push_headers(self, headers):
            """Set HTTPHEADER on the underlying handle under the recursion guard."""
            self._sf_in_header_injection = True
            try:
                super().setopt(pycurl.HTTPHEADER, headers)
            finally:
                self._sf_in_header_injection = False

        def _sf_build_injected(self, url_str: str) -> List[str]:
            """Return our headers for *url_str* in pycurl's "Name: Value" form."""
            headers_dict = {}
            inject_headers_ultrafast(headers_dict, url_str, skip)
            return [f"{key}: {value}" for key, value in headers_dict.items()]

        def _inject_and_set_headers(self):
            """Build and set headers with our injected trace headers."""
            # URL not set yet - set user headers as-is (but keep uniform types)
            if self._sf_url is None:
                if self._sf_headers:
                    normalized, _ = _normalize_headers(self._sf_headers, [])
                    self._sf_push_headers(normalized)
                return

            url_str = _normalize_url_to_str(self._sf_url)
            injected = self._sf_build_injected(url_str or "")

            # Enforce a uniform header element type (all-str or all-bytes)
            merged, _ = _normalize_headers(self._sf_headers, injected)
            self._sf_injected_headers = list(merged)  # store exactly what we set
            self._sf_push_headers(merged)

        # --- wrapped perform() --------------------------------------------------------
        def perform(self):
            url_for_trace = _normalize_url_to_str(self._sf_url) or ""

            # Skip capture for HTTPS when ssl_socket.py is active (avoids double-capture)
            if url_for_trace.startswith("https://") and is_ssl_socket_active():
                return super().perform()

            # Method for recording only; the handle's own options are untouched
            raw_method = self._sf_method
            if isinstance(raw_method, bytes):
                method_s = raw_method.decode("utf-8", "replace").upper()
            elif isinstance(raw_method, str):
                method_s = raw_method.upper()
            else:
                method_s = self._sf_implied_method or "GET"

            # Recompute our headers at perform-time and merge with the user's
            injected_now = self._sf_build_injected(url_for_trace)
            trace_id = trace_id_ctx.get(None) or ""
            merged, _ = _normalize_headers(self._sf_headers, injected_now)

            # Capture request headers for recording (JSON bytes)
            req_headers = b""
            try:
                hdrs_for_json = [
                    h.decode("utf-8", "replace") if isinstance(h, (bytes, bytearray)) else h
                    for h in merged
                ]
                if HAS_ORJSON:
                    req_headers = orjson.dumps(hdrs_for_json)
                else:
                    req_headers = json.dumps(hdrs_for_json).encode("utf-8")
            except Exception:  # noqa: BLE001
                # Best-effort: a header that cannot be serialized must not
                # prevent the request itself; record without headers.
                pass

            # Let libcurl negotiate & decode encodings for us
            super().setopt(pycurl.ACCEPT_ENCODING, "")

            # Push merged headers down using recursion guard
            self._sf_push_headers(merged)

            # Start every transfer with an empty buffer so a reused handle does
            # not report (or retain) the bodies of earlier responses.
            self._sf_response_buffer = []

            def capture_and_forward(data):
                """Capture response data and forward to the user's sink if set."""
                self._sf_response_buffer.append(data)
                if self._sf_original_writefunction is not None:
                    # Call user's callback and return its result
                    return self._sf_original_writefunction(data)
                return len(data)

            super().setopt(pycurl.WRITEFUNCTION, capture_and_forward)

            # timing / status / error capture
            ts0 = int(time.time() * 1_000)
            status = 0
            err: Optional[str] = None
            resp_data = b""
            try:
                rv = super().perform()
                status = int(self.getinfo(pycurl.RESPONSE_CODE) or 0)
                resp_data = b"".join(self._sf_response_buffer)
                return rv
            except Exception as e:
                err = str(e)[:255]
                raise
            finally:
                ts1 = int(time.time() * 1_000)
                # Drop the chunk list; resp_data already holds what is recorded
                self._sf_response_buffer = []
                record_network_request(
                    trace_id,
                    url_for_trace,
                    method_s,
                    status,
                    err is None,
                    err,
                    ts0,
                    ts1,
                    request_data=self._sf_request_body,
                    response_data=resp_data,
                    request_headers=req_headers,
                )

    pycurl.Curl = WrappedCurl