sf-veritas 0.10.3__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sf-veritas might be problematic. Click here for more details.

Files changed (132) hide show
  1. sf_veritas/__init__.py +20 -0
  2. sf_veritas/_sffastlog.c +889 -0
  3. sf_veritas/_sffastlog.cpython-314-x86_64-linux-gnu.so +0 -0
  4. sf_veritas/_sffastnet.c +924 -0
  5. sf_veritas/_sffastnet.cpython-314-x86_64-linux-gnu.so +0 -0
  6. sf_veritas/_sffastnetworkrequest.c +730 -0
  7. sf_veritas/_sffastnetworkrequest.cpython-314-x86_64-linux-gnu.so +0 -0
  8. sf_veritas/_sffuncspan.c +2155 -0
  9. sf_veritas/_sffuncspan.cpython-314-x86_64-linux-gnu.so +0 -0
  10. sf_veritas/_sffuncspan_config.c +617 -0
  11. sf_veritas/_sffuncspan_config.cpython-314-x86_64-linux-gnu.so +0 -0
  12. sf_veritas/_sfheadercheck.c +341 -0
  13. sf_veritas/_sfheadercheck.cpython-314-x86_64-linux-gnu.so +0 -0
  14. sf_veritas/_sfnetworkhop.c +1451 -0
  15. sf_veritas/_sfnetworkhop.cpython-314-x86_64-linux-gnu.so +0 -0
  16. sf_veritas/_sfservice.c +1175 -0
  17. sf_veritas/_sfservice.cpython-314-x86_64-linux-gnu.so +0 -0
  18. sf_veritas/_sfteepreload.c +5167 -0
  19. sf_veritas/app_config.py +49 -0
  20. sf_veritas/cli.py +336 -0
  21. sf_veritas/constants.py +10 -0
  22. sf_veritas/custom_excepthook.py +304 -0
  23. sf_veritas/custom_log_handler.py +129 -0
  24. sf_veritas/custom_output_wrapper.py +144 -0
  25. sf_veritas/custom_print.py +146 -0
  26. sf_veritas/django_app.py +5 -0
  27. sf_veritas/env_vars.py +186 -0
  28. sf_veritas/exception_handling_middleware.py +18 -0
  29. sf_veritas/exception_metaclass.py +69 -0
  30. sf_veritas/fast_frame_info.py +116 -0
  31. sf_veritas/fast_network_hop.py +293 -0
  32. sf_veritas/frame_tools.py +112 -0
  33. sf_veritas/funcspan_config_loader.py +556 -0
  34. sf_veritas/function_span_profiler.py +1174 -0
  35. sf_veritas/import_hook.py +62 -0
  36. sf_veritas/infra_details/__init__.py +3 -0
  37. sf_veritas/infra_details/get_infra_details.py +24 -0
  38. sf_veritas/infra_details/kubernetes/__init__.py +3 -0
  39. sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
  40. sf_veritas/infra_details/kubernetes/get_details.py +7 -0
  41. sf_veritas/infra_details/running_on/__init__.py +17 -0
  42. sf_veritas/infra_details/running_on/kubernetes.py +11 -0
  43. sf_veritas/interceptors.py +497 -0
  44. sf_veritas/libsfnettee.so +0 -0
  45. sf_veritas/local_env_detect.py +118 -0
  46. sf_veritas/package_metadata.py +6 -0
  47. sf_veritas/patches/__init__.py +0 -0
  48. sf_veritas/patches/concurrent_futures.py +19 -0
  49. sf_veritas/patches/constants.py +1 -0
  50. sf_veritas/patches/exceptions.py +82 -0
  51. sf_veritas/patches/multiprocessing.py +32 -0
  52. sf_veritas/patches/network_libraries/__init__.py +76 -0
  53. sf_veritas/patches/network_libraries/aiohttp.py +281 -0
  54. sf_veritas/patches/network_libraries/curl_cffi.py +363 -0
  55. sf_veritas/patches/network_libraries/http_client.py +419 -0
  56. sf_veritas/patches/network_libraries/httpcore.py +515 -0
  57. sf_veritas/patches/network_libraries/httplib2.py +204 -0
  58. sf_veritas/patches/network_libraries/httpx.py +515 -0
  59. sf_veritas/patches/network_libraries/niquests.py +211 -0
  60. sf_veritas/patches/network_libraries/pycurl.py +385 -0
  61. sf_veritas/patches/network_libraries/requests.py +633 -0
  62. sf_veritas/patches/network_libraries/tornado.py +341 -0
  63. sf_veritas/patches/network_libraries/treq.py +270 -0
  64. sf_veritas/patches/network_libraries/urllib_request.py +468 -0
  65. sf_veritas/patches/network_libraries/utils.py +398 -0
  66. sf_veritas/patches/os.py +17 -0
  67. sf_veritas/patches/threading.py +218 -0
  68. sf_veritas/patches/web_frameworks/__init__.py +54 -0
  69. sf_veritas/patches/web_frameworks/aiohttp.py +793 -0
  70. sf_veritas/patches/web_frameworks/async_websocket_consumer.py +317 -0
  71. sf_veritas/patches/web_frameworks/blacksheep.py +527 -0
  72. sf_veritas/patches/web_frameworks/bottle.py +502 -0
  73. sf_veritas/patches/web_frameworks/cherrypy.py +678 -0
  74. sf_veritas/patches/web_frameworks/cors_utils.py +122 -0
  75. sf_veritas/patches/web_frameworks/django.py +944 -0
  76. sf_veritas/patches/web_frameworks/eve.py +395 -0
  77. sf_veritas/patches/web_frameworks/falcon.py +926 -0
  78. sf_veritas/patches/web_frameworks/fastapi.py +724 -0
  79. sf_veritas/patches/web_frameworks/flask.py +520 -0
  80. sf_veritas/patches/web_frameworks/klein.py +501 -0
  81. sf_veritas/patches/web_frameworks/litestar.py +551 -0
  82. sf_veritas/patches/web_frameworks/pyramid.py +428 -0
  83. sf_veritas/patches/web_frameworks/quart.py +824 -0
  84. sf_veritas/patches/web_frameworks/robyn.py +697 -0
  85. sf_veritas/patches/web_frameworks/sanic.py +857 -0
  86. sf_veritas/patches/web_frameworks/starlette.py +723 -0
  87. sf_veritas/patches/web_frameworks/strawberry.py +813 -0
  88. sf_veritas/patches/web_frameworks/tornado.py +481 -0
  89. sf_veritas/patches/web_frameworks/utils.py +91 -0
  90. sf_veritas/print_override.py +13 -0
  91. sf_veritas/regular_data_transmitter.py +409 -0
  92. sf_veritas/request_interceptor.py +401 -0
  93. sf_veritas/request_utils.py +550 -0
  94. sf_veritas/server_status.py +1 -0
  95. sf_veritas/shutdown_flag.py +11 -0
  96. sf_veritas/subprocess_startup.py +3 -0
  97. sf_veritas/test_cli.py +145 -0
  98. sf_veritas/thread_local.py +970 -0
  99. sf_veritas/timeutil.py +114 -0
  100. sf_veritas/transmit_exception_to_sailfish.py +28 -0
  101. sf_veritas/transmitter.py +132 -0
  102. sf_veritas/types.py +47 -0
  103. sf_veritas/unified_interceptor.py +1580 -0
  104. sf_veritas/utils.py +39 -0
  105. sf_veritas-0.10.3.dist-info/METADATA +97 -0
  106. sf_veritas-0.10.3.dist-info/RECORD +132 -0
  107. sf_veritas-0.10.3.dist-info/WHEEL +5 -0
  108. sf_veritas-0.10.3.dist-info/entry_points.txt +2 -0
  109. sf_veritas-0.10.3.dist-info/top_level.txt +1 -0
  110. sf_veritas.libs/libbrotlicommon-6ce2a53c.so.1.0.6 +0 -0
  111. sf_veritas.libs/libbrotlidec-811d1be3.so.1.0.6 +0 -0
  112. sf_veritas.libs/libcom_err-730ca923.so.2.1 +0 -0
  113. sf_veritas.libs/libcrypt-52aca757.so.1.1.0 +0 -0
  114. sf_veritas.libs/libcrypto-bdaed0ea.so.1.1.1k +0 -0
  115. sf_veritas.libs/libcurl-eaa3cf66.so.4.5.0 +0 -0
  116. sf_veritas.libs/libgssapi_krb5-323bbd21.so.2.2 +0 -0
  117. sf_veritas.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
  118. sf_veritas.libs/libk5crypto-9a74ff38.so.3.1 +0 -0
  119. sf_veritas.libs/libkeyutils-2777d33d.so.1.6 +0 -0
  120. sf_veritas.libs/libkrb5-a55300e8.so.3.3 +0 -0
  121. sf_veritas.libs/libkrb5support-e6594cfc.so.0.1 +0 -0
  122. sf_veritas.libs/liblber-2-d20824ef.4.so.2.10.9 +0 -0
  123. sf_veritas.libs/libldap-2-cea2a960.4.so.2.10.9 +0 -0
  124. sf_veritas.libs/libnghttp2-39367a22.so.14.17.0 +0 -0
  125. sf_veritas.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
  126. sf_veritas.libs/libpsl-99becdd3.so.5.3.1 +0 -0
  127. sf_veritas.libs/libsasl2-7de4d792.so.3.0.0 +0 -0
  128. sf_veritas.libs/libselinux-d0805dcb.so.1 +0 -0
  129. sf_veritas.libs/libssh-c11d285b.so.4.8.7 +0 -0
  130. sf_veritas.libs/libssl-60250281.so.1.1.1k +0 -0
  131. sf_veritas.libs/libunistring-05abdd40.so.2.1.0 +0 -0
  132. sf_veritas.libs/libuuid-95b83d40.so.1.3.0 +0 -0
@@ -0,0 +1,211 @@
1
+ """
2
+ Monkey-patch Niquests using EVENT HOOKS for clean, maintainable instrumentation.
3
+
4
+ Uses niquests' built-in event hook system:
5
+ - pre_send: Inject headers before request transmission
6
+ - response: Consume body and track results after response
7
+
8
+ This approach is cleaner than wrapping every method and automatically
9
+ handles all sync request types (streaming/non-streaming).
10
+
11
+ NOTE: Async support is disabled for niquests.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import time
18
+ from typing import List, Optional
19
+
20
+ from ...thread_local import trace_id_ctx
21
+ from ..constants import supported_network_verbs as verbs
22
+ from .utils import (
23
+ init_fast_header_check,
24
+ inject_headers_ultrafast,
25
+ record_network_request,
26
+ track_request_result,
27
+ )
28
+
29
+ # JSON serialization - try fast orjson first, fallback to stdlib json
30
+ try:
31
+ import orjson
32
+
33
+ HAS_ORJSON = True
34
+ except ImportError:
35
+ import json
36
+
37
+ HAS_ORJSON = False
38
+
39
+
40
+ def _tee_preload_active() -> bool:
41
+ """Detect if LD_PRELOAD tee is active."""
42
+ if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
43
+ return True
44
+ ld = os.getenv("LD_PRELOAD", "")
45
+ return "libsfnettee.so" in ld or "_sfteepreload" in ld
46
+
47
+
48
def patch_niquests(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
    """
    Patch niquests using event hooks for clean instrumentation.

    Registers hooks on Session (sync only) to:
    1. Inject tracing headers (pre_send hook)
    2. Consume response body and track results (response hook)

    When the LD_PRELOAD tee is detected, the response hook only consumes the
    body and tracks success/failure. Otherwise the response hook also records
    the request/response bodies and headers via ``record_network_request``.

    NOTE: AsyncSession is not patched - async support is disabled.

    Args:
        domains_to_not_propagate_headers_to: Passed through to
            ``inject_headers_ultrafast`` / ``init_fast_header_check`` as the
            skip list. ``None`` is treated as an empty list.

    Returns:
        None. Returns immediately (without patching) when niquests cannot be
        imported.
    """
    from functools import wraps

    try:
        import niquests  # type: ignore
    except ImportError:
        return

    skip = domains_to_not_propagate_headers_to or []
    preload_active = _tee_preload_active()

    # Initialize C extension for ultra-fast header checking (if available)
    if preload_active:
        init_fast_header_check(skip)

    # ========================================================================
    # SYNC HOOKS (for Session)
    # ========================================================================

    def pre_send_hook(req, **kwargs):
        """Inject tracing headers into ``req.headers`` in place and return ``req``."""
        inject_headers_ultrafast(req.headers, str(req.url), skip)
        return req

    if preload_active:
        # LD_PRELOAD mode: only consume the body and track the outcome.
        def response_hook(resp, **kwargs):
            """Track request success/failure (LD_PRELOAD mode - sync)."""
            url = str(resp.url)
            try:
                # Reading .content consumes the body (sync Session: a property).
                _ = resp.content
                track_request_result(success=True, url=url)
            except Exception as e:
                track_request_result(success=False, error=e, url=url)
            return resp

    else:

        def _dump_headers(mapping) -> bytes:
            """Serialize a header mapping to JSON bytes with str keys/values."""
            as_text = {str(k): str(v) for k, v in mapping.items()}
            if HAS_ORJSON:
                return orjson.dumps(as_text)
            return json.dumps(as_text).encode("utf-8")

        # Python-only mode: Full capture (body + headers)
        def response_hook(resp, **kwargs):
            """Capture and record request (Python-only mode - sync)."""
            url = str(resp.url)
            method = resp.request.method
            # NOTE(review): t0 is taken when this hook runs, i.e. after niquests
            # has already produced the response object, so the recorded interval
            # covers only the work done in this hook - confirm whether the
            # request start time was intended.
            t0 = int(time.time() * 1_000)

            trace_id = trace_id_ctx.get(None) or ""

            status = getattr(resp, "status_code", 0)
            success = False
            err = None
            req_data = b""
            resp_data = b""
            req_headers = b""
            resp_headers = b""

            try:
                # Capture response data (also consumes the body)
                resp_data = resp.content
                success = True

                # Capture headers
                req_headers = _dump_headers(resp.request.headers)
                resp_headers = _dump_headers(resp.headers)

                # Capture request body if available
                body = getattr(resp.request, "body", None)
                if body:
                    if isinstance(body, bytes):
                        req_data = body
                    elif isinstance(body, str):
                        req_data = body.encode("utf-8")

                track_request_result(success=True, url=url)
            except Exception as exc:
                # Keep the recorded flag consistent with the error and with the
                # failure reported to track_request_result.
                success = False
                err = str(exc)[:255]
                track_request_result(success=False, error=exc, url=url)
            finally:
                record_network_request(
                    trace_id,
                    url,
                    method,
                    status,
                    success,
                    err,
                    timestamp_start=t0,
                    timestamp_end=int(time.time() * 1_000),
                    request_data=req_data,
                    response_data=resp_data,
                    request_headers=req_headers,
                    response_headers=resp_headers,
                )

            return resp

    # ========================================================================
    # PATCH Session.__init__ to register hooks
    # ========================================================================

    SessionCls = niquests.Session
    _original_session_init = SessionCls.__init__

    def patched_session_init(self, *args, **kwargs):
        _original_session_init(self, *args, **kwargs)
        # Register hooks on the session instance
        self.hooks["pre_send"].append(pre_send_hook)
        self.hooks["response"].append(response_hook)

    SessionCls.__init__ = patched_session_init

    # ========================================================================
    # PATCH module-level functions to pass hooks
    # ========================================================================

    # Module-level sync functions (niquests.get, niquests.post, etc.) are
    # wrapped so that our hooks are passed through the ``hooks`` keyword.

    _sync_hooks = {
        "pre_send": [pre_send_hook],
        "response": [response_hook],
    }

    def _as_hook_list(value):
        """Normalize a user hook value (None, one callable, or an iterable) to a list."""
        if value is None:
            return []
        if callable(value):
            return [value]
        return list(value)

    def _wrap_module_sync(original_fn):
        """Wrap module-level sync functions to pass hooks."""

        @wraps(original_fn)
        def wrapper(*args, **kwargs):
            # ``hooks`` may be absent or explicitly None.
            user_hooks = kwargs.get("hooks") or {}
            # Start from the user's hooks so that hook types we do not
            # instrument are forwarded untouched instead of being dropped.
            merged_hooks = {
                hook_type: _as_hook_list(callbacks)
                for hook_type, callbacks in user_hooks.items()
            }
            # Our hooks run first, followed by the user's hooks of that type.
            for hook_type, ours in _sync_hooks.items():
                merged_hooks[hook_type] = ours + merged_hooks.get(hook_type, [])
            kwargs["hooks"] = merged_hooks
            return original_fn(*args, **kwargs)

        return wrapper

    # Patch module-level sync functions
    niquests.request = _wrap_module_sync(niquests.request)
    for verb in verbs:
        if hasattr(niquests, verb):
            setattr(niquests, verb, _wrap_module_sync(getattr(niquests, verb)))
@@ -0,0 +1,385 @@
1
+ import os
2
+ import threading
3
+ import time
4
+ from typing import List, Optional, Sequence, Tuple, Union
5
+
6
+ from ...thread_local import outbound_header_base_ctx, trace_id_ctx
7
+ from .utils import (
8
+ init_fast_header_check,
9
+ inject_headers_ultrafast,
10
+ record_network_request,
11
+ )
12
+
13
+ # JSON serialization - try fast orjson first, fallback to stdlib json
14
+ try:
15
+ import orjson
16
+ HAS_ORJSON = True
17
+ except ImportError:
18
+ import json
19
+ HAS_ORJSON = False
20
+
21
+
22
+ def _tee_preload_active() -> bool:
23
+ """Detect if LD_PRELOAD tee is active (same logic as http_client.py)."""
24
+ if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
25
+ return True
26
+ ld = os.getenv("LD_PRELOAD", "")
27
+ return "libsfnettee.so" in ld or "_sfteepreload" in ld
28
+
29
+
30
+ # ----------------------- Type/encoding helpers (FIX) -----------------------
31
+ def _normalize_url_to_str(val: Union[str, bytes, None]) -> Optional[str]:
32
+ """Return URL as str for internal logic (trace/allow), safe-decoding bytes."""
33
+ if val is None:
34
+ return None
35
+ if isinstance(val, bytes):
36
+ try:
37
+ return val.decode("utf-8", "replace")
38
+ except Exception:
39
+ return val.decode("latin1", "replace")
40
+ return val # already str
41
+
42
+
43
+ def _ensure_bytes(s: str) -> bytes:
44
+ """Encode a str header safely to bytes (UTF-8)."""
45
+ return s.encode("utf-8")
46
+
47
+
48
def _patch_pycurl_minimal_header_injection(pycurl_module):
    """
    MINIMAL header injection for pycurl in LD_PRELOAD mode.

    Replaces ``pycurl_module.Curl`` with a wrapper class that defers the
    user's ``HTTPHEADER`` option until ``perform()`` and, at that point, sets
    the user's headers merged with the headers found under ``_cached_headers``
    in the dict returned by ``outbound_header_base_ctx.get()``.

    Everything other than ``setopt``/``unsetopt``/``reset``/``perform`` is
    delegated to the wrapped Curl object.

    NOTE(review): the wrapper is not a subclass of the original ``Curl``
    class, so APIs that require a real Curl handle (presumably
    ``CurlMulti.add_handle``) may reject it - confirm against callers.

    Args:
        pycurl_module: The imported ``pycurl`` module to patch in place.

    Returns:
        None.
    """
    _OrigCurl = pycurl_module.Curl
    HTTPHEADER = pycurl_module.HTTPHEADER

    class CurlWrapper:
        """Lightweight wrapper - intercepts HTTPHEADER handling and perform()."""

        __slots__ = ("_curl", "_user_headers")

        def __init__(self, *args, **kwargs):
            # Create real Curl instance
            self._curl = _OrigCurl(*args, **kwargs)
            # Headers the user asked for; applied lazily in perform().
            self._user_headers = None

        def setopt(self, opt, val):
            """Intercept HTTPHEADER, pass everything else through."""
            if opt == HTTPHEADER:
                # Store but don't call setopt yet (avoid double call)
                self._user_headers = val
                return
            return self._curl.setopt(opt, val)

        def unsetopt(self, opt):
            """Forget stored user headers when HTTPHEADER is unset, then delegate."""
            if opt == HTTPHEADER:
                self._user_headers = None
            return self._curl.unsetopt(opt)

        def reset(self):
            """Drop stored user headers so they are not re-applied after a reset."""
            self._user_headers = None
            return self._curl.reset()

        def perform(self):
            """Set user + injected headers on the real handle, then perform."""
            base_dict = outbound_header_base_ctx.get()
            user_headers = self._user_headers

            # Fast path: no sf-veritas headers
            if not base_dict:
                if user_headers is not None:
                    self._curl.setopt(HTTPHEADER, user_headers)
                return self._curl.perform()

            # Fast path: no user headers
            if user_headers is None:
                injected = base_dict.get("_pycurl_headers")
                if injected is None:
                    cached_headers = base_dict.get("_cached_headers")
                    if not cached_headers:
                        return self._curl.perform()
                    injected = [f"{k}: {v}" for k, v in cached_headers.items()]
                    # Memoize the formatted lines on the context dict.
                    base_dict["_pycurl_headers"] = injected

                self._curl.setopt(HTTPHEADER, injected)
                return self._curl.perform()

            # Merge path
            cached_headers = base_dict.get("_cached_headers")
            if not cached_headers:
                self._curl.setopt(HTTPHEADER, user_headers)
                return self._curl.perform()

            # Copy to a list so tuples can be concatenated and the caller's
            # sequence is never mutated.
            user_list = list(user_headers)

            # An empty user list falls through to the str branch; the element
            # type is otherwise taken from the first user header.
            if user_list and isinstance(user_list[0], bytes):
                injected = base_dict.get("_pycurl_headers_bytes")
                if injected is None:
                    injected = [
                        f"{k}: {v}".encode("utf-8") for k, v in cached_headers.items()
                    ]
                    base_dict["_pycurl_headers_bytes"] = injected
            else:
                injected = base_dict.get("_pycurl_headers")
                if injected is None:
                    injected = [f"{k}: {v}" for k, v in cached_headers.items()]
                    base_dict["_pycurl_headers"] = injected

            self._curl.setopt(HTTPHEADER, user_list + injected)
            return self._curl.perform()

        def __getattr__(self, name):
            """Delegate all other attributes to the wrapped Curl object."""
            # __getattr__ only runs when normal lookup fails; if a slot itself
            # is unset, delegating would recurse forever.
            if name in ("_curl", "_user_headers"):
                raise AttributeError(name)
            return getattr(self._curl, name)

    # Replace Curl with wrapper factory
    pycurl_module.Curl = CurlWrapper
132
+
133
+
134
+ def _normalize_headers(
135
+ base: Sequence[Union[str, bytes]],
136
+ injected: Sequence[str],
137
+ ) -> Tuple[Sequence[Union[str, bytes]], bool]:
138
+ """
139
+ Ensure we return a list where *all* items are the same type.
140
+ - If user provided bytes headers, return bytes for *everything* (including injected).
141
+ - If user provided str headers, return str for everything.
142
+ - If no base headers, default to str.
143
+ Returns: (normalized_headers, are_bytes)
144
+ """
145
+ are_bytes = False
146
+ for h in base:
147
+ if isinstance(h, bytes):
148
+ are_bytes = True
149
+ break
150
+
151
+ if are_bytes:
152
+ merged: List[bytes] = []
153
+ # Copy original as bytes (already bytes)
154
+ merged.extend([h if isinstance(h, bytes) else _ensure_bytes(h) for h in base])
155
+ # Append injected as bytes
156
+ merged.extend(_ensure_bytes(h) for h in injected)
157
+ return merged, True
158
+ else:
159
+ merged2: List[str] = []
160
+ # Copy original as str (decode if user passed bytes)
161
+ for h in base:
162
+ if isinstance(h, bytes):
163
+ merged2.append(h.decode("utf-8", "replace"))
164
+ else:
165
+ merged2.append(h)
166
+ # Append injected as str
167
+ merged2.extend(injected)
168
+ return merged2, False
169
+
170
+
171
def patch_pycurl(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
    """Patch ``pycurl.Curl`` to inject trace headers and record requests.

    When the LD_PRELOAD tee is detected, only the minimal header-injection
    wrapper is installed (``_patch_pycurl_minimal_header_injection``).
    Otherwise ``pycurl.Curl`` is replaced by a subclass that injects headers
    whenever URL/HTTPHEADER are set and, on ``perform()``, records the request
    through ``record_network_request``.

    Args:
        domains_to_not_propagate_headers_to: Skip list forwarded to
            ``inject_headers_ultrafast`` / ``init_fast_header_check``.
            ``None`` is treated as an empty list.

    Returns:
        None. Returns immediately (without patching) when pycurl cannot be
        imported.
    """
    try:
        import pycurl
    except ImportError:
        return

    skip = domains_to_not_propagate_headers_to or []
    preload_active = _tee_preload_active()

    if preload_active:
        # Initialize C extension for ultra-fast header checking (if available)
        init_fast_header_check(skip)
        # Even in LD_PRELOAD mode headers must be handed to pycurl before it
        # performs the request, so a minimal injection-only patch is applied.
        # No capture, no tracking.
        return _patch_pycurl_minimal_header_injection(pycurl)

    _OrigCurl = pycurl.Curl

    def _trace_header_lines(url: str) -> List[str]:
        """Return injected trace headers for *url* as pycurl ``"Name: Value"`` lines."""
        headers_dict = {}
        inject_headers_ultrafast(headers_dict, url, skip)
        return [f"{key}: {value}" for key, value in headers_dict.items()]

    class WrappedCurl(_OrigCurl):  # subclass of the libcurl handle
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._sf_url: Union[str, bytes, None] = None
            self._sf_method: Union[str, bytes, None] = None
            self._sf_headers: List[Union[str, bytes]] = []
            self._sf_request_body: bytes = b""
            self._sf_response_buffer: List[bytes] = []
            self._sf_original_writefunction = None
            # True once the caller configured its own body sink through
            # WRITEFUNCTION or WRITEDATA; we then never install our capture.
            self._sf_user_write_sink: bool = False
            # Track what we injected (normalized)
            self._sf_injected_headers: List[Union[str, bytes]] = []
            # Flag to prevent recursion through our own setopt override
            self._sf_in_header_injection: bool = False

        # --- intercept option setting ---------------------------------------
        def setopt(self, opt, val):
            if opt == pycurl.URL:
                self._sf_url = val  # may be bytes or str
                # Inject headers immediately after URL is set (needed for CurlMulti)
                self._inject_and_set_headers()
            elif opt == pycurl.CUSTOMREQUEST:
                self._sf_method = val  # may be bytes or str, normalized later
            elif opt == pycurl.HTTPHEADER:
                # If we're already inside our own header push, just pass through
                if self._sf_in_header_injection:
                    return super().setopt(opt, val)

                # Note: val should be a sequence of str or bytes (uniform)
                self._sf_headers = list(val)
                self._inject_and_set_headers()
                return  # Don't call super() - we already set them
            elif opt == pycurl.POSTFIELDS:
                # Capture request body for POST/PUT
                if isinstance(val, bytes):
                    self._sf_request_body = val
                elif isinstance(val, str):
                    self._sf_request_body = val.encode("utf-8")
            elif opt == pycurl.WRITEFUNCTION:
                self._sf_original_writefunction = val
                self._sf_user_write_sink = True
            elif opt == pycurl.WRITEDATA:
                # The caller's sink must stay in charge of the response body.
                self._sf_user_write_sink = True
            return super().setopt(opt, val)

        def _apply_headers(self, headers):
            """Set HTTPHEADER on the real handle with the recursion guard raised."""
            self._sf_in_header_injection = True
            try:
                super().setopt(pycurl.HTTPHEADER, headers)
            finally:
                self._sf_in_header_injection = False

        def _inject_and_set_headers(self):
            """Build and set headers with our injected trace headers."""
            # URL not set yet: set user headers as-is, but keep types uniform.
            if self._sf_url is None:
                if self._sf_headers:
                    normalized, _ = _normalize_headers(self._sf_headers, [])
                    self._apply_headers(normalized)
                return

            url_str = _normalize_url_to_str(self._sf_url)
            injected = _trace_header_lines(url_str or "")

            # Enforce a uniform header element type (all-str or all-bytes).
            merged, _ = _normalize_headers(self._sf_headers, injected)
            self._sf_injected_headers = list(merged)  # store exactly what we set
            self._apply_headers(merged)

        # --- wrapped perform() ----------------------------------------------
        def perform(self):
            url_for_trace = _normalize_url_to_str(self._sf_url) or ""

            # Normalize method for recording; the handle itself is not touched.
            if isinstance(self._sf_method, bytes):
                method_s = self._sf_method.decode("utf-8", "replace").upper()
            elif isinstance(self._sf_method, str):
                method_s = self._sf_method.upper()
            else:
                method_s = "GET"

            injected_now = _trace_header_lines(url_for_trace)

            # Get trace_id for capture
            trace_id = trace_id_ctx.get(None) or ""

            # Ensure uniform header types again at perform-time.
            merged, _ = _normalize_headers(self._sf_headers, injected_now)

            # Capture request headers for recording (JSON bytes); best-effort,
            # a serialization failure must not prevent the request.
            req_headers = b""
            try:
                hdrs_for_json = [
                    h.decode("utf-8", "replace")
                    if isinstance(h, (bytes, bytearray))
                    else h
                    for h in merged
                ]
                if HAS_ORJSON:
                    req_headers = orjson.dumps(hdrs_for_json)
                else:
                    req_headers = json.dumps(hdrs_for_json).encode("utf-8")
            except Exception:  # noqa: BLE001
                pass

            # Let libcurl negotiate & decode encodings for us
            super().setopt(pycurl.ACCEPT_ENCODING, "")

            # Push merged headers down using the recursion guard
            self._apply_headers(merged)

            # Capture the body ourselves only when the caller has no sink of
            # its own; otherwise we would take over the caller's output.
            capturing = not self._sf_user_write_sink
            response_chunks: List[bytes] = []
            if capturing:
                # Fresh buffer per perform() so a reused handle does not
                # accumulate the bodies of earlier requests.
                self._sf_response_buffer = response_chunks

                def capture_response(data):
                    response_chunks.append(data)
                    return len(data)

                super().setopt(pycurl.WRITEFUNCTION, capture_response)

            # timing / status / error capture
            ts0 = int(time.time() * 1_000)
            status = 0
            err: Optional[str] = None
            resp_data = b""
            try:
                rv = super().perform()
                status = int(self.getinfo(pycurl.RESPONSE_CODE) or 0)

                # Collect response data if we captured it
                if capturing:
                    resp_data = b"".join(response_chunks)

                return rv
            except Exception as e:
                err = str(e)[:255]
                raise
            finally:
                ts1 = int(time.time() * 1_000)
                # This class is only installed when the LD_PRELOAD tee is not
                # active, so recording here is unconditional.
                record_network_request(
                    trace_id,
                    url_for_trace,
                    method_s,
                    status,
                    err is None,
                    err,
                    ts0,
                    ts1,
                    request_data=self._sf_request_body,
                    response_data=resp_data,
                    request_headers=req_headers,
                )

    pycurl.Curl = WrappedCurl