sf-veritas 0.10.3 (cp39-cp39-manylinux_2_28_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sf-veritas might be problematic.
- sf_veritas/__init__.py +20 -0
- sf_veritas/_sffastlog.c +889 -0
- sf_veritas/_sffastlog.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffastnet.c +924 -0
- sf_veritas/_sffastnet.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffastnetworkrequest.c +730 -0
- sf_veritas/_sffastnetworkrequest.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffuncspan.c +2155 -0
- sf_veritas/_sffuncspan.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffuncspan_config.c +617 -0
- sf_veritas/_sffuncspan_config.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfheadercheck.c +341 -0
- sf_veritas/_sfheadercheck.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfnetworkhop.c +1451 -0
- sf_veritas/_sfnetworkhop.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfservice.c +1175 -0
- sf_veritas/_sfservice.cpython-39-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfteepreload.c +5167 -0
- sf_veritas/app_config.py +49 -0
- sf_veritas/cli.py +336 -0
- sf_veritas/constants.py +10 -0
- sf_veritas/custom_excepthook.py +304 -0
- sf_veritas/custom_log_handler.py +129 -0
- sf_veritas/custom_output_wrapper.py +144 -0
- sf_veritas/custom_print.py +146 -0
- sf_veritas/django_app.py +5 -0
- sf_veritas/env_vars.py +186 -0
- sf_veritas/exception_handling_middleware.py +18 -0
- sf_veritas/exception_metaclass.py +69 -0
- sf_veritas/fast_frame_info.py +116 -0
- sf_veritas/fast_network_hop.py +293 -0
- sf_veritas/frame_tools.py +112 -0
- sf_veritas/funcspan_config_loader.py +556 -0
- sf_veritas/function_span_profiler.py +1174 -0
- sf_veritas/import_hook.py +62 -0
- sf_veritas/infra_details/__init__.py +3 -0
- sf_veritas/infra_details/get_infra_details.py +24 -0
- sf_veritas/infra_details/kubernetes/__init__.py +3 -0
- sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
- sf_veritas/infra_details/kubernetes/get_details.py +7 -0
- sf_veritas/infra_details/running_on/__init__.py +17 -0
- sf_veritas/infra_details/running_on/kubernetes.py +11 -0
- sf_veritas/interceptors.py +497 -0
- sf_veritas/libsfnettee.so +0 -0
- sf_veritas/local_env_detect.py +118 -0
- sf_veritas/package_metadata.py +6 -0
- sf_veritas/patches/__init__.py +0 -0
- sf_veritas/patches/concurrent_futures.py +19 -0
- sf_veritas/patches/constants.py +1 -0
- sf_veritas/patches/exceptions.py +82 -0
- sf_veritas/patches/multiprocessing.py +32 -0
- sf_veritas/patches/network_libraries/__init__.py +76 -0
- sf_veritas/patches/network_libraries/aiohttp.py +281 -0
- sf_veritas/patches/network_libraries/curl_cffi.py +363 -0
- sf_veritas/patches/network_libraries/http_client.py +419 -0
- sf_veritas/patches/network_libraries/httpcore.py +515 -0
- sf_veritas/patches/network_libraries/httplib2.py +204 -0
- sf_veritas/patches/network_libraries/httpx.py +515 -0
- sf_veritas/patches/network_libraries/niquests.py +211 -0
- sf_veritas/patches/network_libraries/pycurl.py +385 -0
- sf_veritas/patches/network_libraries/requests.py +633 -0
- sf_veritas/patches/network_libraries/tornado.py +341 -0
- sf_veritas/patches/network_libraries/treq.py +270 -0
- sf_veritas/patches/network_libraries/urllib_request.py +468 -0
- sf_veritas/patches/network_libraries/utils.py +398 -0
- sf_veritas/patches/os.py +17 -0
- sf_veritas/patches/threading.py +218 -0
- sf_veritas/patches/web_frameworks/__init__.py +54 -0
- sf_veritas/patches/web_frameworks/aiohttp.py +793 -0
- sf_veritas/patches/web_frameworks/async_websocket_consumer.py +317 -0
- sf_veritas/patches/web_frameworks/blacksheep.py +527 -0
- sf_veritas/patches/web_frameworks/bottle.py +502 -0
- sf_veritas/patches/web_frameworks/cherrypy.py +678 -0
- sf_veritas/patches/web_frameworks/cors_utils.py +122 -0
- sf_veritas/patches/web_frameworks/django.py +944 -0
- sf_veritas/patches/web_frameworks/eve.py +395 -0
- sf_veritas/patches/web_frameworks/falcon.py +926 -0
- sf_veritas/patches/web_frameworks/fastapi.py +724 -0
- sf_veritas/patches/web_frameworks/flask.py +520 -0
- sf_veritas/patches/web_frameworks/klein.py +501 -0
- sf_veritas/patches/web_frameworks/litestar.py +551 -0
- sf_veritas/patches/web_frameworks/pyramid.py +428 -0
- sf_veritas/patches/web_frameworks/quart.py +824 -0
- sf_veritas/patches/web_frameworks/robyn.py +697 -0
- sf_veritas/patches/web_frameworks/sanic.py +857 -0
- sf_veritas/patches/web_frameworks/starlette.py +723 -0
- sf_veritas/patches/web_frameworks/strawberry.py +813 -0
- sf_veritas/patches/web_frameworks/tornado.py +481 -0
- sf_veritas/patches/web_frameworks/utils.py +91 -0
- sf_veritas/print_override.py +13 -0
- sf_veritas/regular_data_transmitter.py +409 -0
- sf_veritas/request_interceptor.py +401 -0
- sf_veritas/request_utils.py +550 -0
- sf_veritas/server_status.py +1 -0
- sf_veritas/shutdown_flag.py +11 -0
- sf_veritas/subprocess_startup.py +3 -0
- sf_veritas/test_cli.py +145 -0
- sf_veritas/thread_local.py +970 -0
- sf_veritas/timeutil.py +114 -0
- sf_veritas/transmit_exception_to_sailfish.py +28 -0
- sf_veritas/transmitter.py +132 -0
- sf_veritas/types.py +47 -0
- sf_veritas/unified_interceptor.py +1580 -0
- sf_veritas/utils.py +39 -0
- sf_veritas-0.10.3.dist-info/METADATA +97 -0
- sf_veritas-0.10.3.dist-info/RECORD +132 -0
- sf_veritas-0.10.3.dist-info/WHEEL +5 -0
- sf_veritas-0.10.3.dist-info/entry_points.txt +2 -0
- sf_veritas-0.10.3.dist-info/top_level.txt +1 -0
- sf_veritas.libs/libbrotlicommon-6ce2a53c.so.1.0.6 +0 -0
- sf_veritas.libs/libbrotlidec-811d1be3.so.1.0.6 +0 -0
- sf_veritas.libs/libcom_err-730ca923.so.2.1 +0 -0
- sf_veritas.libs/libcrypt-52aca757.so.1.1.0 +0 -0
- sf_veritas.libs/libcrypto-bdaed0ea.so.1.1.1k +0 -0
- sf_veritas.libs/libcurl-eaa3cf66.so.4.5.0 +0 -0
- sf_veritas.libs/libgssapi_krb5-323bbd21.so.2.2 +0 -0
- sf_veritas.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
- sf_veritas.libs/libk5crypto-9a74ff38.so.3.1 +0 -0
- sf_veritas.libs/libkeyutils-2777d33d.so.1.6 +0 -0
- sf_veritas.libs/libkrb5-a55300e8.so.3.3 +0 -0
- sf_veritas.libs/libkrb5support-e6594cfc.so.0.1 +0 -0
- sf_veritas.libs/liblber-2-d20824ef.4.so.2.10.9 +0 -0
- sf_veritas.libs/libldap-2-cea2a960.4.so.2.10.9 +0 -0
- sf_veritas.libs/libnghttp2-39367a22.so.14.17.0 +0 -0
- sf_veritas.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
- sf_veritas.libs/libpsl-99becdd3.so.5.3.1 +0 -0
- sf_veritas.libs/libsasl2-7de4d792.so.3.0.0 +0 -0
- sf_veritas.libs/libselinux-d0805dcb.so.1 +0 -0
- sf_veritas.libs/libssh-c11d285b.so.4.8.7 +0 -0
- sf_veritas.libs/libssl-60250281.so.1.1.1k +0 -0
- sf_veritas.libs/libunistring-05abdd40.so.2.1.0 +0 -0
- sf_veritas.libs/libuuid-95b83d40.so.1.3.0 +0 -0
sf_veritas/patches/network_libraries/urllib_request.py
@@ -0,0 +1,468 @@
"""
Instrument urllib.request so that

• Every call to urlopen() or OpenerDirector.open() propagates
  SAILFISH tracing headers (unless destination host is excluded).
• Every call triggers record_network_request(…) UNLESS LD_PRELOAD is active.

The patch is safe to import multiple times.
"""

from __future__ import annotations

import io
import os
import time
from typing import List, Optional

from ...thread_local import trace_id_ctx

try:
    import wrapt

    HAS_WRAPT = True
except ImportError:
    HAS_WRAPT = False

# JSON serialization - try fast orjson first, fallback to stdlib json
try:
    import orjson

    HAS_ORJSON = True
except ImportError:
    import json

    HAS_ORJSON = False

from .utils import (
    init_fast_header_check,
    inject_headers_ultrafast,
    record_network_request,
)

# ------------------------------- config / helpers --------------------------------- #

_SF_REQ_ALREADY_INJECTED_ATTR = "_sf_already_injected"
_SF_URLLIB_DEBUG = os.getenv("SF_URLLIB_DEBUG", "0") == "1"
# If true, honor urllib's env proxy logic; if false, we build a proxy-less opener for wire I/O.
trust_env = os.getenv("SF_URLLIB_TRUST_ENV", "false").lower() == "true"


def _tee_preload_active() -> bool:
    """Detect if LD_PRELOAD tee is active."""
    if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
        return True
    ld = os.getenv("LD_PRELOAD", "")
    return "libsfnettee.so" in ld or "_sfteepreload" in ld


def _has_header_case_insensitive(req, name: str) -> bool:
    """True if Request already has a header named `name` (case-insensitive)."""
    try:
        items = req.header_items()  # type: ignore[attr-defined]
    except Exception:
        try:
            items = list(getattr(req, "headers", {}).items())
        except Exception:
            items = []
    lname = name.lower()
    for k, _ in items:
        if str(k).lower() == lname:
            return True
    return False


class _ResponseTee:
    """
    File-like wrapper for urllib responses that tees bytes into an internal buffer
    as the caller consumes them. On EOF/close, invokes on_complete(buffer_bytes).
    """

    __slots__ = ("_resp", "_buf", "_cap", "_done", "_on_complete", "_truncated")

    def __init__(self, resp, on_complete, cap_bytes: int = 256 * 1024):
        self._resp = resp
        self._buf = io.BytesIO()
        self._cap = cap_bytes
        self._done = False
        self._truncated = False
        self._on_complete = on_complete

    # -------- helpers --------
    def _accumulate(self, chunk: bytes) -> None:
        if not chunk:
            return
        if self._buf.tell() < self._cap:
            remaining = self._cap - self._buf.tell()
            if len(chunk) > remaining:
                self._buf.write(chunk[:remaining])
                self._truncated = True
            else:
                self._buf.write(chunk)

    def _finish_if_needed(self, reached_eof: bool) -> None:
        if reached_eof and not self._done:
            self._done = True
            try:
                payload = self._buf.getvalue()
                self._on_complete(payload, self._truncated)
            finally:
                self._buf.close()

    # -------- file-like API --------
    def read(self, size: int = -1) -> bytes:
        data = self._resp.read(size)
        self._accumulate(data)
        self._finish_if_needed(reached_eof=(not data))
        return data

    def readinto(self, b) -> int:
        n = self._resp.readinto(b)
        if n and n > 0:
            self._accumulate(memoryview(b)[:n].tobytes())
        self._finish_if_needed(reached_eof=(n == 0))
        return n

    def readline(self, size: int = -1) -> bytes:
        line = self._resp.readline(size)
        self._accumulate(line)
        self._finish_if_needed(reached_eof=(line == b""))
        return line

    def __iter__(self):
        for line in self._resp:
            self._accumulate(line)
            yield line
        self._finish_if_needed(reached_eof=True)

    def close(self):
        try:
            self._resp.close()
        finally:
            self._finish_if_needed(reached_eof=True)

    def __getattr__(self, name):
        return getattr(self._resp, name)


# ------------------------------- patcher --------------------------------- #


def patch_urllib_request(
    domains_to_not_propagate_headers_to: Optional[List[str]] = None,
) -> None:
    """
    Apply patches. When LD_PRELOAD is active:
      - ALWAYS inject headers (trace_id + funcspan_override)
      - SKIP capture/emission (LD_PRELOAD handles at socket layer)
    """
    try:
        import socket as _socket  # for _GLOBAL_DEFAULT_TIMEOUT
        import urllib.error
        import urllib.request as _ur
    except ImportError:
        return

    exclude: List[str] = domains_to_not_propagate_headers_to or []
    preload_active = _tee_preload_active()

    # Initialize C extension fast check once when LD_PRELOAD is active
    if preload_active:
        init_fast_header_check(exclude)

    _orig_urlopen = _ur.urlopen
    _orig_opener_open = _ur.OpenerDirector.open  # type: ignore[attr-defined]

    # -------- internal helpers (no recursion!) --------

    def _ensure_content_length_semantics(req: _ur.Request) -> None:
        """
        Guarantee standards-compliant body semantics:

        - For POST/PUT/PATCH/DELETE/OPTIONS with no body: send Content-Length: 0
        - For HEAD: no body
        - Also ensure a benign Content-Type for zero-length bodies on body-capable verbs.
          Some stacks 400 when Content-Type is missing on e.g. PUT with empty body.
        """
        method = req.get_method()

        if method == "HEAD":
            # HEAD must not have a body.
            return

        body_capable = {"POST", "PUT", "PATCH", "DELETE", "OPTIONS"}

        if method in body_capable:
            has_body = getattr(req, "data", None) is not None

            if not has_body:
                # Prefer setting an empty body so urllib emits Content-Length: 0.
                try:
                    req.data = b""
                    has_body = True
                except Exception:
                    # Fallback to explicit header only.
                    if not _has_header_case_insensitive(req, "Content-Length"):
                        try:
                            req.add_header("Content-Length", "0")
                        except Exception:
                            pass

            # Make sure a benign Content-Type exists for empty bodies on these verbs.
            # urllib adds a default Content-Type for POST, but not always for others.
            if not _has_header_case_insensitive(req, "Content-Type"):
                try:
                    # Use application/octet-stream to avoid implying form encoding.
                    req.add_header("Content-Type", "application/octet-stream")
                except Exception:
                    pass

    def _maybe_inject_headers(req: _ur.Request) -> None:
        if getattr(req, _SF_REQ_ALREADY_INJECTED_ATTR, False):
            return
        headers_dict = dict(req.headers)
        inject_headers_ultrafast(headers_dict, req.full_url, exclude)
        for k, v in headers_dict.items():
            if not _has_header_case_insensitive(req, k):
                req.add_header(k, v)
        setattr(req, _SF_REQ_ALREADY_INJECTED_ATTR, True)

    def _proxyless_open(req: _ur.Request, timeout):
        """Open using a proxy-less opener, calling the ORIGINAL .open to avoid re-entry."""
        if trust_env:
            # Honor env proxies/config via the original urlopen
            return _orig_urlopen(req, timeout=timeout)
        opener = _ur.build_opener(_ur.ProxyHandler({}))
        return _orig_opener_open(opener, req, timeout=timeout)

    # ------------------------------------------------------------------ #
    # Core helper used by both urlopen and OpenerDirector.open
    # ------------------------------------------------------------------ #
    def _inject_and_record(
        opener_call,  # callable(req, timeout=?)
        req_or_url,
        data,
        timeout,
    ):
        # 1) Normalize to a Request object (no duplicate 'data' passing later)
        if isinstance(req_or_url, _ur.Request):
            req = req_or_url
        else:
            req = _ur.Request(req_or_url, data=data)

        # 2) Header injection + body semantics (single pass)
        _maybe_inject_headers(req)
        _ensure_content_length_semantics(req)
        method = req.get_method()

        # 3) Trace id for capture (skip when LD_PRELOAD active)
        if not preload_active:
            trace_id = trace_id_ctx.get(None) or ""
        else:
            trace_id = ""

        # 4) Serialize request headers/data for capture
        req_data = b""
        req_headers = b""
        try:
            if getattr(req, "data", None):
                if isinstance(req.data, bytes):
                    req_data = req.data
                elif isinstance(req.data, str):
                    req_data = req.data.encode("utf-8")
            if HAS_ORJSON:
                req_headers = orjson.dumps({str(k): str(v) for k, v in req.headers.items()})
            else:
                req_headers = json.dumps({str(k): str(v) for k, v in req.headers.items()}).encode("utf-8")
        except Exception:
            pass

        # 5) Perform I/O
        t0 = int(time.time() * 1_000)
        try:
            resp = opener_call(req, timeout=timeout)
            status = (
                getattr(resp, "status", None) or getattr(resp, "getcode", lambda: 0)()
            )
            success = status < 400

            if _SF_URLLIB_DEBUG:
                try:
                    print(
                        f"[SF urllib] {method} {req.full_url} -> {status}", flush=True
                    )
                except Exception:
                    pass

            if HAS_ORJSON:
                resp_headers = orjson.dumps({str(k): str(v) for k, v in resp.headers.items()})
            else:
                resp_headers = json.dumps({str(k): str(v) for k, v in resp.headers.items()}).encode("utf-8")

            if preload_active:
                return resp

            def _on_complete(resp_bytes: bytes, _truncated: bool):
                record_network_request(
                    trace_id,
                    req.full_url,
                    method,
                    status,
                    success,
                    None,
                    timestamp_start=t0,
                    timestamp_end=int(time.time() * 1_000),
                    request_data=req_data,
                    response_data=resp_bytes,
                    request_headers=req_headers,
                    response_headers=resp_headers,
                )

            cap = int(os.getenv("SF_URLOPEN_CAPTURE_CAP_BYTES", "262144"))
            return _ResponseTee(resp, _on_complete, cap_bytes=cap)

        except urllib.error.HTTPError as e:
            # 4xx/5xx → exception; capture and re-raise
            if _SF_URLLIB_DEBUG:
                try:
                    print(
                        f"[SF urllib] {req.get_method()} {req.full_url} -> {getattr(e, 'code', 0)} (HTTPError)",
                        flush=True,
                    )
                except Exception:
                    pass

            if HAS_ORJSON:
                resp_headers = orjson.dumps({str(k): str(v) for k, v in e.headers.items()})
            else:
                resp_headers = json.dumps({str(k): str(v) for k, v in e.headers.items()}).encode("utf-8")

            if not preload_active:
                body = b""
                try:
                    cap = int(os.getenv("SF_URLOPEN_CAPTURE_CAP_BYTES", "262144"))
                    body = e.read()
                    if len(body) > cap:
                        body = body[:cap]
                    # Put body back so downstream can still read it
                    e.fp = io.BytesIO(body)
                except Exception:
                    pass

                record_network_request(
                    trace_id,
                    req.full_url,
                    req.get_method(),
                    getattr(e, "code", 0) or 0,
                    False,
                    str(e),
                    timestamp_start=t0,
                    timestamp_end=int(time.time() * 1_000),
                    request_data=req_data,
                    response_data=body,
                    request_headers=req_headers,
                    response_headers=resp_headers,
                )
            raise

        except Exception as e:
            if _SF_URLLIB_DEBUG:
                try:
                    print(
                        f"[SF urllib] {req.get_method()} {req.full_url} -> exception: {e}",
                        flush=True,
                    )
                except Exception:
                    pass
            if not preload_active:
                record_network_request(
                    trace_id,
                    req.full_url,
                    req.get_method(),
                    0,
                    False,
                    str(e)[:255],
                    timestamp_start=t0,
                    timestamp_end=int(time.time() * 1_000),
                    request_data=req_data,
                    request_headers=req_headers,
                )
            raise

    # ------------------------------------------------------------------ #
    # Module-level urlopen patch
    # ------------------------------------------------------------------ #
    if HAS_WRAPT:

        def instrumented_urlopen(wrapped, instance, args, kwargs):
            # urlopen(url, data=None, timeout=..., *, cafile=..., capath=..., cadefault=..., context=...)
            url = args[0] if len(args) > 0 else kwargs.pop("url", "")
            data = args[1] if len(args) > 1 else kwargs.pop("data", None)

            if len(args) > 2:
                timeout = args[2]
            else:
                timeout = kwargs.pop("timeout", _ur.socket._GLOBAL_DEFAULT_TIMEOUT)  # type: ignore

            # We pass a callable that avoids proxies unless trust_env is set
            return _inject_and_record(_proxyless_open, url, data, timeout)

        wrapt.wrap_function_wrapper("urllib.request", "urlopen", instrumented_urlopen)
    else:

        def patched_urlopen(url, data=None, timeout=None, *a, **kw):  # type: ignore
            if "timeout" in kw:
                timeout = kw.pop("timeout")
            if "data" in kw:
                data = kw.pop("data")
            return _inject_and_record(_proxyless_open, url, data, timeout)

        _ur.urlopen = patched_urlopen  # type: ignore[assignment]

    # ------------------------------------------------------------------ #
    # OpenerDirector.open patch (covers build_opener, install_opener, etc.)
    # ------------------------------------------------------------------ #
    if HAS_WRAPT:

        def instrumented_opener_open(wrapped, instance, args, kwargs):
            # Signature: open(self, fullurl, data=None, timeout=None)
            fullurl = args[0] if len(args) > 0 else kwargs.pop("fullurl", "")
            data = args[1] if len(args) > 1 else kwargs.pop("data", None)
            timeout = args[2] if len(args) > 2 else kwargs.pop("timeout", None)

            # If caller passed a Request that we already injected, short-circuit:
            if isinstance(fullurl, _ur.Request) and getattr(
                fullurl, _SF_REQ_ALREADY_INJECTED_ATTR, False
            ):
                if trust_env:
                    # Delegate to wrapped opener respecting env/proxies
                    return wrapped(fullurl, timeout=timeout)
                # Use proxy-less opener BUT call the ORIGINAL .open to avoid re-entry
                opener = _ur.build_opener(_ur.ProxyHandler({}))
                return _orig_opener_open(opener, fullurl, timeout=timeout)

            # Otherwise, flow through our injector and open without proxies (by default)
            return _inject_and_record(_proxyless_open, fullurl, data, timeout)

        wrapt.wrap_function_wrapper(
            _ur.OpenerDirector, "open", instrumented_opener_open
        )
    else:

        def patched_opener_open(self, fullurl, data=None, timeout=None, *a, **kw):  # type: ignore[override]
            if "timeout" in kw:
                timeout = kw.pop("timeout")
            if "data" in kw:
                data = kw.pop("data")

            if isinstance(fullurl, _ur.Request) and getattr(
                fullurl, _SF_REQ_ALREADY_INJECTED_ATTR, False
            ):
                if trust_env:
                    return _orig_opener_open(self, fullurl, timeout=timeout)
                opener = _ur.build_opener(_ur.ProxyHandler({}))
                return _orig_opener_open(opener, fullurl, timeout=timeout)

            return _inject_and_record(_proxyless_open, fullurl, data, timeout)

        _ur.OpenerDirector.open = patched_opener_open  # type: ignore[assignment]
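
For readers tracing the capture path in the file above: the central mechanism is the tee-on-read wrapper that _ResponseTee implements, i.e. mirror whatever the caller reads into a capped side buffer and fire a callback once EOF is reached. Below is a minimal standalone sketch of that pattern using only the standard library; ReadTee, captured, and the sample bytes are illustrative names for this note and are not part of the package.

import io

class ReadTee:
    """Wrap a file-like object; mirror reads into a capped buffer, notify once at EOF."""

    def __init__(self, raw, on_complete, cap=256 * 1024):
        self._raw = raw
        self._buf = io.BytesIO()
        self._cap = cap
        self._on_complete = on_complete
        self._done = False

    def read(self, size=-1):
        chunk = self._raw.read(size)
        if chunk and self._buf.tell() < self._cap:
            # Copy only up to the cap so large bodies are truncated, not buffered whole.
            self._buf.write(chunk[: self._cap - self._buf.tell()])
        if not chunk and not self._done:
            # Empty read means EOF: deliver the captured bytes exactly once.
            self._done = True
            self._on_complete(self._buf.getvalue())
        return chunk

captured = []
tee = ReadTee(io.BytesIO(b"hello world"), captured.append)
while tee.read(4):
    pass
print(captured)  # [b'hello world']

The package's _ResponseTee additionally forwards readinto(), readline(), iteration, and close(), so its completion callback (and hence record_network_request) fires no matter how the caller drains the response.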