sf-veritas 0.9.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sf-veritas might be problematic.
- sf_veritas/.gitignore +2 -0
- sf_veritas/__init__.py +4 -0
- sf_veritas/app_config.py +49 -0
- sf_veritas/cli.py +336 -0
- sf_veritas/constants.py +3 -0
- sf_veritas/custom_excepthook.py +285 -0
- sf_veritas/custom_log_handler.py +53 -0
- sf_veritas/custom_output_wrapper.py +107 -0
- sf_veritas/custom_print.py +34 -0
- sf_veritas/django_app.py +5 -0
- sf_veritas/env_vars.py +83 -0
- sf_veritas/exception_handling_middleware.py +18 -0
- sf_veritas/exception_metaclass.py +69 -0
- sf_veritas/frame_tools.py +112 -0
- sf_veritas/import_hook.py +62 -0
- sf_veritas/infra_details/__init__.py +3 -0
- sf_veritas/infra_details/get_infra_details.py +24 -0
- sf_veritas/infra_details/kubernetes/__init__.py +3 -0
- sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
- sf_veritas/infra_details/kubernetes/get_details.py +7 -0
- sf_veritas/infra_details/running_on/__init__.py +17 -0
- sf_veritas/infra_details/running_on/kubernetes.py +11 -0
- sf_veritas/interceptors.py +252 -0
- sf_veritas/local_env_detect.py +118 -0
- sf_veritas/package_metadata.py +6 -0
- sf_veritas/patches/__init__.py +0 -0
- sf_veritas/patches/concurrent_futures.py +19 -0
- sf_veritas/patches/constants.py +1 -0
- sf_veritas/patches/exceptions.py +82 -0
- sf_veritas/patches/multiprocessing.py +32 -0
- sf_veritas/patches/network_libraries/__init__.py +51 -0
- sf_veritas/patches/network_libraries/aiohttp.py +100 -0
- sf_veritas/patches/network_libraries/curl_cffi.py +93 -0
- sf_veritas/patches/network_libraries/http_client.py +64 -0
- sf_veritas/patches/network_libraries/httpcore.py +152 -0
- sf_veritas/patches/network_libraries/httplib2.py +76 -0
- sf_veritas/patches/network_libraries/httpx.py +123 -0
- sf_veritas/patches/network_libraries/niquests.py +192 -0
- sf_veritas/patches/network_libraries/pycurl.py +71 -0
- sf_veritas/patches/network_libraries/requests.py +187 -0
- sf_veritas/patches/network_libraries/tornado.py +139 -0
- sf_veritas/patches/network_libraries/treq.py +122 -0
- sf_veritas/patches/network_libraries/urllib_request.py +129 -0
- sf_veritas/patches/network_libraries/utils.py +101 -0
- sf_veritas/patches/os.py +17 -0
- sf_veritas/patches/threading.py +32 -0
- sf_veritas/patches/web_frameworks/__init__.py +45 -0
- sf_veritas/patches/web_frameworks/aiohttp.py +133 -0
- sf_veritas/patches/web_frameworks/async_websocket_consumer.py +132 -0
- sf_veritas/patches/web_frameworks/blacksheep.py +107 -0
- sf_veritas/patches/web_frameworks/bottle.py +142 -0
- sf_veritas/patches/web_frameworks/cherrypy.py +246 -0
- sf_veritas/patches/web_frameworks/django.py +307 -0
- sf_veritas/patches/web_frameworks/eve.py +138 -0
- sf_veritas/patches/web_frameworks/falcon.py +229 -0
- sf_veritas/patches/web_frameworks/fastapi.py +145 -0
- sf_veritas/patches/web_frameworks/flask.py +186 -0
- sf_veritas/patches/web_frameworks/klein.py +40 -0
- sf_veritas/patches/web_frameworks/litestar.py +217 -0
- sf_veritas/patches/web_frameworks/pyramid.py +89 -0
- sf_veritas/patches/web_frameworks/quart.py +155 -0
- sf_veritas/patches/web_frameworks/robyn.py +114 -0
- sf_veritas/patches/web_frameworks/sanic.py +120 -0
- sf_veritas/patches/web_frameworks/starlette.py +144 -0
- sf_veritas/patches/web_frameworks/strawberry.py +269 -0
- sf_veritas/patches/web_frameworks/tornado.py +129 -0
- sf_veritas/patches/web_frameworks/utils.py +55 -0
- sf_veritas/print_override.py +13 -0
- sf_veritas/regular_data_transmitter.py +358 -0
- sf_veritas/request_interceptor.py +399 -0
- sf_veritas/request_utils.py +104 -0
- sf_veritas/server_status.py +1 -0
- sf_veritas/shutdown_flag.py +11 -0
- sf_veritas/subprocess_startup.py +3 -0
- sf_veritas/test_cli.py +145 -0
- sf_veritas/thread_local.py +436 -0
- sf_veritas/timeutil.py +114 -0
- sf_veritas/transmit_exception_to_sailfish.py +28 -0
- sf_veritas/transmitter.py +58 -0
- sf_veritas/types.py +44 -0
- sf_veritas/unified_interceptor.py +323 -0
- sf_veritas/utils.py +39 -0
- sf_veritas-0.9.7.dist-info/METADATA +83 -0
- sf_veritas-0.9.7.dist-info/RECORD +86 -0
- sf_veritas-0.9.7.dist-info/WHEEL +4 -0
- sf_veritas-0.9.7.dist-info/entry_points.txt +3 -0
sf_veritas/patches/multiprocessing.py
@@ -0,0 +1,32 @@
+import multiprocessing
+
+from ..thread_local import get_context, set_context
+
+_original_process_init = multiprocessing.Process.__init__
+
+
+def patched_process_init(self, *args, **kwargs):
+    current_context = get_context()
+
+    original_target = kwargs.get("target")
+    if original_target:
+
+        def wrapped_target(*targs, **tkwargs):
+            set_context(current_context)
+            original_target(*targs, **tkwargs)
+
+        kwargs["target"] = wrapped_target
+    elif args and callable(args[0]):
+        original_target = args[0]
+
+        def wrapped_target(*targs, **tkwargs):
+            set_context(current_context)
+            original_target(*targs, **tkwargs)
+
+        args = (wrapped_target,) + args[1:]
+
+    _original_process_init(self, *args, **kwargs)
+
+
+def patch_multiprocessing():
+    multiprocessing.Process.__init__ = patched_process_init
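A minimal usage sketch (hypothetical, not part of the package): once `patch_multiprocessing()` is applied, the context captured in the parent is restored inside the child's target before it runs. The `{"trace_id": ...}` payload is illustrative; the real shape of the context is defined in `sf_veritas/thread_local.py`. Because the wrapped target is a closure, this sketch assumes the `fork` start method, under which targets are not pickled.

```python
import multiprocessing

from sf_veritas.patches.multiprocessing import patch_multiprocessing
from sf_veritas.thread_local import get_context, set_context


def worker():
    # wrapped_target calls set_context(current_context) before this runs
    print("child context:", get_context())


if __name__ == "__main__":
    patch_multiprocessing()
    set_context({"trace_id": "abc123"})  # illustrative payload, shape assumed
    p = multiprocessing.Process(target=worker)
    p.start()
    p.join()
```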
sf_veritas/patches/network_libraries/__init__.py
@@ -0,0 +1,51 @@
+from typing import List, Optional
+
+from .aiohttp import patch_aiohttp
+from .curl_cffi import patch_curl_cffi
+from .http_client import patch_http_client
+from .httpcore import patch_httpcore
+from .httplib2 import patch_httplib2
+from .httpx import patch_httpx
+from .niquests import patch_niquests
+from .pycurl import patch_pycurl
+from .requests import patch_requests
+from .tornado import patch_tornado
+from .treq import patch_treq
+from .urllib_request import patch_urllib_request
+
+# from .aioh2 import patch_aioh2  # Asynchronous HTTP/2 client, no clear extension hooks
+# from .http_prompt import patch_http_prompt  # CLI HTTP client, minimal public API
+# from .mureq import patch_mureq  # Specialized crawler client, little documentation
+# from .reqboost import patch_reqboost  # High-performance batch client, docs scarce
+# from .impit import (patch_impit)  # Used by Crawlee's ImpitHttpClient
+# from .h11 import patch_h11  # Low-level HTTP/1.1 protocol library
+# from .aioquic import patch_aioquic  # QUIC/HTTP-3 client, no standard headers API
+# from .qh3 import patch_qh3  # Experimental HTTP/3 client, no docs found
+
+
+def patch_all_http_clients(
+    domains_to_not_propagate_headers_to: Optional[List[str]] = None,
+):
+    # fully implemented patches
+    patch_requests(domains_to_not_propagate_headers_to)
+    patch_urllib_request(domains_to_not_propagate_headers_to)
+    patch_http_client(domains_to_not_propagate_headers_to)
+    patch_httplib2(domains_to_not_propagate_headers_to)
+    patch_pycurl(domains_to_not_propagate_headers_to)
+    patch_treq(domains_to_not_propagate_headers_to)
+    patch_httpx(domains_to_not_propagate_headers_to)
+    patch_aiohttp(domains_to_not_propagate_headers_to)
+    patch_tornado(domains_to_not_propagate_headers_to)
+    patch_curl_cffi(domains_to_not_propagate_headers_to)
+    patch_httpcore(domains_to_not_propagate_headers_to)
+    patch_niquests(domains_to_not_propagate_headers_to)
+
+    # # Lesser-used libraries
+    # patch_impit(domains_to_not_propagate_headers_to)
+    # patch_aioh2(domains_to_not_propagate_headers_to)
+    # patch_http_prompt(domains_to_not_propagate_headers_to)
+    # patch_mureq(domains_to_not_propagate_headers_to)
+    # patch_reqboost(domains_to_not_propagate_headers_to)
+    # patch_h11(domains_to_not_propagate_headers_to)
+    # patch_aioquic(domains_to_not_propagate_headers_to)
+    # patch_qh3(domains_to_not_propagate_headers_to)
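A sketch of how this entry point would be called (the excluded domain is illustrative): one call fans out to every supported client library, and requests to listed domains still go out, just without the tracing header.

```python
from sf_veritas.patches.network_libraries import patch_all_http_clients

# Patch requests, urllib, httpx, aiohttp, etc. in one shot.
patch_all_http_clients(domains_to_not_propagate_headers_to=["internal.example.com"])
```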
sf_veritas/patches/network_libraries/aiohttp.py
@@ -0,0 +1,100 @@
+import time
+from typing import Any, List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_aiohttp(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    """
+    Monkey-patch aiohttp so that every HTTP verb:
+      1) injects SAILFISH_TRACING_HEADER when allowed,
+      2) measures timing,
+      3) calls NetworkRequestTransmitter().do_send via record_network_request.
+    """
+    try:
+        import aiohttp
+    except ImportError:
+        return
+
+    skip = domains_to_not_propagate_headers_to or []
+
+    # 1) Patch the core ClientSession._request coroutine
+    orig_request = aiohttp.ClientSession._request
+
+    async def patched_request(self, verb_name: str, url: Any, **kwargs):
+        trace_id, allow = get_trace_and_should_propagate(str(url), skip)
+        headers = kwargs.get("headers", {}) or {}
+        if allow:
+            headers[SAILFISH_TRACING_HEADER] = trace_id
+        kwargs["headers"] = headers
+
+        # 2) Perform & time the request
+        start = int(time.time() * 1_000)
+        response = await orig_request(self, verb_name, url, **kwargs)
+        end = int(time.time() * 1_000)
+
+        # 3) Record outcome
+        status = getattr(response, "status", 0)
+        ok = status < 400
+        error = None
+        if not ok:
+            try:
+                text = await response.text()
+                error = text[:255]
+            except Exception:
+                pass
+
+        record_network_request(
+            trace_id,
+            str(url),
+            verb_name.upper(),
+            status,
+            ok,
+            error,
+            timestamp_start=start,
+            timestamp_end=end,
+        )
+        return response
+
+    aiohttp.ClientSession._request = patched_request
+
+    # 2) Also patch the module-level aiohttp.request coroutine
+    orig_module_request = getattr(aiohttp, "request", None)
+    if orig_module_request:
+
+        async def patched_module_request(verb_name: str, url: str, **kwargs):
+            trace_id, allow = get_trace_and_should_propagate(str(url), skip)
+            headers = kwargs.get("headers", {}) or {}
+            if allow:
+                headers[SAILFISH_TRACING_HEADER] = trace_id
+            kwargs["headers"] = headers
+
+            start = int(time.time() * 1_000)
+            response = await orig_module_request(verb_name, url, **kwargs)
+            end = int(time.time() * 1_000)
+
+            status = getattr(response, "status", getattr(response, "status_code", 0))
+            ok = status < 400
+            error = None
+            if not ok:
+                try:
+                    body = await response.text()
+                    error = body[:255]
+                except Exception:
+                    pass
+
+            record_network_request(
+                trace_id,
+                str(url),
+                verb_name.upper(),
+                status,
+                ok,
+                error,
+                timestamp_start=start,
+                timestamp_end=end,
+            )
+
+            return response
+
+        aiohttp.request = patched_module_request
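A hypothetical smoke test for the aiohttp patch (URL and excluded domain are illustrative). `session.get(...)` funnels through the patched `ClientSession._request`, so the header is injected and the request recorded without any change to caller code.

```python
import asyncio

import aiohttp

from sf_veritas.patches.network_libraries.aiohttp import patch_aiohttp


async def main():
    patch_aiohttp(domains_to_not_propagate_headers_to=["auth.example.com"])
    async with aiohttp.ClientSession() as session:
        # get() delegates to the patched ClientSession._request
        async with session.get("https://httpbin.org/get") as resp:
            print(resp.status)


asyncio.run(main())
```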
sf_veritas/patches/network_libraries/curl_cffi.py
@@ -0,0 +1,93 @@
+import time
+from typing import List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from ..constants import supported_network_verbs as verbs
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_curl_cffi(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    """
+    Monkey-patch curl_cffi.requests so that EVERY HTTP verb
+    injects SAILFISH_TRACING_HEADER (when allowed) and then records the request.
+    """
+    try:
+        import curl_cffi.requests as ccr
+    except ImportError:
+        return
+
+    skip = domains_to_not_propagate_headers_to or []
+
+    def make_wrapper(orig_fn, verb_name):
+        def wrapper(*args, **kwargs):
+            # 1) Determine HTTP method and URL safely
+            if verb_name == "request":
+                # support both request(url) and request(method, url, …)
+                if len(args) == 1 and isinstance(args[0], str):
+                    method, url = "GET", args[0]
+                elif len(args) >= 2 and isinstance(args[0], str):
+                    method, url = args[0].upper(), args[1]
+                elif len(args) >= 3:
+                    # bound Session.request(self, method, url, …)
+                    method, url = args[1].upper(), args[2]
+                else:
+                    method = kwargs.get("method", "").upper()
+                    url = kwargs.get("url", "")
+            else:
+                method = verb_name.upper()
+                # for module-level: args[0] == url
+                # for bound: args[1] == url
+                if len(args) >= 1 and isinstance(args[0], str):
+                    url = args[0]
+                elif len(args) >= 2:
+                    url = args[1]
+                else:
+                    url = kwargs.get("url", "")
+
+            # 2) Trace-id + skip-list check
+            trace_id, allow = get_trace_and_should_propagate(url, skip)
+            headers = kwargs.get("headers", {}) or {}
+            if allow:
+                headers[SAILFISH_TRACING_HEADER] = trace_id
+            kwargs["headers"] = headers
+
+            # 3) Perform the real call
+            start = int(time.time() * 1_000)
+            resp = orig_fn(*args, **kwargs)
+            end = int(time.time() * 1_000)
+
+            # 4) Record the network request
+            status = getattr(resp, "status_code", None) or getattr(resp, "status", 0)
+            ok = getattr(resp, "ok", status < 400)
+            error = None if ok else getattr(resp, "text", str(resp))[:255]
+
+            record_network_request(
+                trace_id,
+                url,
+                method,
+                status,
+                ok,
+                error,
+                timestamp_start=start,
+                timestamp_end=end,
+            )
+
+            return resp
+
+        return wrapper
+
+    # Patch module-level verbs
+    for verb in verbs:
+        orig = getattr(ccr, verb, None)
+        if orig:
+            setattr(ccr, verb, make_wrapper(orig, verb))
+
+    # Patch Session & AsyncSession methods
+    for cls_name in ("Session", "AsyncSession"):
+        cls = getattr(ccr, cls_name, None)
+        if not cls:
+            continue
+        for verb in verbs:
+            orig = getattr(cls, verb, None)
+            if orig:
+                setattr(cls, verb, make_wrapper(orig, verb))
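A sketch of the two call shapes the wrapper has to disambiguate (URLs illustrative; assumes curl_cffi's requests-style API). For a module-level verb, `args[0]` is the URL; for a bound `Session.request`, args are `(self, method, url, ...)`.

```python
import curl_cffi.requests as ccr

from sf_veritas.patches.network_libraries.curl_cffi import patch_curl_cffi

patch_curl_cffi()

# Module-level verb: wrapper sees args[0] as the URL string.
resp = ccr.get("https://httpbin.org/get")

# Bound Session.request: wrapper sees (self, method, url, ...).
s = ccr.Session()
resp2 = s.request("GET", "https://httpbin.org/get")
```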
sf_veritas/patches/network_libraries/http_client.py
@@ -0,0 +1,64 @@
+import time
+from typing import List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_http_client(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    # ensure we always have a list
+    if domains_to_not_propagate_headers_to is None:
+        domains_to_not_propagate_headers_to = []
+
+    try:
+        import http.client as _hc
+    except ImportError:
+        return
+
+    original_request = _hc.HTTPConnection.request
+
+    def patched_request(
+        self, method, url, body=None, headers=None, *, encode_chunked=False
+    ):
+        # timestamp for recording
+        start_ts = int(time.time() * 1_000)
+
+        # get the trace_id and check if we should propagate
+        trace_id, allow = get_trace_and_should_propagate(
+            url, domains_to_not_propagate_headers_to
+        )
+
+        # copy headers and inject only if allowed
+        headers = headers.copy() if headers else {}
+        if allow:
+            headers[SAILFISH_TRACING_HEADER] = trace_id
+
+        try:
+            # perform the real request
+            result = original_request(
+                self,
+                method,
+                url,
+                body=body,
+                headers=headers,
+                encode_chunked=encode_chunked,
+            )
+            # fire off our network-record GraphQL mutation
+            record_network_request(
+                trace_id, url, method, 0, True, timestamp_start=start_ts
+            )
+            return result
+        except Exception as e:
+            # record failures too
+            record_network_request(
+                trace_id,
+                url,
+                method,
+                0,
+                False,
+                error=str(e),
+                timestamp_start=start_ts,
+            )
+            raise
+
+    _hc.HTTPConnection.request = patched_request
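A sketch of the stdlib path (host illustrative). `HTTPSConnection` subclasses `HTTPConnection`, so it inherits the patched `request`; note that here `url` is only the request path, not a full URL.

```python
import http.client

from sf_veritas.patches.network_libraries.http_client import patch_http_client

patch_http_client()

conn = http.client.HTTPSConnection("httpbin.org")
conn.request("GET", "/get")  # tracing header injected by patched_request
resp = conn.getresponse()
print(resp.status)
conn.close()
```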
sf_veritas/patches/network_libraries/httpcore.py
@@ -0,0 +1,152 @@
+import time
+from typing import List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_httpcore(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    """
+    Monkey-patch httpcore.ConnectionPool and AsyncConnectionPool
+    to inject SAILFISH_TRACING_HEADER (unless excluded)
+    and to record every outbound request.
+    """
+    try:
+        import httpcore
+    except ImportError:
+        return  # HTTP Core not present—skip patch
+
+    # Keep original methods
+    orig_sync_req = httpcore.ConnectionPool.request
+    orig_sync_stream = httpcore.ConnectionPool.stream
+    orig_async_req = httpcore.AsyncConnectionPool.request
+    orig_async_stream = httpcore.AsyncConnectionPool.stream
+
+    # Normalize exclude list
+    exclude = domains_to_not_propagate_headers_to or []
+
+    def _prepare_headers(url, existing_headers):
+        """
+        Returns (new_headers, trace_id).
+        Only injects if domain not in `exclude`.
+        """
+        trace_id, allow = get_trace_and_should_propagate(url, exclude)
+        if not allow:
+            return list(existing_headers or []), trace_id
+        hdrs = list(existing_headers or [])
+        hdrs.append((SAILFISH_TRACING_HEADER.encode(), trace_id.encode()))
+        return hdrs, trace_id
+
+    # 1. Sync .request(...)
+    def _patched_sync_request(self, method, url, **kwargs):
+        ts0 = int(time.time() * 1_000)
+        # prepare headers & trace
+        headers, trace_id = _prepare_headers(url, kwargs.get("headers"))
+        kwargs["headers"] = headers
+
+        error = None
+        try:
+            resp = orig_sync_req(self, method, url, **kwargs)
+            success = True
+            status = getattr(resp, "status_code", 0)
+            return resp
+        except Exception as e:
+            success = False
+            status = 0
+            error = str(e)[:255]
+            raise
+        finally:
+            ts1 = int(time.time() * 1_000)
+            record_network_request(
+                trace_id, url, method, status, success, error, ts0, ts1
+            )
+
+    # 2. Sync .stream(...)
+    def _patched_sync_stream(self, method, url, **kwargs):
+        ts0 = int(time.time() * 1_000)
+        headers, trace_id = _prepare_headers(url, kwargs.get("headers"))
+        kwargs["headers"] = headers
+
+        error = None
+        try:
+            stream = orig_sync_stream(self, method, url, **kwargs)
+            success = True
+            # stream itself yields the body; status often on returned object
+            status = 0
+            return stream
+        except Exception as e:
+            success = False
+            status = 0
+            error = str(e)[:255]
+            raise
+        finally:
+            ts1 = int(time.time() * 1_000)
+            record_network_request(
+                trace_id, url, method, status, success, error, ts0, ts1
+            )
+
+    # 3. Async .request(...)
+    async def _patched_async_request(self, method, url, **kwargs):
+        ts0 = int(time.time() * 1_000)
+        headers, trace_id = _prepare_headers(url, kwargs.get("headers"))
+        kwargs["headers"] = headers
+
+        error = None
+        try:
+            resp = await orig_async_req(self, method, url, **kwargs)
+            success = True
+            status = getattr(resp, "status_code", 0)
+            return resp
+        except Exception as e:
+            success = False
+            status = 0
+            error = str(e)[:255]
+            raise
+        finally:
+            ts1 = int(time.time() * 1_000)
+            record_network_request(
+                trace_id, url, method, status, success, error, ts0, ts1
+            )
+
+    # 4. Async .stream(...)
+    def _patched_async_stream(self, method, url, **kwargs):
+        ts0 = int(time.time() * 1_000)
+        headers, trace_id = _prepare_headers(url, kwargs.get("headers"))
+        kwargs["headers"] = headers
+        original_cm = orig_async_stream(self, method, url, **kwargs)
+
+        class _StreamCM:
+            def __init__(self, cm):
+                self._cm = cm
+                self._status = 0
+
+            async def __aenter__(self):
+                response = await self._cm.__aenter__()  # now a single Response
+                # capture status (httpcore.Response.status or status_code)
+                self._status = getattr(
+                    response, "status_code", getattr(response, "status", 0)
+                )
+                return response
+
+            async def __aexit__(self, exc_type, exc, tb):
+                success = exc_type is None
+                ts1 = int(time.time() * 1_000)
+                record_network_request(
+                    trace_id,
+                    url,
+                    method,
+                    self._status,
+                    success,
+                    None if success else str(exc)[:255],
+                    ts0,
+                    ts1,
+                )
+                return await self._cm.__aexit__(exc_type, exc, tb)
+
+        return _StreamCM(original_cm)
+
+    # Apply patches
+    httpcore.ConnectionPool.request = _patched_sync_request
+    httpcore.ConnectionPool.stream = _patched_sync_stream
+    httpcore.AsyncConnectionPool.request = _patched_async_request
+    httpcore.AsyncConnectionPool.stream = _patched_async_stream
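A sketch against the sync pool (URL illustrative). httpcore carries headers as a list of byte pairs, which is why `_prepare_headers` appends an encoded `(name, value)` tuple rather than mutating a dict.

```python
import httpcore

from sf_veritas.patches.network_libraries.httpcore import patch_httpcore

patch_httpcore()

with httpcore.ConnectionPool() as pool:
    # Goes through _patched_sync_request; the tracing header is appended
    # as a (b"header-name", b"trace-id") pair before the real call.
    resp = pool.request("GET", "https://httpbin.org/get")
    print(resp.status)
```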
sf_veritas/patches/network_libraries/httplib2.py
@@ -0,0 +1,76 @@
+import time
+from typing import List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_httplib2(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    """
+    Monkey-patch httplib2.Http.request so that:
+      1. We skip header injection for configured domains.
+      2. We call NetworkRequestTransmitter().do_send via record_network_request().
+      3. All HTTP methods (GET, POST, etc.) continue to work as before.
+    """
+    try:
+        import httplib2
+    except ImportError:
+        return
+
+    # default to an empty blocklist
+    if domains_to_not_propagate_headers_to is None:
+        domains_to_not_propagate_headers_to = []
+
+    original_request = httplib2.Http.request
+
+    def patched_request(self, uri, method="GET", body=None, headers=None, **kwargs):
+        start_ts = int(time.time() * 1_000)
+        # decide whether to inject header
+        trace_id, allow = get_trace_and_should_propagate(
+            uri, domains_to_not_propagate_headers_to
+        )
+        # prepare headers
+        headers = headers.copy() if headers else {}
+        if allow:
+            headers[SAILFISH_TRACING_HEADER] = trace_id
+
+        try:
+            # perform the actual HTTP call
+            response, content = original_request(
+                self, uri, method, body=body, headers=headers, **kwargs
+            )
+            status_code = getattr(response, "status", None) or getattr(
+                response, "status_code", None
+            )
+            success = isinstance(status_code, int) and 200 <= status_code < 400
+            return response, content
+
+        except Exception as e:
+            # record failures
+            record_network_request(
+                trace_id,
+                uri,
+                method,
+                0,
+                False,
+                error=str(e)[:255],
+                timestamp_start=start_ts,
+                timestamp_end=int(time.time() * 1_000),
+            )
+            raise
+
+        finally:
+            # record successes
+            if "status_code" in locals():
+                record_network_request(
+                    trace_id,
+                    uri,
+                    method,
+                    status_code,
+                    success,
+                    timestamp_start=start_ts,
+                    timestamp_end=int(time.time() * 1_000),
+                )
+
+    # apply our patch
+    httplib2.Http.request = patched_request
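A sketch of the httplib2 path (URL illustrative); the patched method preserves httplib2's `(response, content)` return shape, so existing callers are unaffected.

```python
import httplib2

from sf_veritas.patches.network_libraries.httplib2 import patch_httplib2

patch_httplib2()

http = httplib2.Http()
# Same (response, content) tuple as unpatched httplib2.
response, content = http.request("https://httpbin.org/get", "GET")
print(response.status, len(content))
```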
sf_veritas/patches/network_libraries/httpx.py
@@ -0,0 +1,123 @@
+from typing import List, Optional
+
+from ...constants import SAILFISH_TRACING_HEADER
+from .utils import get_trace_and_should_propagate, record_network_request
+
+
+def patch_httpx(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
+    """
+    Monkey-patch HTTPX to inject SAILFISH_TRACING_HEADER into
+    all outbound requests (sync + async + streams), skipping any
+    domains in domains_to_not_propagate_headers_to, and recording each.
+    Safe to call even if HTTPX is not installed.
+    """
+    try:
+        import httpx
+    except ImportError:
+        return  # No HTTPX installed—nothing to patch
+
+    domains = domains_to_not_propagate_headers_to or []
+
+    # Preserve originals
+    orig_request = httpx.request
+    orig_client_request = httpx.Client.request
+    orig_async_request = httpx.AsyncClient.request
+    orig_stream = httpx.stream
+    orig_client_stream = httpx.Client.stream
+    orig_async_client_stream = httpx.AsyncClient.stream
+
+    # Shared header + record prep
+    def _prepare(method: str, url: str, headers: Optional[dict]):
+        trace_id, allow = get_trace_and_should_propagate(url, domains)
+        hdrs = dict(headers or {})
+        if allow:
+            hdrs[SAILFISH_TRACING_HEADER] = trace_id
+        return trace_id, hdrs
+
+    # 1) Module-level request
+    def _patched_request(method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        resp = orig_request(method, url, *args, headers=hdrs, **kwargs)
+        record_network_request(trace_id, str(url), method, resp.status_code, resp.is_success)
+        return resp
+
+    # 2) Sync Client.request
+    def _patched_client_request(self, method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        resp = orig_client_request(self, method, url, *args, headers=hdrs, **kwargs)
+        record_network_request(trace_id, str(url), method, resp.status_code, resp.is_success)
+        return resp
+
+    # 3) AsyncClient.request
+    async def _patched_async_request(self, method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        resp = await orig_async_request(
+            self, method, url, *args, headers=hdrs, **kwargs
+        )
+        record_network_request(trace_id, str(url), method, resp.status_code, resp.is_success)
+        return resp
+
+    # 4a) Module-level streaming
+    def _patched_stream(method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        cm = orig_stream(method, url, *args, headers=hdrs, **kwargs)
+
+        class StreamCM:
+            def __enter__(self):
+                resp = cm.__enter__()
+                resp.read()  # ensure .content
+                record_network_request(
+                    trace_id, url, method, resp.status_code, resp.is_success
+                )
+                return resp
+
+            def __exit__(self, exc_type, exc, tb):
+                return cm.__exit__(exc_type, exc, tb)
+
+        return StreamCM()
+
+    # 4b) Sync Client.stream()
+    def _patched_client_stream(self, method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        cm = orig_client_stream(self, method, url, *args, headers=hdrs, **kwargs)
+
+        class ClientStreamCM:
+            def __enter__(self):
+                resp = cm.__enter__()
+                resp.read()
+                record_network_request(
+                    trace_id, str(url), method, resp.status_code, resp.is_success
+                )
+                return resp
+
+            def __exit__(self, exc_type, exc, tb):
+                return cm.__exit__(exc_type, exc, tb)
+
+        return ClientStreamCM()
+
+    # 4c) AsyncClient.stream()
+    def _patched_async_client_stream(self, method, url, *args, headers=None, **kwargs):
+        trace_id, hdrs = _prepare(method, str(url), headers)
+        cm = orig_async_client_stream(self, method, url, *args, headers=hdrs, **kwargs)
+
+        class AsyncClientStreamCM:
+            async def __aenter__(self):
+                resp = await cm.__aenter__()
+                await resp.aread()  # ensure .content
+                record_network_request(
+                    trace_id, str(url), method, resp.status_code, resp.is_success
+                )
+                return resp
+
+            async def __aexit__(self, exc_type, exc, tb):
+                return await cm.__aexit__(exc_type, exc, tb)
+
+        return AsyncClientStreamCM()
+
+    # Apply monkey-patches
+    httpx.request = _patched_request
+    httpx.Client.request = _patched_client_request
+    httpx.AsyncClient.request = _patched_async_request
+    httpx.stream = _patched_stream
+    httpx.Client.stream = _patched_client_stream
+    httpx.AsyncClient.stream = _patched_async_client_stream
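A sketch exercising the sync and streaming HTTPX paths (URLs and the excluded domain are illustrative). Note that the streaming wrappers read the full body in `__enter__` before recording, so `resp.content` is available to the caller.

```python
import httpx

from sf_veritas.patches.network_libraries.httpx import patch_httpx

patch_httpx(domains_to_not_propagate_headers_to=["metrics.example.com"])

# Module-level call goes through _patched_request.
resp = httpx.request("GET", "https://httpbin.org/get")
print(resp.status_code)

# Client.stream() returns the ClientStreamCM wrapper defined above.
with httpx.Client() as client:
    with client.stream("GET", "https://httpbin.org/get") as stream_resp:
        print(stream_resp.status_code)
```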