sf-veritas 0.11.10__cp314-cp314-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sf_veritas/__init__.py +46 -0
- sf_veritas/_auto_preload.py +73 -0
- sf_veritas/_sfconfig.c +162 -0
- sf_veritas/_sfconfig.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfcrashhandler.c +267 -0
- sf_veritas/_sfcrashhandler.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffastlog.c +953 -0
- sf_veritas/_sffastlog.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffastnet.c +994 -0
- sf_veritas/_sffastnet.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffastnetworkrequest.c +727 -0
- sf_veritas/_sffastnetworkrequest.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffuncspan.c +2791 -0
- sf_veritas/_sffuncspan.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sffuncspan_config.c +730 -0
- sf_veritas/_sffuncspan_config.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfheadercheck.c +341 -0
- sf_veritas/_sfheadercheck.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfnetworkhop.c +1454 -0
- sf_veritas/_sfnetworkhop.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfservice.c +1223 -0
- sf_veritas/_sfservice.cpython-314-x86_64-linux-gnu.so +0 -0
- sf_veritas/_sfteepreload.c +6227 -0
- sf_veritas/app_config.py +57 -0
- sf_veritas/cli.py +336 -0
- sf_veritas/constants.py +10 -0
- sf_veritas/custom_excepthook.py +304 -0
- sf_veritas/custom_log_handler.py +146 -0
- sf_veritas/custom_output_wrapper.py +153 -0
- sf_veritas/custom_print.py +153 -0
- sf_veritas/django_app.py +5 -0
- sf_veritas/env_vars.py +186 -0
- sf_veritas/exception_handling_middleware.py +18 -0
- sf_veritas/exception_metaclass.py +69 -0
- sf_veritas/fast_frame_info.py +116 -0
- sf_veritas/fast_network_hop.py +293 -0
- sf_veritas/frame_tools.py +112 -0
- sf_veritas/funcspan_config_loader.py +693 -0
- sf_veritas/function_span_profiler.py +1313 -0
- sf_veritas/get_preload_path.py +34 -0
- sf_veritas/import_hook.py +62 -0
- sf_veritas/infra_details/__init__.py +3 -0
- sf_veritas/infra_details/get_infra_details.py +24 -0
- sf_veritas/infra_details/kubernetes/__init__.py +3 -0
- sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
- sf_veritas/infra_details/kubernetes/get_details.py +7 -0
- sf_veritas/infra_details/running_on/__init__.py +17 -0
- sf_veritas/infra_details/running_on/kubernetes.py +11 -0
- sf_veritas/interceptors.py +543 -0
- sf_veritas/libsfnettee.so +0 -0
- sf_veritas/local_env_detect.py +118 -0
- sf_veritas/package_metadata.py +6 -0
- sf_veritas/patches/__init__.py +0 -0
- sf_veritas/patches/_patch_tracker.py +74 -0
- sf_veritas/patches/concurrent_futures.py +19 -0
- sf_veritas/patches/constants.py +1 -0
- sf_veritas/patches/exceptions.py +82 -0
- sf_veritas/patches/multiprocessing.py +32 -0
- sf_veritas/patches/network_libraries/__init__.py +99 -0
- sf_veritas/patches/network_libraries/aiohttp.py +294 -0
- sf_veritas/patches/network_libraries/curl_cffi.py +363 -0
- sf_veritas/patches/network_libraries/http_client.py +670 -0
- sf_veritas/patches/network_libraries/httpcore.py +580 -0
- sf_veritas/patches/network_libraries/httplib2.py +315 -0
- sf_veritas/patches/network_libraries/httpx.py +557 -0
- sf_veritas/patches/network_libraries/niquests.py +218 -0
- sf_veritas/patches/network_libraries/pycurl.py +399 -0
- sf_veritas/patches/network_libraries/requests.py +595 -0
- sf_veritas/patches/network_libraries/ssl_socket.py +822 -0
- sf_veritas/patches/network_libraries/tornado.py +360 -0
- sf_veritas/patches/network_libraries/treq.py +270 -0
- sf_veritas/patches/network_libraries/urllib_request.py +483 -0
- sf_veritas/patches/network_libraries/utils.py +598 -0
- sf_veritas/patches/os.py +17 -0
- sf_veritas/patches/threading.py +231 -0
- sf_veritas/patches/web_frameworks/__init__.py +54 -0
- sf_veritas/patches/web_frameworks/aiohttp.py +798 -0
- sf_veritas/patches/web_frameworks/async_websocket_consumer.py +337 -0
- sf_veritas/patches/web_frameworks/blacksheep.py +532 -0
- sf_veritas/patches/web_frameworks/bottle.py +513 -0
- sf_veritas/patches/web_frameworks/cherrypy.py +683 -0
- sf_veritas/patches/web_frameworks/cors_utils.py +122 -0
- sf_veritas/patches/web_frameworks/django.py +963 -0
- sf_veritas/patches/web_frameworks/eve.py +401 -0
- sf_veritas/patches/web_frameworks/falcon.py +931 -0
- sf_veritas/patches/web_frameworks/fastapi.py +738 -0
- sf_veritas/patches/web_frameworks/flask.py +526 -0
- sf_veritas/patches/web_frameworks/klein.py +501 -0
- sf_veritas/patches/web_frameworks/litestar.py +616 -0
- sf_veritas/patches/web_frameworks/pyramid.py +440 -0
- sf_veritas/patches/web_frameworks/quart.py +841 -0
- sf_veritas/patches/web_frameworks/robyn.py +708 -0
- sf_veritas/patches/web_frameworks/sanic.py +874 -0
- sf_veritas/patches/web_frameworks/starlette.py +742 -0
- sf_veritas/patches/web_frameworks/strawberry.py +1446 -0
- sf_veritas/patches/web_frameworks/tornado.py +485 -0
- sf_veritas/patches/web_frameworks/utils.py +170 -0
- sf_veritas/print_override.py +13 -0
- sf_veritas/regular_data_transmitter.py +444 -0
- sf_veritas/request_interceptor.py +401 -0
- sf_veritas/request_utils.py +550 -0
- sf_veritas/segfault_handler.py +116 -0
- sf_veritas/server_status.py +1 -0
- sf_veritas/shutdown_flag.py +11 -0
- sf_veritas/subprocess_startup.py +3 -0
- sf_veritas/test_cli.py +145 -0
- sf_veritas/thread_local.py +1319 -0
- sf_veritas/timeutil.py +114 -0
- sf_veritas/transmit_exception_to_sailfish.py +28 -0
- sf_veritas/transmitter.py +132 -0
- sf_veritas/types.py +47 -0
- sf_veritas/unified_interceptor.py +1678 -0
- sf_veritas/utils.py +39 -0
- sf_veritas-0.11.10.dist-info/METADATA +97 -0
- sf_veritas-0.11.10.dist-info/RECORD +141 -0
- sf_veritas-0.11.10.dist-info/WHEEL +5 -0
- sf_veritas-0.11.10.dist-info/entry_points.txt +2 -0
- sf_veritas-0.11.10.dist-info/top_level.txt +1 -0
- sf_veritas.libs/libbrotlicommon-6ce2a53c.so.1.0.6 +0 -0
- sf_veritas.libs/libbrotlidec-811d1be3.so.1.0.6 +0 -0
- sf_veritas.libs/libcom_err-730ca923.so.2.1 +0 -0
- sf_veritas.libs/libcrypt-52aca757.so.1.1.0 +0 -0
- sf_veritas.libs/libcrypto-bdaed0ea.so.1.1.1k +0 -0
- sf_veritas.libs/libcurl-eaa3cf66.so.4.5.0 +0 -0
- sf_veritas.libs/libgssapi_krb5-323bbd21.so.2.2 +0 -0
- sf_veritas.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
- sf_veritas.libs/libk5crypto-9a74ff38.so.3.1 +0 -0
- sf_veritas.libs/libkeyutils-2777d33d.so.1.6 +0 -0
- sf_veritas.libs/libkrb5-a55300e8.so.3.3 +0 -0
- sf_veritas.libs/libkrb5support-e6594cfc.so.0.1 +0 -0
- sf_veritas.libs/liblber-2-d20824ef.4.so.2.10.9 +0 -0
- sf_veritas.libs/libldap-2-cea2a960.4.so.2.10.9 +0 -0
- sf_veritas.libs/libnghttp2-39367a22.so.14.17.0 +0 -0
- sf_veritas.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
- sf_veritas.libs/libpsl-99becdd3.so.5.3.1 +0 -0
- sf_veritas.libs/libsasl2-7de4d792.so.3.0.0 +0 -0
- sf_veritas.libs/libselinux-d0805dcb.so.1 +0 -0
- sf_veritas.libs/libssh-c11d285b.so.4.8.7 +0 -0
- sf_veritas.libs/libssl-60250281.so.1.1.1k +0 -0
- sf_veritas.libs/libunistring-05abdd40.so.2.1.0 +0 -0
- sf_veritas.libs/libuuid-95b83d40.so.1.3.0 +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Central tracking for patch application to prevent double-patching.
|
|
3
|
+
|
|
4
|
+
This module maintains a registry of applied patches keyed by (patch_name, pid)
|
|
5
|
+
to handle worker forks (gunicorn/uvicorn) and module reloading correctly.
|
|
6
|
+
"""
|
|
7
|
+
import os
|
|
8
|
+
import threading
|
|
9
|
+
|
|
10
|
+
# Global registry: {(patch_name, pid): True}
|
|
11
|
+
_PATCHED = {}
|
|
12
|
+
_lock = threading.Lock()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def is_already_patched(patch_name: str) -> bool:
    """
    Report whether ``patch_name`` is registered for the running process.

    The registry is keyed by ``(patch_name, pid)``, so an entry recorded
    under a different PID does not count for the current process.

    Args:
        patch_name: Unique identifier for the patch (e.g., "requests", "httpx")

    Returns:
        True when an entry for the current PID exists, False otherwise
    """
    with _lock:
        return (patch_name, os.getpid()) in _PATCHED
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def mark_as_patched(patch_name: str) -> bool:
    """
    Register ``patch_name`` as applied for the current PID.

    The membership test and the insertion happen under the same lock
    acquisition, so only one caller per process can observe ``True`` for a
    given name.

    Args:
        patch_name: Unique identifier for the patch

    Returns:
        True if this call created the registry entry, False if it already existed
    """
    registry_key = (patch_name, os.getpid())
    with _lock:
        newly_registered = registry_key not in _PATCHED
        if newly_registered:
            _PATCHED[registry_key] = True
        return newly_registered
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def once(patch_name: str):
    """
    Decorator/guard for ensuring a patch runs only once per process.

    The decorated function is invoked only by the caller that wins the
    atomic ``mark_as_patched`` registration; every other call returns
    ``None`` without running it. (Checking and marking in two separate
    steps would let two threads both pass the check and patch twice.)

    Usage:
        @once("requests")
        def patch_requests(...):
            ...

    Or as a guard:
        if not once.check("requests"):
            return  # Already patched

    Args:
        patch_name: Unique identifier for the patch.

    Returns:
        A decorator that wraps the patch function with the once-per-process guard.
    """
    import functools

    def decorator(func):
        @functools.wraps(func)  # keeps __name__/__doc__ and the rest of the metadata
        def wrapper(*args, **kwargs):
            # mark_as_patched() tests and registers under a single lock hold,
            # so exactly one caller per (patch_name, pid) gets True here.
            if not mark_as_patched(patch_name):
                return None  # Already patched, skip
            return func(*args, **kwargs)

        return wrapper

    return decorator


# Convenience method for checking: returns True only for the first caller in
# this process (it registers the patch as a side effect).
once.check = lambda name: mark_as_patched(name)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
|
2
|
+
from ..thread_local import get_context, set_context
|
|
3
|
+
|
|
4
|
+
_original_submit = ThreadPoolExecutor.submit
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def patched_submit(self, fn, *args, **kwargs):
    """
    Replacement for ``ThreadPoolExecutor.submit`` that carries the caller's context.

    The context returned by ``get_context()`` at submit time is re-applied
    with ``set_context()`` on the worker just before ``fn`` runs.

    Args:
        fn: Callable to schedule.
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        The future returned by the original ``submit``.
    """
    current_context = get_context()

    def wrapped_fn(*fn_args, **fn_kwargs):
        set_context(current_context)
        # Propagate the result; dropping it would make every Future resolve to None.
        return fn(*fn_args, **fn_kwargs)

    return _original_submit(self, wrapped_fn, *args, **kwargs)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def patch_concurrent_futures():
    """
    Install the context-propagating ``submit`` on ``ThreadPoolExecutor``.

    ``ProcessPoolExecutor`` is intentionally left untouched: ``patched_submit``
    delegates to the original *ThreadPoolExecutor* ``submit``, which relies on
    thread-pool internals a process pool does not have, and it wraps the task
    in a local closure that cannot be pickled for transfer to a worker
    process. Installing it there would make every process-pool submit fail.
    """
    ThreadPoolExecutor.submit = patched_submit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Lowercase HTTP method names as an immutable tuple.
# NOTE(review): presumably the set of verbs the network patches instrument — confirm against importers.
supported_network_verbs = ("get", "post", "put", "patch", "delete", "head", "options")
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import sys
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
from importlib import abc
|
|
6
|
+
|
|
7
|
+
from ..env_vars import PRINT_CONFIGURATION_STATUSES
|
|
8
|
+
|
|
9
|
+
# Thread-local storage to avoid re-entry problems
|
|
10
|
+
patch_lock = threading.local()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def patch_exceptions(module):
    """
    Flag the current thread as having run exception patching.

    The flag lives on the thread-local ``patch_lock`` object and is never
    cleared here, so later calls on the same thread are no-ops. The
    ``module`` argument is accepted but not used by this implementation.
    """
    if not hasattr(patch_lock, "active"):
        patch_lock.active = True
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ExceptionPatchingFinder(abc.MetaPathFinder):
    """
    Meta-path finder that wraps the real loader in ``ExceptionPatchingLoader``.

    The real spec is located with ``importlib.util.find_spec`` and a new spec
    is returned whose loader delegates to the original one.
    """

    def find_spec(self, fullname, path, target=None):
        """
        Return a spec for ``fullname`` whose loader is wrapped, or ``None``.

        Args:
            fullname: Absolute dotted name of the module being imported.
            path: Parent package search path supplied by the import system (unused).
            target: Optional module object supplied on reload (unused).

        Returns:
            A ``ModuleSpec`` using ``ExceptionPatchingLoader``, or ``None`` to
            let the remaining finders handle the import.
        """
        # Re-entrancy guard: importlib.util.find_spec() below walks
        # sys.meta_path, which includes this finder.
        if getattr(patch_lock, "loading", False):
            return None
        patch_lock.loading = True
        try:
            # The second parameter of importlib.util.find_spec is ``package``
            # (only used for relative names), not a search path, so it is
            # not passed here.
            original_spec = importlib.util.find_spec(fullname)
            if original_spec is None or original_spec.loader is None:
                # Nothing to wrap; defer to the other finders.
                return None

            locations = original_spec.submodule_search_locations
            wrapped_spec = importlib.util.spec_from_loader(
                fullname,
                ExceptionPatchingLoader(original_spec.loader),
                origin=original_spec.origin,
                is_package=locations is not None,
            )
            # The wrapper loader exposes neither is_package() nor
            # get_filename(), so spec_from_loader cannot infer these fields;
            # copy them over or packages lose __path__ and modules __file__.
            if locations is not None:
                wrapped_spec.submodule_search_locations = locations
            wrapped_spec.loader_state = original_spec.loader_state
            wrapped_spec.cached = original_spec.cached
            wrapped_spec.has_location = original_spec.has_location
            return wrapped_spec
        finally:
            patch_lock.loading = False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ExceptionPatchingLoader(abc.Loader):
    """Loader that delegates to a wrapped loader, then calls ``patch_exceptions``."""

    def __init__(self, loader):
        # The real loader; all module creation and execution is delegated to it.
        self._original_loader = loader

    def create_module(self, spec):
        """Delegate module creation to the wrapped loader and return its result."""
        delegate = self._original_loader
        return delegate.create_module(spec)

    def exec_module(self, module):
        """Execute ``module`` with the wrapped loader, then run ``patch_exceptions`` on it."""
        delegate = self._original_loader
        delegate.exec_module(module)
        patch_exceptions(module)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def install_import_hook():
    """
    Put an ``ExceptionPatchingFinder`` at the front of ``sys.meta_path``.

    Progress messages are printed before and after when
    ``PRINT_CONFIGURATION_STATUSES`` is truthy.
    """
    # NOTE(review): print() is called with log=False, which the builtin print
    # rejects — presumably a package-level print override is active; confirm.
    announce = PRINT_CONFIGURATION_STATUSES
    if announce:
        print("EXCEPTIONS - install_import_hook", log=False)
    finder = ExceptionPatchingFinder()
    sys.meta_path.insert(0, finder)
    if announce:
        print("EXCEPTIONS - install_import_hook...DONE", log=False)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Initially store the current state of sys.excepthook
|
|
58
|
+
original_excepthook = sys.excepthook
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def monitor_excepthook(interval=1):
    """
    Poll ``sys.excepthook`` forever and report when it changes.

    The module-level ``original_excepthook`` is the comparison baseline. It is
    only updated (and a message only printed) when
    ``PRINT_CONFIGURATION_STATUSES`` is truthy. This function never returns.

    Args:
        interval: Seconds to sleep between checks.
    """
    global original_excepthook

    while True:
        observed_hook = sys.excepthook
        hook_changed = observed_hook != original_excepthook

        if hook_changed and PRINT_CONFIGURATION_STATUSES:
            print("sys.excepthook has been modified!")
            original_excepthook = observed_hook
            # Re-check immediately; the next pass sees no change and sleeps.
            continue

        if PRINT_CONFIGURATION_STATUSES:
            print("No change detected in sys.excepthook.")

        # Pause for the specified interval before the next check
        time.sleep(interval)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Function to start monitoring in a separate thread
|
|
79
|
+
def start_monitoring(interval=2):
    """
    Start ``monitor_excepthook`` on a background thread.

    The thread is a daemon: ``monitor_excepthook`` loops forever, so a
    non-daemon thread would keep the interpreter alive after the main
    program finishes.

    Args:
        interval: Seconds between checks, forwarded to ``monitor_excepthook``.
    """
    thread = threading.Thread(
        target=monitor_excepthook,
        args=(interval,),
        daemon=True,
    )
    thread.start()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import multiprocessing
|
|
2
|
+
|
|
3
|
+
from ..thread_local import get_context, set_context
|
|
4
|
+
|
|
5
|
+
_original_process_init = multiprocessing.Process.__init__
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _ContextRestoringTarget:
    """
    Callable that re-applies a captured context before running a process target.

    Defined at module level (rather than as a closure) so instances can be
    pickled by reference when the start method pickles the ``Process`` object
    (spawn/forkserver).
    NOTE(review): this still requires the captured context and the wrapped
    target to be picklable under those start methods — confirm for get_context().
    """

    def __init__(self, target, context):
        self._target = target
        self._context = context

    def __call__(self, *targs, **tkwargs):
        set_context(self._context)
        return self._target(*targs, **tkwargs)


def patched_process_init(self, *args, **kwargs):
    """
    Replacement for ``multiprocessing.Process.__init__`` that propagates context.

    The context returned by ``get_context()`` at construction time is
    re-applied via ``set_context()`` in the child before the target runs.
    The target is wrapped whether it is passed by keyword or positionally.

    Args:
        *args: Positional arguments for ``Process.__init__`` (``group``, ``target``, ...).
        **kwargs: Keyword arguments for ``Process.__init__``.
    """
    current_context = get_context()

    original_target = kwargs.get("target")
    if original_target:
        kwargs["target"] = _ContextRestoringTarget(original_target, current_context)
    elif len(args) > 1 and callable(args[1]):
        # Process.__init__ is (group, target, ...): the positional target is
        # args[1]; args[0] is ``group``.
        wrapped = _ContextRestoringTarget(args[1], current_context)
        args = (args[0], wrapped) + args[2:]

    _original_process_init(self, *args, **kwargs)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def patch_multiprocessing():
    """Replace ``multiprocessing.Process.__init__`` with ``patched_process_init``."""
    process_cls = multiprocessing.Process
    setattr(process_cls, "__init__", patched_process_init)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from ...request_interceptor import get_domains_to_not_propagate_headers_to
|
|
5
|
+
from ...env_vars import SF_DEBUG
|
|
6
|
+
from .utils import init_fast_network_tracking
|
|
7
|
+
from .ssl_socket import patch_ssl_sockets # CRITICAL: Import SSL patching
|
|
8
|
+
from .requests import patch_requests
|
|
9
|
+
from .aiohttp import patch_aiohttp
|
|
10
|
+
from .httpx import patch_httpx
|
|
11
|
+
from .httpcore import patch_httpcore
|
|
12
|
+
from .http_client import patch_http_client
|
|
13
|
+
from .urllib_request import patch_urllib_request
|
|
14
|
+
from .httplib2 import patch_httplib2
|
|
15
|
+
from .pycurl import patch_pycurl
|
|
16
|
+
from .niquests import patch_niquests
|
|
17
|
+
from .curl_cffi import patch_curl_cffi
|
|
18
|
+
from .tornado import patch_tornado
|
|
19
|
+
from .treq import patch_treq
|
|
20
|
+
|
|
21
|
+
# from .aioh2 import patch_aioh2 # Asynchronous HTTP/2 client, no clear extension hooks
|
|
22
|
+
# from .http_prompt import patch_http_prompt # CLI HTTP client, minimal public API
|
|
23
|
+
# from .mureq import patch_mureq # Specialized crawler client, little documentation
|
|
24
|
+
# from .reqboost import patch_reqboost # High-performance batch client, docs scarce
|
|
25
|
+
# from .impit import (patch_impit) # Used by Crawlee's ImpitHttpClient
|
|
26
|
+
# from .h11 import patch_h11 # Low-level HTTP/1.1 protocol library
|
|
27
|
+
# from .aioquic import patch_aioquic # QUIC/HTTP-3 client, no standard headers API
|
|
28
|
+
# from .qh3 import patch_qh3 # Experimental HTTP/3 client, no docs found
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def patch_all_http_clients(
    domains_to_not_propagate_headers_to: Optional[List[str]] = None,
):
    """
    Apply every HTTP-client patch in this package, SSL sockets first.

    Steps, in order: patch SSL sockets (failures are caught, not raised),
    set ``SF_PYTHON_HEADER_INJECTION`` in the environment, initialize fast
    network tracking, normalize the skip-domain list, then call each
    library-specific ``patch_*`` function with that list.

    Args:
        domains_to_not_propagate_headers_to: Optional list of domains passed to
            every ``patch_*`` function. When truthy it is first passed through
            ``get_domains_to_not_propagate_headers_to``.
    """
    # ====================================================================
    # CRITICAL: PATCH SSL FIRST - This captures ALL HTTPS traffic
    # ====================================================================
    # All HTTP libraries (requests, httpx, urllib3, aiohttp, http.client)
    # use ssl.SSLSocket underneath. By patching at the SSL layer first,
    # we automatically capture all HTTPS traffic with ~15-20ns overhead.
    #
    # This also avoids race conditions from C-level socket hooks.
    # ====================================================================
    # NOTE(review): print() is called with log=False throughout, which the
    # builtin print rejects — presumably a package print override is active.
    if SF_DEBUG:
        print(f"[patches] Calling patch_ssl_sockets() - SF_ENABLE_PYTHON_SSL_TEE={os.getenv('SF_ENABLE_PYTHON_SSL_TEE', 'NOT_SET')}", log=False)
    try:
        patch_ssl_sockets()
        if SF_DEBUG:
            print("[patches] ✓ SSL socket patching complete - all HTTPS captured automatically", log=False)
    except Exception as e:
        # Best effort: an SSL patching failure must not stop the remaining patches.
        if SF_DEBUG:
            print(f"[patches] WARNING: SSL patching failed: {e}", log=False)
            import traceback
            traceback.print_exc()

    # Enable Python-level header injection (ULTRA-FAST: <100ns)
    # This disables C-level header injection (291µs overhead) and lets Python patches handle it
    # NOTE(review): the comment above says "enable", but the value written is
    # "0" — confirm how the consumer of this variable interprets it.
    os.environ["SF_PYTHON_HEADER_INJECTION"] = "0"

    # Initialize fast C-based network tracking
    # NOTE: When LD_PRELOAD is active (_sfteepreload), this only initializes the Python senders
    # The actual socket capture is done by _sfteepreload automatically
    init_fast_network_tracking()

    # Send domains mutation ONCE before patching (not from within each patch function)
    if domains_to_not_propagate_headers_to:
        domains_to_not_propagate_headers_to = get_domains_to_not_propagate_headers_to(
            domains_to_not_propagate_headers_to
        )

    # ULTRA-FAST Python-level header injection (<100ns)
    # Enable core libraries that cover 95% of use cases:

    # requests covers: requests → urllib3 → http.client (entire stack!)
    patch_requests(domains_to_not_propagate_headers_to)

    # aiohttp for async HTTP (standalone stack)
    patch_aiohttp(domains_to_not_propagate_headers_to)

    # Additional libraries (enable if needed):
    patch_http_client(domains_to_not_propagate_headers_to)  # Covered by requests
    patch_urllib_request(domains_to_not_propagate_headers_to)  # Covered by requests
    patch_httplib2(domains_to_not_propagate_headers_to)
    patch_httpx(domains_to_not_propagate_headers_to)
    patch_httpcore(domains_to_not_propagate_headers_to)
    patch_pycurl(domains_to_not_propagate_headers_to)
    patch_treq(domains_to_not_propagate_headers_to)
    patch_tornado(domains_to_not_propagate_headers_to)
    patch_curl_cffi(domains_to_not_propagate_headers_to)
    patch_niquests(domains_to_not_propagate_headers_to)
|
|
90
|
+
|
|
91
|
+
# # Lesser-used libraries
|
|
92
|
+
# patch_impit(domains_to_not_propagate_headers_to)
|
|
93
|
+
# patch_aioh2(domains_to_not_propagate_headers_to)
|
|
94
|
+
# patch_http_prompt(domains_to_not_propagate_headers_to)
|
|
95
|
+
# patch_mureq(domains_to_not_propagate_headers_to)
|
|
96
|
+
# patch_reqboost(domains_to_not_propagate_headers_to)
|
|
97
|
+
# patch_h11(domains_to_not_propagate_headers_to)
|
|
98
|
+
# patch_aioquic(domains_to_not_propagate_headers_to)
|
|
99
|
+
# patch_qh3(domains_to_not_propagate_headers_to)
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, List, Optional
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import wrapt
|
|
7
|
+
|
|
8
|
+
HAS_WRAPT = True
|
|
9
|
+
except ImportError:
|
|
10
|
+
HAS_WRAPT = False
|
|
11
|
+
|
|
12
|
+
from ...constants import FUNCSPAN_OVERRIDE_HEADER, SAILFISH_TRACING_HEADER
|
|
13
|
+
from ...thread_local import trace_id_ctx
|
|
14
|
+
from .utils import (
|
|
15
|
+
init_fast_header_check,
|
|
16
|
+
inject_headers_ultrafast,
|
|
17
|
+
is_ssl_socket_active,
|
|
18
|
+
record_network_request,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# JSON serialization - try fast orjson first, fallback to stdlib json
|
|
22
|
+
try:
|
|
23
|
+
import orjson
|
|
24
|
+
|
|
25
|
+
HAS_ORJSON = True
|
|
26
|
+
except ImportError:
|
|
27
|
+
import json
|
|
28
|
+
|
|
29
|
+
HAS_ORJSON = False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _tee_preload_active() -> bool:
    """
    Tell whether the LD_PRELOAD tee library appears to be in use.

    Returns True when ``SF_TEE_PRELOAD_ONLY`` equals "1", or when the
    ``LD_PRELOAD`` value mentions ``libsfnettee.so`` or ``_sfteepreload``.
    """
    if os.getenv("SF_TEE_PRELOAD_ONLY", "0") == "1":
        return True
    preload_value = os.getenv("LD_PRELOAD", "")
    tee_markers = ("libsfnettee.so", "_sfteepreload")
    return any(marker in preload_value for marker in tee_markers)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _dump_headers_as_json(headers) -> bytes:
    """Serialize a header mapping to JSON bytes, coercing keys and values to ``str``."""
    # aiohttp may use istr keys, which orjson doesn't accept; plain str works for both encoders.
    plain = {str(k): str(v) for k, v in headers.items()}
    if HAS_ORJSON:
        return orjson.dumps(plain)
    return json.dumps(plain).encode("utf-8")


def _capture_request_payload(kwargs, headers):
    """Return ``(body_bytes, headers_json_bytes)`` for a request, best effort (empty on failure)."""
    req_data = b""
    req_headers = b""
    try:
        if "json" in kwargs:
            if HAS_ORJSON:
                req_data = orjson.dumps(kwargs["json"])
            else:
                req_data = json.dumps(kwargs["json"]).encode("utf-8")
        elif "data" in kwargs:
            data = kwargs["data"]
            if isinstance(data, bytes):
                req_data = data
            elif isinstance(data, str):
                req_data = data.encode("utf-8")

        req_headers = _dump_headers_as_json(headers)
    except Exception:  # noqa: BLE001
        # Capture is telemetry only; never let it break the user's request.
        pass
    return req_data, req_headers


def _prepare_full_capture(kwargs, url, skip):
    """Inject tracing headers into ``kwargs`` and return ``(trace_id, req_data, req_headers)``."""
    headers = kwargs.get("headers", {}) or {}
    if not isinstance(headers, dict):
        headers = dict(headers)

    # CRITICAL: Skip if already injected (prevents double injection)
    if SAILFISH_TRACING_HEADER not in headers:
        inject_headers_ultrafast(headers, str(url), skip)

    kwargs["headers"] = headers

    # Get trace_id for network recording (after injection)
    trace_id = trace_id_ctx.get(None) or ""

    req_data, req_headers = _capture_request_payload(kwargs, headers)
    return trace_id, req_data, req_headers


def _report_exchange(trace_id, verb_name, url, response, start, end, req_data, req_headers):
    """Send one request/response record via ``record_network_request`` unless the SSL layer covers it."""
    url_str = str(url)

    # Skip capture for HTTPS when ssl_socket.py is active (avoids double-capture)
    if url_str.startswith("https://") and is_ssl_socket_active():
        return

    # Capture response metadata immediately (before response can be closed)
    status = getattr(response, "status", getattr(response, "status_code", 0))
    ok = status < 400

    try:
        resp_headers = _dump_headers_as_json(response.headers)
    except Exception:  # noqa: BLE001
        # The response was already obtained; a serialization problem must not discard it.
        resp_headers = b""

    record_network_request(
        trace_id,
        url_str,
        verb_name.upper(),
        status,
        ok,
        None,
        timestamp_start=start,
        timestamp_end=end,
        request_data=req_data,
        response_data=b"",  # Skip body to avoid consuming stream
        request_headers=req_headers,
        response_headers=resp_headers,
    )


def _install_header_only_patch(aiohttp_module, skip):
    """Patch ``ClientSession._request`` to inject headers only (no timing, no capture)."""
    if HAS_WRAPT:

        async def instrumented_request(wrapped, instance, args, kwargs):
            """Inject tracing headers, then delegate to the wrapped ``_request``."""
            if len(args) > 1:
                url = str(args[1])
            else:
                url = str(kwargs.get("str_or_url", kwargs.get("url", "")))

            # Direct header mutation (no copy!)
            headers = kwargs.get("headers")
            if headers is None:
                headers = {}
                kwargs["headers"] = headers

            # CRITICAL: Skip if already injected (prevents double injection)
            if SAILFISH_TRACING_HEADER not in headers:
                inject_headers_ultrafast(headers, url, skip)

            return await wrapped(*args, **kwargs)

        wrapt.wrap_function_wrapper(
            aiohttp_module.ClientSession, "_request", instrumented_request
        )
        return

    # Fallback: Direct patching if wrapt not available
    orig_request = aiohttp_module.ClientSession._request

    async def patched_request(self, verb_name: str, url: Any, **kwargs):
        """Header injection without wrapt."""
        headers = kwargs.get("headers")
        if headers is None:
            headers = {}
            kwargs["headers"] = headers

        # CRITICAL: Skip if already injected (prevents double injection)
        if SAILFISH_TRACING_HEADER not in headers:
            inject_headers_ultrafast(headers, str(url), skip)

        return await orig_request(self, verb_name, url, **kwargs)

    aiohttp_module.ClientSession._request = patched_request


def _install_full_capture_patch(aiohttp_module, skip):
    """Patch ``ClientSession._request`` (and a coroutine-style ``aiohttp.request``) with full capture."""
    import inspect

    orig_request = aiohttp_module.ClientSession._request

    async def patched_request(self, verb_name: str, url: Any, **kwargs):
        trace_id, req_data, req_headers = _prepare_full_capture(kwargs, url, skip)

        # Perform & time the request
        start = int(time.time() * 1_000)
        response = await orig_request(self, verb_name, url, **kwargs)
        end = int(time.time() * 1_000)

        _report_exchange(
            trace_id, verb_name, url, response, start, end, req_data, req_headers
        )

        # CRITICAL: Return response immediately!
        return response

    aiohttp_module.ClientSession._request = patched_request

    # Only wrap the module-level aiohttp.request when it really is a coroutine
    # function. Where it returns an async context manager instead, replacing it
    # with a coroutine would break ``async with aiohttp.request(...)``.
    # NOTE(review): in that case it presumably goes through
    # ClientSession._request, patched above — confirm against the aiohttp
    # version in use.
    orig_module_request = getattr(aiohttp_module, "request", None)
    if orig_module_request and inspect.iscoroutinefunction(orig_module_request):

        async def patched_module_request(verb_name: str, url: str, **kwargs):
            trace_id, req_data, req_headers = _prepare_full_capture(kwargs, url, skip)

            start = int(time.time() * 1_000)
            response = await orig_module_request(verb_name, url, **kwargs)
            end = int(time.time() * 1_000)

            _report_exchange(
                trace_id, verb_name, url, response, start, end, req_data, req_headers
            )

            # CRITICAL: Return response immediately!
            return response

        # Apply the wrapper to module-level request function
        aiohttp_module.request = patched_module_request


def patch_aiohttp(domains_to_not_propagate_headers_to: Optional[List[str]] = None):
    """
    Monkey-patch aiohttp so that every HTTP verb:
    1) injects tracing headers via ``inject_headers_ultrafast`` when not already present,
    2) measures timing (only when LD_PRELOAD not active),
    3) reports the exchange via ``record_network_request`` (UNLESS LD_PRELOAD active).

    When LD_PRELOAD is active: header-only path that patches
    ``ClientSession._request`` directly (via wrapt when available), without
    going through TraceConfig.
    When LD_PRELOAD is NOT active: full capture path with request body/header
    and response header recording (response bodies are never read).

    Does nothing if aiohttp cannot be imported.

    Args:
        domains_to_not_propagate_headers_to: Optional list of domains handed to
            the header-injection helpers as the skip list.
    """
    try:
        import aiohttp
    except Exception:  # noqa: BLE001
        # aiohttp missing or broken: nothing to patch. Unlike a bare except,
        # this lets KeyboardInterrupt/SystemExit propagate.
        return

    skip = domains_to_not_propagate_headers_to or []

    if _tee_preload_active():
        # Initialize C extension for ultra-fast header checking (if available)
        init_fast_header_check(skip)
        _install_header_only_patch(aiohttp, skip)
    else:
        _install_full_capture_patch(aiohttp, skip)
|