sf-veritas 0.11.10__cp314-cp314-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. sf_veritas/__init__.py +46 -0
  2. sf_veritas/_auto_preload.py +73 -0
  3. sf_veritas/_sfconfig.c +162 -0
  4. sf_veritas/_sfconfig.cpython-314-x86_64-linux-gnu.so +0 -0
  5. sf_veritas/_sfcrashhandler.c +267 -0
  6. sf_veritas/_sfcrashhandler.cpython-314-x86_64-linux-gnu.so +0 -0
  7. sf_veritas/_sffastlog.c +953 -0
  8. sf_veritas/_sffastlog.cpython-314-x86_64-linux-gnu.so +0 -0
  9. sf_veritas/_sffastnet.c +994 -0
  10. sf_veritas/_sffastnet.cpython-314-x86_64-linux-gnu.so +0 -0
  11. sf_veritas/_sffastnetworkrequest.c +727 -0
  12. sf_veritas/_sffastnetworkrequest.cpython-314-x86_64-linux-gnu.so +0 -0
  13. sf_veritas/_sffuncspan.c +2791 -0
  14. sf_veritas/_sffuncspan.cpython-314-x86_64-linux-gnu.so +0 -0
  15. sf_veritas/_sffuncspan_config.c +730 -0
  16. sf_veritas/_sffuncspan_config.cpython-314-x86_64-linux-gnu.so +0 -0
  17. sf_veritas/_sfheadercheck.c +341 -0
  18. sf_veritas/_sfheadercheck.cpython-314-x86_64-linux-gnu.so +0 -0
  19. sf_veritas/_sfnetworkhop.c +1454 -0
  20. sf_veritas/_sfnetworkhop.cpython-314-x86_64-linux-gnu.so +0 -0
  21. sf_veritas/_sfservice.c +1223 -0
  22. sf_veritas/_sfservice.cpython-314-x86_64-linux-gnu.so +0 -0
  23. sf_veritas/_sfteepreload.c +6227 -0
  24. sf_veritas/app_config.py +57 -0
  25. sf_veritas/cli.py +336 -0
  26. sf_veritas/constants.py +10 -0
  27. sf_veritas/custom_excepthook.py +304 -0
  28. sf_veritas/custom_log_handler.py +146 -0
  29. sf_veritas/custom_output_wrapper.py +153 -0
  30. sf_veritas/custom_print.py +153 -0
  31. sf_veritas/django_app.py +5 -0
  32. sf_veritas/env_vars.py +186 -0
  33. sf_veritas/exception_handling_middleware.py +18 -0
  34. sf_veritas/exception_metaclass.py +69 -0
  35. sf_veritas/fast_frame_info.py +116 -0
  36. sf_veritas/fast_network_hop.py +293 -0
  37. sf_veritas/frame_tools.py +112 -0
  38. sf_veritas/funcspan_config_loader.py +693 -0
  39. sf_veritas/function_span_profiler.py +1313 -0
  40. sf_veritas/get_preload_path.py +34 -0
  41. sf_veritas/import_hook.py +62 -0
  42. sf_veritas/infra_details/__init__.py +3 -0
  43. sf_veritas/infra_details/get_infra_details.py +24 -0
  44. sf_veritas/infra_details/kubernetes/__init__.py +3 -0
  45. sf_veritas/infra_details/kubernetes/get_cluster_name.py +147 -0
  46. sf_veritas/infra_details/kubernetes/get_details.py +7 -0
  47. sf_veritas/infra_details/running_on/__init__.py +17 -0
  48. sf_veritas/infra_details/running_on/kubernetes.py +11 -0
  49. sf_veritas/interceptors.py +543 -0
  50. sf_veritas/libsfnettee.so +0 -0
  51. sf_veritas/local_env_detect.py +118 -0
  52. sf_veritas/package_metadata.py +6 -0
  53. sf_veritas/patches/__init__.py +0 -0
  54. sf_veritas/patches/_patch_tracker.py +74 -0
  55. sf_veritas/patches/concurrent_futures.py +19 -0
  56. sf_veritas/patches/constants.py +1 -0
  57. sf_veritas/patches/exceptions.py +82 -0
  58. sf_veritas/patches/multiprocessing.py +32 -0
  59. sf_veritas/patches/network_libraries/__init__.py +99 -0
  60. sf_veritas/patches/network_libraries/aiohttp.py +294 -0
  61. sf_veritas/patches/network_libraries/curl_cffi.py +363 -0
  62. sf_veritas/patches/network_libraries/http_client.py +670 -0
  63. sf_veritas/patches/network_libraries/httpcore.py +580 -0
  64. sf_veritas/patches/network_libraries/httplib2.py +315 -0
  65. sf_veritas/patches/network_libraries/httpx.py +557 -0
  66. sf_veritas/patches/network_libraries/niquests.py +218 -0
  67. sf_veritas/patches/network_libraries/pycurl.py +399 -0
  68. sf_veritas/patches/network_libraries/requests.py +595 -0
  69. sf_veritas/patches/network_libraries/ssl_socket.py +822 -0
  70. sf_veritas/patches/network_libraries/tornado.py +360 -0
  71. sf_veritas/patches/network_libraries/treq.py +270 -0
  72. sf_veritas/patches/network_libraries/urllib_request.py +483 -0
  73. sf_veritas/patches/network_libraries/utils.py +598 -0
  74. sf_veritas/patches/os.py +17 -0
  75. sf_veritas/patches/threading.py +231 -0
  76. sf_veritas/patches/web_frameworks/__init__.py +54 -0
  77. sf_veritas/patches/web_frameworks/aiohttp.py +798 -0
  78. sf_veritas/patches/web_frameworks/async_websocket_consumer.py +337 -0
  79. sf_veritas/patches/web_frameworks/blacksheep.py +532 -0
  80. sf_veritas/patches/web_frameworks/bottle.py +513 -0
  81. sf_veritas/patches/web_frameworks/cherrypy.py +683 -0
  82. sf_veritas/patches/web_frameworks/cors_utils.py +122 -0
  83. sf_veritas/patches/web_frameworks/django.py +963 -0
  84. sf_veritas/patches/web_frameworks/eve.py +401 -0
  85. sf_veritas/patches/web_frameworks/falcon.py +931 -0
  86. sf_veritas/patches/web_frameworks/fastapi.py +738 -0
  87. sf_veritas/patches/web_frameworks/flask.py +526 -0
  88. sf_veritas/patches/web_frameworks/klein.py +501 -0
  89. sf_veritas/patches/web_frameworks/litestar.py +616 -0
  90. sf_veritas/patches/web_frameworks/pyramid.py +440 -0
  91. sf_veritas/patches/web_frameworks/quart.py +841 -0
  92. sf_veritas/patches/web_frameworks/robyn.py +708 -0
  93. sf_veritas/patches/web_frameworks/sanic.py +874 -0
  94. sf_veritas/patches/web_frameworks/starlette.py +742 -0
  95. sf_veritas/patches/web_frameworks/strawberry.py +1446 -0
  96. sf_veritas/patches/web_frameworks/tornado.py +485 -0
  97. sf_veritas/patches/web_frameworks/utils.py +170 -0
  98. sf_veritas/print_override.py +13 -0
  99. sf_veritas/regular_data_transmitter.py +444 -0
  100. sf_veritas/request_interceptor.py +401 -0
  101. sf_veritas/request_utils.py +550 -0
  102. sf_veritas/segfault_handler.py +116 -0
  103. sf_veritas/server_status.py +1 -0
  104. sf_veritas/shutdown_flag.py +11 -0
  105. sf_veritas/subprocess_startup.py +3 -0
  106. sf_veritas/test_cli.py +145 -0
  107. sf_veritas/thread_local.py +1319 -0
  108. sf_veritas/timeutil.py +114 -0
  109. sf_veritas/transmit_exception_to_sailfish.py +28 -0
  110. sf_veritas/transmitter.py +132 -0
  111. sf_veritas/types.py +47 -0
  112. sf_veritas/unified_interceptor.py +1678 -0
  113. sf_veritas/utils.py +39 -0
  114. sf_veritas-0.11.10.dist-info/METADATA +97 -0
  115. sf_veritas-0.11.10.dist-info/RECORD +141 -0
  116. sf_veritas-0.11.10.dist-info/WHEEL +5 -0
  117. sf_veritas-0.11.10.dist-info/entry_points.txt +2 -0
  118. sf_veritas-0.11.10.dist-info/top_level.txt +1 -0
  119. sf_veritas.libs/libbrotlicommon-6ce2a53c.so.1.0.6 +0 -0
  120. sf_veritas.libs/libbrotlidec-811d1be3.so.1.0.6 +0 -0
  121. sf_veritas.libs/libcom_err-730ca923.so.2.1 +0 -0
  122. sf_veritas.libs/libcrypt-52aca757.so.1.1.0 +0 -0
  123. sf_veritas.libs/libcrypto-bdaed0ea.so.1.1.1k +0 -0
  124. sf_veritas.libs/libcurl-eaa3cf66.so.4.5.0 +0 -0
  125. sf_veritas.libs/libgssapi_krb5-323bbd21.so.2.2 +0 -0
  126. sf_veritas.libs/libidn2-2f4a5893.so.0.3.6 +0 -0
  127. sf_veritas.libs/libk5crypto-9a74ff38.so.3.1 +0 -0
  128. sf_veritas.libs/libkeyutils-2777d33d.so.1.6 +0 -0
  129. sf_veritas.libs/libkrb5-a55300e8.so.3.3 +0 -0
  130. sf_veritas.libs/libkrb5support-e6594cfc.so.0.1 +0 -0
  131. sf_veritas.libs/liblber-2-d20824ef.4.so.2.10.9 +0 -0
  132. sf_veritas.libs/libldap-2-cea2a960.4.so.2.10.9 +0 -0
  133. sf_veritas.libs/libnghttp2-39367a22.so.14.17.0 +0 -0
  134. sf_veritas.libs/libpcre2-8-516f4c9d.so.0.7.1 +0 -0
  135. sf_veritas.libs/libpsl-99becdd3.so.5.3.1 +0 -0
  136. sf_veritas.libs/libsasl2-7de4d792.so.3.0.0 +0 -0
  137. sf_veritas.libs/libselinux-d0805dcb.so.1 +0 -0
  138. sf_veritas.libs/libssh-c11d285b.so.4.8.7 +0 -0
  139. sf_veritas.libs/libssl-60250281.so.1.1.1k +0 -0
  140. sf_veritas.libs/libunistring-05abdd40.so.2.1.0 +0 -0
  141. sf_veritas.libs/libuuid-95b83d40.so.1.3.0 +0 -0
@@ -0,0 +1,2791 @@
1
+ // sf_veritas/_sffuncspan.c
2
+ #define PY_SSIZE_T_CLEAN
3
+ #include <Python.h>
4
+ #include <frameobject.h>
5
+ #include <pthread.h>
6
+ #include <curl/curl.h>
7
+ #include <stdatomic.h>
8
+ #include <stdint.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+ #include <time.h>
12
+ #include <sys/time.h>
13
+ #include <sys/stat.h>
14
+ #include <sys/types.h>
15
+ #include <unistd.h>
16
+ #include <fcntl.h>
17
+ #include "sf_tls.h"
18
+ extern void sf_guard_enter(void);
19
+ extern void sf_guard_leave(void);
20
+ extern int sf_guard_active(void);
21
+
22
+ // ---------- Thread-local guard flag to prevent recursive telemetry capture ----------
23
+ __attribute__((visibility("default")))
24
+
25
+ // ---------- External Config System Integration ----------
26
+ // Priority levels (must match _sffuncspan_config.c)
27
+ #define PRIORITY_HTTP_HEADER 1
28
+ #define PRIORITY_DECORATOR 2
29
+ #define PRIORITY_SAILFISH_FUNCTION 3
30
+ #define PRIORITY_PRAGMA 4
31
+ #define PRIORITY_SAILFISH_FILE 5
32
+ #define PRIORITY_DIRECTORY 6
33
+ #define PRIORITY_ENV_VAR 8
34
+ #define PRIORITY_DEFAULT 9
35
+
36
+ // Config structure matches _sffuncspan_config.c
37
+ typedef struct {
38
+ uint8_t include_arguments;
39
+ uint8_t include_return_value;
40
+ uint8_t autocapture_all_children;
41
+ uint8_t priority; // Priority level (lower = higher priority)
42
+ float sample_rate;
43
+ uint32_t arg_limit_mb;
44
+ uint32_t return_limit_mb;
45
+ uint64_t hash;
46
+ } sf_funcspan_config_t;
47
+
48
+ // Python module reference for config system
49
+ static PyObject *g_config_module = NULL;
50
+ static PyObject *g_config_get_func = NULL;
51
+ static PyObject *g_config_has_override_func = NULL; // Check if HTTP header override is active
52
+
53
+ // Check whether an HTTP header override is active; this still calls into the Python config hook when one is registered, so it is only cheap in the no-hook case
54
+ static inline int has_http_header_override(void) {
55
+ if (!g_config_has_override_func) return 0;
56
+
57
+ PyObject *result = PyObject_CallObject(g_config_has_override_func, NULL);
58
+ if (!result) {
59
+ if (PyErr_Occurred()) PyErr_Clear();
60
+ return 0;
61
+ }
62
+
63
+ int has_override = PyObject_IsTrue(result);
64
+ Py_DECREF(result);
65
+ return has_override;
66
+ }
67
+
68
+ // Default config to use if config system is not available
69
+ static sf_funcspan_config_t g_fallback_config = {
70
+ .include_arguments = 1,
71
+ .include_return_value = 1,
72
+ .autocapture_all_children = 1,
73
+ .priority = PRIORITY_DEFAULT,
74
+ .sample_rate = 1.0f,
75
+ .arg_limit_mb = 1,
76
+ .return_limit_mb = 1,
77
+ .hash = 0
78
+ };
79
+
80
+ // Initialize config system integration (called once at init)
81
+ static void init_config_system(void) {
82
+ // Try to import _sffuncspan_config module
83
+ g_config_module = PyImport_ImportModule("sf_veritas._sffuncspan_config");
84
+ if (g_config_module) {
85
+ // Get the 'get' function
86
+ g_config_get_func = PyObject_GetAttrString(g_config_module, "get");
87
+ if (!g_config_get_func || !PyCallable_Check(g_config_get_func)) {
88
+ Py_XDECREF(g_config_get_func);
89
+ g_config_get_func = NULL;
90
+ }
91
+
92
+ // Get the 'has_thread_override' function
93
+ g_config_has_override_func = PyObject_GetAttrString(g_config_module, "has_thread_override");
94
+ if (!g_config_has_override_func || !PyCallable_Check(g_config_has_override_func)) {
95
+ Py_XDECREF(g_config_has_override_func);
96
+ g_config_has_override_func = NULL;
97
+ }
98
+
99
+ if (!g_config_get_func && !g_config_has_override_func) {
100
+ Py_DECREF(g_config_module);
101
+ g_config_module = NULL;
102
+ fprintf(stderr, "[_sffuncspan] WARNING: Config module imported but functions not found\n");
103
+ } else {
104
+ fprintf(stderr, "[_sffuncspan] Config system initialized successfully\n");
105
+ }
106
+ } else {
107
+ PyErr_Clear(); // Config module not available, use defaults
108
+ fprintf(stderr, "[_sffuncspan] WARNING: Config module not available, using defaults\n");
109
+ }
110
+ }
111
+
112
+ // Thread-local recursion guard to prevent calling Python from within config lookup
113
+ static _Thread_local int g_in_config_lookup = 0;
114
+
115
+ // Thread-local recursion guard to prevent profiling the profiler itself
116
+ static _Thread_local int g_in_profiler = 0;
117
+
118
+ // Thread-local guard for recursive object serialization
119
+ #define SERIALIZE_MAX_DEPTH 32
120
+ static _Thread_local int g_serialize_depth = 0;
121
+ static _Thread_local PyObject *g_serialize_stack[SERIALIZE_MAX_DEPTH];
122
+
123
+ // Debug counter (only log first few lookups)
124
+ static _Atomic int g_debug_lookup_count = 0;
125
+
126
+ // Simple cache for config lookups (to avoid calling Python during argument capture)
127
+ #define CONFIG_CACHE_SIZE 256
128
+ typedef struct {
129
+ uint64_t hash; // Hash of file_path:func_name
130
+ sf_funcspan_config_t config;
131
+ } config_cache_entry_t;
132
+
133
+ static config_cache_entry_t g_config_cache[CONFIG_CACHE_SIZE];
134
+ static pthread_mutex_t g_config_cache_mutex = PTHREAD_MUTEX_INITIALIZER;
135
+
136
+ // Simple string hash function
137
+ static inline uint64_t simple_hash(const char *str1, const char *str2) {
138
+ uint64_t hash = 5381;
139
+ const unsigned char *s = (const unsigned char *)str1;
140
+ while (*s) {
141
+ hash = ((hash << 5) + hash) + *s++;
142
+ }
143
+ s = (const unsigned char *)str2;
144
+ while (*s) {
145
+ hash = ((hash << 5) + hash) + *s++;
146
+ }
147
+ return hash;
148
+ }
149
+
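
For orientation, a minimal standalone sketch of how a cache slot is derived from the two strings, using the same djb2-style fold as simple_hash above. The file and function names are made-up examples, and the 256 modulus mirrors CONFIG_CACHE_SIZE as defined above.

    #include <stdint.h>
    #include <stdio.h>

    /* Same djb2-style fold over both strings as simple_hash() above. */
    static uint64_t demo_hash(const char *a, const char *b) {
        uint64_t h = 5381;
        for (const unsigned char *s = (const unsigned char *)a; *s; ++s)
            h = ((h << 5) + h) + *s;
        for (const unsigned char *s = (const unsigned char *)b; *s; ++s)
            h = ((h << 5) + h) + *s;
        return h;
    }

    int main(void) {
        /* Hypothetical file/function pair. */
        uint64_t h = demo_hash("/app/app.py", "handler");
        printf("hash=%llu slot=%llu\n",
               (unsigned long long)h,
               (unsigned long long)(h % 256)); /* 256 == CONFIG_CACHE_SIZE */
        return 0;
    }
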
150
+ // Get config for a function by calling Python config system (with cache)
151
+ static inline sf_funcspan_config_t get_function_config(const char *file_path, const char *func_name) {
152
+ uint64_t func_hash, file_hash;
153
+ uint32_t func_cache_idx, file_cache_idx;
154
+ int count;
155
+ PyObject *args = NULL;
156
+ PyObject *result = NULL;
157
+ PyObject *val = NULL;
158
+ sf_funcspan_config_t config;
159
+
160
+ // First, check cache for function-specific config (exact match)
161
+ func_hash = simple_hash(file_path, func_name);
162
+ func_cache_idx = func_hash % CONFIG_CACHE_SIZE;
163
+
164
+ // Check cache first (no lock for read - this is a simple cache, not perfect but fast)
165
+ if (g_config_cache[func_cache_idx].hash == func_hash) {
166
+ count = atomic_fetch_add(&g_debug_lookup_count, 1);
167
+ if (count < 5) {
168
+ fprintf(stderr, "[_sffuncspan] CACHE HIT (func): %s::%s -> args=%d ret=%d\n",
169
+ func_name, file_path,
170
+ g_config_cache[func_cache_idx].config.include_arguments,
171
+ g_config_cache[func_cache_idx].config.include_return_value);
172
+ }
173
+ return g_config_cache[func_cache_idx].config;
174
+ }
175
+
176
+ // Second, check cache for file-level config (using "<MODULE>" as function name)
177
+ file_hash = simple_hash(file_path, "<MODULE>");
178
+ file_cache_idx = file_hash % CONFIG_CACHE_SIZE;
179
+
180
+ if (g_config_cache[file_cache_idx].hash == file_hash) {
181
+ count = atomic_fetch_add(&g_debug_lookup_count, 1);
182
+ if (count < 5) {
183
+ fprintf(stderr, "[_sffuncspan] CACHE HIT (file): %s::%s -> args=%d ret=%d\n",
184
+ func_name, file_path,
185
+ g_config_cache[file_cache_idx].config.include_arguments,
186
+ g_config_cache[file_cache_idx].config.include_return_value);
187
+ }
188
+ return g_config_cache[file_cache_idx].config;
189
+ }
190
+
191
+ // CACHE MISS - try config module (includes HTTP header overrides!)
192
+ if (g_config_get_func && !g_in_config_lookup) {
193
+ // Prevent recursion
194
+ g_in_config_lookup = 1;
195
+
196
+ args = Py_BuildValue("(ss)", file_path, func_name);
197
+ if (args) {
198
+ result = PyObject_CallObject(g_config_get_func, args);
199
+ Py_DECREF(args);
200
+
201
+ if (result && PyDict_Check(result)) {
202
+ config = g_fallback_config;
203
+
204
+ val = PyDict_GetItemString(result, "include_arguments");
205
+ if (val && PyBool_Check(val)) config.include_arguments = (val == Py_True) ? 1 : 0;
206
+
207
+ val = PyDict_GetItemString(result, "include_return_value");
208
+ if (val && PyBool_Check(val)) config.include_return_value = (val == Py_True) ? 1 : 0;
209
+
210
+ val = PyDict_GetItemString(result, "autocapture_all_children");
211
+ if (val && PyBool_Check(val)) config.autocapture_all_children = (val == Py_True) ? 1 : 0;
212
+
213
+ val = PyDict_GetItemString(result, "arg_limit_mb");
214
+ if (val && PyLong_Check(val)) config.arg_limit_mb = (uint32_t)PyLong_AsLong(val);
215
+
216
+ val = PyDict_GetItemString(result, "return_limit_mb");
217
+ if (val && PyLong_Check(val)) config.return_limit_mb = (uint32_t)PyLong_AsLong(val);
218
+
219
+ val = PyDict_GetItemString(result, "sample_rate");
220
+ if (val && PyFloat_Check(val)) config.sample_rate = (float)PyFloat_AsDouble(val);
221
+
222
+ // DON'T cache configs from _sffuncspan_config.get() because they include
223
+ // thread-local HTTP header overrides that change per-request.
224
+ // Only cache configs that were pre-populated via cache_config().
225
+
226
+ Py_DECREF(result);
227
+ g_in_config_lookup = 0;
228
+ return config;
229
+ }
230
+
231
+ Py_XDECREF(result);
232
+ if (PyErr_Occurred()) PyErr_Clear();
233
+ }
234
+ g_in_config_lookup = 0;
235
+ }
236
+
237
+ // Fallback to defaults
238
+ count = atomic_fetch_add(&g_debug_lookup_count, 1);
239
+ if (count < 5) {
240
+ fprintf(stderr, "[_sffuncspan] CACHE MISS: %s::%s - using fallback config\n", func_name, file_path);
241
+ }
242
+ return g_fallback_config;
243
+ }
244
+
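
Based on the keys read above, the Python-side _sffuncspan_config.get(file_path, func_name) hook appears to be expected to return a dict of roughly the following shape; the concrete values below are illustrative only, not taken from the package.

    /*
     * Illustrative return value of _sffuncspan_config.get() (values invented):
     *
     *   {
     *     "include_arguments": True,
     *     "include_return_value": False,
     *     "autocapture_all_children": True,
     *     "arg_limit_mb": 1,
     *     "return_limit_mb": 1,
     *     "sample_rate": 1.0
     *   }
     *
     * Missing or wrongly-typed keys simply leave the corresponding
     * g_fallback_config field untouched (see the PyBool_Check / PyLong_Check /
     * PyFloat_Check guards above).
     */
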
245
+ // Compatibility shim for Python < 3.9 (PyFrame_GetCode() was added in 3.9)
246
+ #if PY_VERSION_HEX < 0x03090000 // Python < 3.9
247
+ static inline PyCodeObject* PyFrame_GetCode(PyFrameObject *frame) {
248
+ PyCodeObject *code = frame->f_code;
249
+ Py_INCREF(code);
250
+ return code;
251
+ }
252
+ #endif
253
+
254
+ // ---------- Ring buffer ----------
255
+ #ifndef SFFS_RING_CAP
256
+ #define SFFS_RING_CAP 524288 // 512K slots for high-throughput (was 64K)
257
+ #endif
258
+
259
+ typedef struct {
260
+ char *body; // malloc'd HTTP JSON body
261
+ size_t len;
262
+ } sffs_msg_t;
263
+
264
+ static sffs_msg_t *g_ring = NULL;
265
+ static size_t g_cap = 0;
266
+ static _Atomic size_t g_head = 0; // consumer
267
+ static _Atomic size_t g_tail = 0; // producer
268
+
269
+ // tiny spinlock to make push MPMC-safe enough for Python producers
270
+ static atomic_flag g_push_lock = ATOMIC_FLAG_INIT;
271
+
272
+ // wake/sleep
273
+ static pthread_mutex_t g_cv_mtx = PTHREAD_MUTEX_INITIALIZER;
274
+ static pthread_cond_t g_cv = PTHREAD_COND_INITIALIZER;
275
+ static _Atomic int g_running = 0;
276
+
277
+ // Thread pool for concurrent senders (configurable via SF_FUNCSPAN_SENDER_THREADS)
278
+ #define MAX_SENDER_THREADS 16
279
+ static pthread_t g_sender_threads[MAX_SENDER_THREADS];
280
+ static int g_num_sender_threads = 0;
281
+
282
+ // curl state - per-thread handles for concurrent HTTP requests
283
+ __thread CURL *g_telem_curl = NULL;
284
+ static struct curl_slist *g_hdrs = NULL;
285
+
286
+ // config (owned strings)
287
+ static char *g_url = NULL;
288
+ static char *g_func_span_query_escaped = NULL;
289
+ static char *g_json_prefix_func_span = NULL;
290
+ static char *g_api_key = NULL;
291
+ static char *g_service_uuid = NULL;
292
+ static char *g_library = NULL;
293
+ static char *g_version = NULL;
294
+ static int g_http2 = 0;
295
+
296
+ // Function span configuration
297
+ static size_t g_variable_capture_size_limit_bytes = 1048576; // 1MB default
298
+ static PyObject *g_capture_from_installed_libraries = NULL; // list of strings or NULL
299
+
300
+ // Sampling configuration for ultra-low overhead
301
+ static _Atomic uint64_t g_sample_counter = 0;
302
+ static uint64_t g_sample_rate = 1; // 1 = capture all, 100 = capture 1/100, 10000 = capture 1/10000
303
+ static int g_enable_sampling = 0; // 0 = disabled (capture all by default), 1 = enabled
304
+
305
+ // Master kill switch from SF_ENABLE_FUNCTION_SPANS env var (default: TRUE)
306
+ // When disabled, profiler hooks run but skip ALL expensive work (config, capture, transmission)
307
+ // NOTHING can override this (not headers, not decorators, nothing)
308
+ static int g_enable_function_spans = 1; // Default: enabled
309
+
310
+ // Debug flag from environment (set in py_init)
311
+ static int SF_DEBUG = 1;
312
+
313
+ // Serialization configuration
314
+ static int g_parse_json_strings = 1; // 1 = auto-parse JSON strings, 0 = keep as strings
315
+
316
+ // Capture control - granular configuration
317
+ static int g_capture_arguments = 1; // 1 = capture arguments, 0 = skip
318
+ static int g_capture_return_value = 1; // 1 = capture return value, 0 = skip
319
+ static size_t g_arg_limit_bytes = 1048576; // 1MB default for arguments
320
+ static size_t g_return_limit_bytes = 1048576; // 1MB default for return values
321
+
322
+ // Django view function filtering
323
+ static int g_include_django_view_functions = 0; // 0 = skip Django view functions (default), 1 = include them
324
+
325
+ // Installed packages filtering - controlled by SF_FUNCSPAN_CAPTURE_INSTALLED_PACKAGES
326
+ static int g_capture_installed_packages = 0; // 0 = skip site-packages/stdlib (default), 1 = capture them
327
+
328
+ // SF Veritas self-capture - controlled by SF_FUNCSPAN_CAPTURE_SF_VERITAS
329
+ static int g_capture_sf_veritas = 0; // 0 = skip sf_veritas (default), 1 = capture our own telemetry code
330
+
331
+ // Performance monitoring
332
+ static _Atomic uint64_t g_spans_recorded = 0;
333
+ static _Atomic uint64_t g_spans_sampled_out = 0;
334
+ static _Atomic uint64_t g_spans_dropped = 0;
335
+
336
+ static const char *JSON_SUFFIX = "}}";
337
+
338
+ // Span ID management - thread-local storage for span stack
339
+ static pthread_key_t g_span_stack_key;
340
+ static pthread_once_t g_span_stack_key_once = PTHREAD_ONCE_INIT;
341
+
342
+ // UUID4-based Span ID Ring Buffer (pre-generated for zero-allocation hot path)
343
+ // Each UUID4 string: "xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx" = 36 chars + null = 37 bytes
344
+ // Buffer size: 10,000 UUIDs = ~370KB (much smaller than 1MB trace_id buffer)
345
+ #define SPAN_UUID_BUFFER_SIZE 10000
346
+ #define SPAN_UUID_REFILL_THRESHOLD 100 // Refill when < 100 UUIDs remain
347
+ #define SPAN_UUID_BATCH_SIZE 100 // Generate 100 at a time
348
+
349
+ typedef struct {
350
+ char uuid[37]; // UUID4 string with null terminator
351
+ } span_uuid_entry_t;
352
+
353
+ static span_uuid_entry_t g_span_uuid_buffer[SPAN_UUID_BUFFER_SIZE];
354
+ static _Atomic size_t g_span_uuid_head = 0; // Read position
355
+ static _Atomic size_t g_span_uuid_tail = 0; // Write position
356
+ static pthread_mutex_t g_span_uuid_lock = PTHREAD_MUTEX_INITIALIZER;
357
+ static pthread_t g_span_uuid_worker_thread;
358
+ static _Atomic int g_span_uuid_worker_running = 0;
359
+ static int g_urandom_fd = -1; // Persistent /dev/urandom file descriptor
360
+ static pthread_mutex_t g_urandom_fd_lock = PTHREAD_MUTEX_INITIALIZER;
361
+
362
+ // Span stack entry
363
+ typedef struct span_entry {
364
+ char *span_id;
365
+ struct span_entry *next;
366
+ } span_entry_t;
367
+
368
+ // Python ContextVar and setter function for async-safe span ID sync
369
+ // These are initialized in py_init() and used in push_span()/pop_span()
370
+ // to sync the C thread-local span stack to Python's async-safe ContextVar
371
+ static PyObject *g_current_span_id_contextvar = NULL;
372
+ static PyObject *g_set_current_span_id_func = NULL;
373
+ static PyObject *g_get_trace_id_func = NULL;
374
+
375
+ // ---------- Helpers for epoch nanoseconds ----------
376
+ static inline uint64_t now_epoch_ns(void) {
377
+ struct timespec ts;
378
+ clock_gettime(CLOCK_REALTIME, &ts);
379
+ return ((uint64_t)ts.tv_sec) * 1000000000ULL + (uint64_t)ts.tv_nsec;
380
+ }
381
+
382
+ static inline uint64_t now_ms(void) {
383
+ #if defined(CLOCK_REALTIME_COARSE)
384
+ struct timespec ts;
385
+ clock_gettime(CLOCK_REALTIME_COARSE, &ts);
386
+ return ((uint64_t)ts.tv_sec) * 1000ULL + (uint64_t)(ts.tv_nsec / 1000000ULL);
387
+ #else
388
+ struct timeval tv;
389
+ gettimeofday(&tv, NULL);
390
+ return ((uint64_t)tv.tv_sec) * 1000ULL + (uint64_t)(tv.tv_usec / 1000ULL);
391
+ #endif
392
+ }
393
+
394
+ static char *str_dup(const char *s) {
395
+ size_t n = strlen(s);
396
+ char *p = (char*)malloc(n + 1);
397
+ if (!p) return NULL;
398
+ memcpy(p, s, n);
399
+ p[n] = 0;
400
+ return p;
401
+ }
402
+
403
+ // escape for generic JSON string fields
404
+ static char *json_escape(const char *s) {
405
+ const unsigned char *in = (const unsigned char*)s;
406
+ size_t extra = 0;
407
+ for (const unsigned char *p = in; *p; ++p) {
408
+ switch (*p) {
409
+ case '\\': case '"': extra++; break;
410
+ default:
411
+ if (*p < 0x20) extra += 5; // \u00XX
412
+ }
413
+ }
414
+ size_t inlen = strlen(s);
415
+ char *out = (char*)malloc(inlen + extra + 1);
416
+ if (!out) return NULL;
417
+
418
+ char *o = out;
419
+ for (const unsigned char *p = in; *p; ++p) {
420
+ switch (*p) {
421
+ case '\\': *o++='\\'; *o++='\\'; break;
422
+ case '"': *o++='\\'; *o++='"'; break;
423
+ default:
424
+ if (*p < 0x20) {
425
+ static const char hex[] = "0123456789abcdef";
426
+ *o++='\\'; *o++='u'; *o++='0'; *o++='0';
427
+ *o++=hex[(*p)>>4]; *o++=hex[(*p)&0xF];
428
+ } else {
429
+ *o++ = (char)*p;
430
+ }
431
+ }
432
+ }
433
+ *o = 0;
434
+ return out;
435
+ }
436
+
437
+ // escape for the GraphQL "query" string (handle \n, \r, \t too)
438
+ static char *json_escape_query(const char *s) {
439
+ const unsigned char *in = (const unsigned char*)s;
440
+ size_t extra = 0;
441
+ for (const unsigned char *p = in; *p; ++p) {
442
+ switch (*p) {
443
+ case '\\': case '"': case '\n': case '\r': case '\t': extra++; break;
444
+ default: break;
445
+ }
446
+ }
447
+ size_t inlen = strlen(s);
448
+ char *out = (char*)malloc(inlen + extra + 1);
449
+ if (!out) return NULL;
450
+ char *o = out;
451
+ for (const unsigned char *p = in; *p; ++p) {
452
+ switch (*p) {
453
+ case '\\': *o++='\\'; *o++='\\'; break;
454
+ case '"': *o++='\\'; *o++='"'; break;
455
+ case '\n': *o++='\\'; *o++='n'; break;
456
+ case '\r': *o++='\\'; *o++='r'; break;
457
+ case '\t': *o++='\\'; *o++='t'; break;
458
+ default: *o++=(char)*p;
459
+ }
460
+ }
461
+ *o=0;
462
+ return out;
463
+ }
464
+
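
A small usage sketch of the two escapers above, assuming it is compiled in the same translation unit below their definitions. The inputs are illustrative; note that json_escape() encodes control characters as \u00XX, while json_escape_query() maps newline, carriage return, and tab to \n, \r, \t.

    /* Hypothetical test harness, placed below the two escapers. */
    static void demo_escaping(void) {
        char *a = json_escape("line1\nline2 \"quoted\"");
        char *b = json_escape_query("query($x: ID!) {\n  node(id: $x) { id }\n}");
        /* a -> line1\u000aline2 \"quoted\"                        */
        /* b -> query($x: ID!) {\n  node(id: $x) { id }\n}         */
        if (a) { fprintf(stderr, "%s\n", a); free(a); }
        if (b) { fprintf(stderr, "%s\n", b); free(b); }
    }
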
465
+ // generic prefix builder for a given escaped query
466
+ static int build_prefix_for_query(const char *query_escaped, char **out_prefix) {
467
+ const char *p1 = "{\"query\":\"";
468
+ const char *p2 = "\",\"variables\":{";
469
+ const char *k1 = "\"apiKey\":\"";
470
+ const char *k2 = "\",\"serviceUuid\":\"";
471
+ const char *k3 = "\",\"library\":\"";
472
+ const char *k4 = "\",\"version\":\"";
473
+
474
+ size_t n = strlen(p1) + strlen(query_escaped) + strlen(p2)
475
+ + strlen(k1) + strlen(g_api_key)
476
+ + strlen(k2) + strlen(g_service_uuid)
477
+ + strlen(k3) + strlen(g_library)
478
+ + strlen(k4) + strlen(g_version) + 5;
479
+
480
+ char *prefix = (char*)malloc(n);
481
+ if (!prefix) return 0;
482
+
483
+ char *o = prefix;
484
+ o += sprintf(o, "%s%s%s", p1, query_escaped, p2);
485
+ o += sprintf(o, "%s%s", k1, g_api_key);
486
+ o += sprintf(o, "%s%s", k2, g_service_uuid);
487
+ o += sprintf(o, "%s%s", k3, g_library);
488
+ o += sprintf(o, "%s%s\"", k4, g_version);
489
+ *o = '\0';
490
+
491
+ *out_prefix = prefix;
492
+ return 1;
493
+ }
494
+
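
For reference, the prefix produced by build_prefix_for_query() should look roughly like the comment below; the field values are placeholders, not real credentials.

    /*
     * Illustrative prefix (placeholder values):
     *
     *   {"query":"<escaped GraphQL query>","variables":{"apiKey":"<api-key>",
     *    "serviceUuid":"<service-uuid>","library":"<library>","version":"<version>"
     *
     * The JSON object is intentionally left open: build_body_func_span()
     * appends the per-span fields and the closing JSON_SUFFIX ("}}").
     */
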
495
+ // ---------- UUID4 Generation for Span IDs ----------
496
+
497
+ // Generate a single RFC 4122 UUID4 string
498
+ // Format: "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx" where y is [8,9,a,b]
499
+ static void generate_uuid4(char *out) {
500
+ unsigned char bytes[16];
501
+ int got_random = 0;
502
+
503
+ // Use persistent /dev/urandom fd (thread-safe with lock)
504
+ pthread_mutex_lock(&g_urandom_fd_lock);
505
+ if (g_urandom_fd >= 0) {
506
+ ssize_t n = read(g_urandom_fd, bytes, 16);
507
+ if (n == 16) {
508
+ got_random = 1;
509
+ }
510
+ }
511
+ pthread_mutex_unlock(&g_urandom_fd_lock);
512
+
513
+ // Fallback to time-based randomness if read failed
514
+ if (!got_random) {
515
+ uint64_t t = now_epoch_ns();
516
+ for (int i = 0; i < 16; i++) {
517
+ bytes[i] = (unsigned char)(t >> (i * 8));
518
+ }
519
+ }
520
+
521
+ // Set version (4) and variant (RFC 4122) bits
522
+ bytes[6] = (bytes[6] & 0x0F) | 0x40; // Version 4
523
+ bytes[8] = (bytes[8] & 0x3F) | 0x80; // Variant RFC 4122
524
+
525
+ // Format as "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
526
+ static const char hex[] = "0123456789abcdef";
527
+ char *p = out;
528
+ for (int i = 0; i < 16; i++) {
529
+ if (i == 4 || i == 6 || i == 8 || i == 10) {
530
+ *p++ = '-';
531
+ }
532
+ *p++ = hex[bytes[i] >> 4];
533
+ *p++ = hex[bytes[i] & 0x0F];
534
+ }
535
+ *p = '\0';
536
+ }
537
+
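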
538
+ // Pop a UUID from the ring buffer (called on hot path)
539
+ // Always returns 1; if the buffer is empty, a UUID is generated inline (slower path)
540
+ static int pop_span_uuid(char *out) {
541
+ size_t head = atomic_load(&g_span_uuid_head);
542
+ size_t tail = atomic_load(&g_span_uuid_tail);
543
+
544
+ // Check if buffer is empty
545
+ if (head == tail) {
546
+ // Buffer empty - generate directly (should be rare)
547
+ generate_uuid4(out);
548
+ return 1;
549
+ }
550
+
551
+ // Pop from head (lock-free read)
552
+ size_t idx = head % SPAN_UUID_BUFFER_SIZE;
553
+ memcpy(out, g_span_uuid_buffer[idx].uuid, 37);
554
+
555
+ // Advance head
556
+ atomic_store(&g_span_uuid_head, head + 1);
557
+
558
+ return 1;
559
+ }
560
+
561
+ // Check buffer level and return number of available UUIDs
562
+ static size_t span_uuid_buffer_available(void) {
563
+ size_t head = atomic_load(&g_span_uuid_head);
564
+ size_t tail = atomic_load(&g_span_uuid_tail);
565
+
566
+ if (tail >= head) {
567
+ return tail - head;
568
+ }
569
+ // Handle wraparound
570
+ return SPAN_UUID_BUFFER_SIZE - (head - tail);
571
+ }
572
+
573
+ // Push a batch of UUIDs to the ring buffer (called by worker thread)
574
+ static void push_span_uuid_batch(size_t count) {
575
+ pthread_mutex_lock(&g_span_uuid_lock);
576
+
577
+ size_t tail = atomic_load(&g_span_uuid_tail);
578
+
579
+ for (size_t i = 0; i < count; i++) {
580
+ size_t idx = tail % SPAN_UUID_BUFFER_SIZE;
581
+ generate_uuid4(g_span_uuid_buffer[idx].uuid);
582
+ tail++;
583
+
584
+ // Don't overwrite if buffer is full
585
+ size_t head = atomic_load(&g_span_uuid_head);
586
+ if (tail - head >= SPAN_UUID_BUFFER_SIZE) {
587
+ break;
588
+ }
589
+ }
590
+
591
+ atomic_store(&g_span_uuid_tail, tail);
592
+ pthread_mutex_unlock(&g_span_uuid_lock);
593
+ }
594
+
595
+ // Background worker thread that maintains the UUID buffer
596
+ static void* span_uuid_worker(void *arg) {
597
+ (void)arg;
598
+
599
+ while (atomic_load(&g_span_uuid_worker_running)) {
600
+ size_t available = span_uuid_buffer_available();
601
+
602
+ // Refill if below threshold
603
+ if (available < SPAN_UUID_REFILL_THRESHOLD) {
604
+ size_t needed = SPAN_UUID_BUFFER_SIZE - available;
605
+ size_t batch = needed < SPAN_UUID_BATCH_SIZE ? needed : SPAN_UUID_BATCH_SIZE;
606
+ push_span_uuid_batch(batch);
607
+ }
608
+
609
+ // Sleep for 10ms before checking again
610
+ usleep(10000);
611
+ }
612
+
613
+ return NULL;
614
+ }
615
+
616
+ // Initialize the UUID buffer and start worker thread
617
+ static void init_span_uuid_buffer(void) {
618
+ // Open /dev/urandom once for reuse (avoids opening/closing 10k times)
619
+ g_urandom_fd = open("/dev/urandom", O_RDONLY);
620
+ if (g_urandom_fd < 0) {
621
+ fprintf(stderr, "[_sffuncspan] WARNING: Failed to open /dev/urandom, using time-based fallback\n");
622
+ fflush(stderr);
623
+ }
624
+
625
+ // Pre-fill buffer with 10,000 UUIDs
626
+ push_span_uuid_batch(SPAN_UUID_BUFFER_SIZE);
627
+
628
+ // Start worker thread
629
+ atomic_store(&g_span_uuid_worker_running, 1);
630
+ pthread_create(&g_span_uuid_worker_thread, NULL, span_uuid_worker, NULL);
631
+ }
632
+
633
+ // Shutdown the UUID worker thread
634
+ static void shutdown_span_uuid_buffer(void) {
635
+ // Stop worker thread
636
+ atomic_store(&g_span_uuid_worker_running, 0);
637
+ pthread_join(g_span_uuid_worker_thread, NULL);
638
+
639
+ // Close /dev/urandom fd
640
+ if (g_urandom_fd >= 0) {
641
+ close(g_urandom_fd);
642
+ g_urandom_fd = -1;
643
+ }
644
+ }
645
+
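
A hypothetical lifecycle sketch of the pre-generated UUID pool above, assuming it is compiled into this translation unit; the real init/shutdown calls are presumably wired up from module init and teardown paths not shown in this excerpt. The point of the pool is that the hot path pays for a 37-byte memcpy instead of a read() from /dev/urandom plus hex formatting.

    /* Illustrative only, not part of the package source. */
    static void demo_span_uuid_pool(void) {
        init_span_uuid_buffer();       /* pre-fills 10,000 UUIDs, starts worker */

        char uuid[37];
        pop_span_uuid(uuid);           /* hot path: copy from the ring buffer */
        fprintf(stderr, "span id: %s\n", uuid);

        shutdown_span_uuid_buffer();   /* stops worker, closes /dev/urandom */
    }
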
646
+ // ---------- Span stack management ----------
647
+ static void init_span_stack_key(void) {
648
+ pthread_key_create(&g_span_stack_key, NULL);
649
+ }
650
+
651
+ static span_entry_t* get_span_stack(void) {
652
+ pthread_once(&g_span_stack_key_once, init_span_stack_key);
653
+ return (span_entry_t*)pthread_getspecific(g_span_stack_key);
654
+ }
655
+
656
+ static void set_span_stack(span_entry_t *stack) {
657
+ pthread_once(&g_span_stack_key_once, init_span_stack_key);
658
+ pthread_setspecific(g_span_stack_key, stack);
659
+ }
660
+
661
+ static char* generate_span_id(void) {
662
+ // Allocate buffer for UUID4 string (36 chars + null terminator)
663
+ char *span_id = (char*)malloc(37);
664
+ if (!span_id) return NULL;
665
+
666
+ // Pop UUID from ring buffer (lock-free, pre-generated)
667
+ pop_span_uuid(span_id);
668
+
669
+ return span_id;
670
+ }
671
+
672
+ static void push_span(const char *span_id) {
673
+ span_entry_t *entry = (span_entry_t*)malloc(sizeof(span_entry_t));
674
+ if (!entry) return;
675
+ entry->span_id = str_dup(span_id);
676
+ entry->next = get_span_stack();
677
+ set_span_stack(entry);
678
+
679
+ // Sync to Python ContextVar for async-safety
680
+ // This ensures async tasks on the same thread see isolated span IDs
681
+ if (g_set_current_span_id_func && span_id) {
682
+ PyObject *args = Py_BuildValue("(s)", span_id);
683
+ if (args) {
684
+ PyObject *result = PyObject_CallObject(g_set_current_span_id_func, args);
685
+ Py_XDECREF(result);
686
+ Py_DECREF(args);
687
+ if (PyErr_Occurred()) {
688
+ PyErr_Clear(); // Don't let ContextVar errors break profiling
689
+ }
690
+ }
691
+ }
692
+ }
693
+
694
+ static char* pop_span(void) {
695
+ span_entry_t *stack = get_span_stack();
696
+ if (!stack) {
697
+ // Stack is empty - sync None to ContextVar
698
+ if (g_set_current_span_id_func) {
699
+ PyObject *args = Py_BuildValue("(O)", Py_None);
700
+ if (args) {
701
+ PyObject *result = PyObject_CallObject(g_set_current_span_id_func, args);
702
+ Py_XDECREF(result);
703
+ Py_DECREF(args);
704
+ if (PyErr_Occurred()) {
705
+ PyErr_Clear(); // Don't let ContextVar errors break profiling
706
+ }
707
+ }
708
+ }
709
+ return NULL;
710
+ }
711
+
712
+ char *span_id = stack->span_id;
713
+ span_entry_t *next = stack->next;
714
+ free(stack);
715
+ set_span_stack(next);
716
+
717
+ // Sync parent span ID (or None) to ContextVar for async-safety
718
+ if (g_set_current_span_id_func) {
719
+ const char *parent_id = next ? next->span_id : NULL;
720
+ PyObject *args = parent_id ? Py_BuildValue("(s)", parent_id) : Py_BuildValue("(O)", Py_None);
721
+ if (args) {
722
+ PyObject *result = PyObject_CallObject(g_set_current_span_id_func, args);
723
+ Py_XDECREF(result);
724
+ Py_DECREF(args);
725
+ if (PyErr_Occurred()) {
726
+ PyErr_Clear(); // Don't let ContextVar errors break profiling
727
+ }
728
+ }
729
+ }
730
+
731
+ return span_id;
732
+ }
733
+
734
+ static char* peek_parent_span_id(void) {
735
+ span_entry_t *stack = get_span_stack();
736
+ if (!stack) return NULL;
737
+ return stack->span_id ? str_dup(stack->span_id) : NULL;
738
+ }
739
+
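
A small illustration of the intended nesting of the per-thread span stack above (span IDs shortened for readability); the Python ContextVar mirror is updated on every push/pop so async tasks observe the same current span ID.

    /*
     *   push_span("aaaa...")      stack: aaaa           ContextVar: aaaa
     *   push_span("bbbb...")      stack: bbbb -> aaaa   ContextVar: bbbb
     *   peek_parent_span_id()     -> copy of "bbbb..." (caller frees it)
     *   pop_span()                -> "bbbb..."          ContextVar: aaaa
     *   pop_span()                -> "aaaa..."          ContextVar: None
     */
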
740
+ // ---------- Build function span body ----------
741
+ static int build_body_func_span(
742
+ const char *session_id,
743
+ const char *span_id,
744
+ const char *parent_span_id,
745
+ const char *file_path,
746
+ int line_number,
747
+ int column_number,
748
+ const char *function_name,
749
+ const char *arguments_json,
750
+ const char *return_value_json,
751
+ uint64_t start_time_ns,
752
+ uint64_t duration_ns,
753
+ char **out_body,
754
+ size_t *out_len
755
+ ) {
756
+ // Escape all string fields
757
+ char *sid_esc = json_escape(session_id ? session_id : "");
758
+ char *spanid_esc = json_escape(span_id ? span_id : "");
759
+ char *pspanid_esc = parent_span_id ? json_escape(parent_span_id) : NULL;
760
+ char *file_esc = json_escape(file_path ? file_path : "");
761
+ char *func_esc = json_escape(function_name ? function_name : "");
762
+ char *args_esc = json_escape(arguments_json ? arguments_json : "{}");
763
+ char *ret_esc = return_value_json ? json_escape(return_value_json) : NULL;
764
+
765
+ if (!sid_esc || !spanid_esc || !file_esc || !func_esc || !args_esc) {
766
+ free(sid_esc); free(spanid_esc); free(pspanid_esc); free(file_esc);
767
+ free(func_esc); free(args_esc); free(ret_esc);
768
+ return 0;
769
+ }
770
+
771
+ uint64_t tms = now_ms();
772
+ const char *k_sid = ",\"sessionId\":\"";
773
+ const char *k_spanid = ",\"spanId\":\"";
774
+ const char *k_pspanid = ",\"parentSpanId\":\"";
775
+ const char *k_pspanid_null = ",\"parentSpanId\":null";
776
+ const char *k_file = ",\"filePath\":\"";
777
+ const char *k_line = ",\"lineNumber\":";
778
+ const char *k_col = ",\"columnNumber\":";
779
+ const char *k_func = ",\"functionName\":\"";
780
+ const char *k_args = ",\"arguments\":\"";
781
+ const char *k_ret = ",\"returnValue\":\"";
782
+ const char *k_ret_null = ",\"returnValue\":null";
783
+ const char *k_start = ",\"startTimeNs\":\"";
784
+ const char *k_dur = ",\"durationNs\":\"";
785
+ const char *k_ts = ",\"timestampMs\":\"";
786
+
787
+ char ts_buf[32], line_buf[16], col_buf[16], start_buf[32], dur_buf[32];
788
+ snprintf(ts_buf, sizeof(ts_buf), "%llu", (unsigned long long)tms);
789
+ snprintf(line_buf, sizeof(line_buf), "%d", line_number);
790
+ snprintf(col_buf, sizeof(col_buf), "%d", column_number);
791
+ snprintf(start_buf, sizeof(start_buf), "%llu", (unsigned long long)start_time_ns);
792
+ snprintf(dur_buf, sizeof(dur_buf), "%llu", (unsigned long long)duration_ns);
793
+
794
+ if (!g_json_prefix_func_span) {
795
+ free(sid_esc); free(spanid_esc); free(pspanid_esc); free(file_esc);
796
+ free(func_esc); free(args_esc); free(ret_esc);
797
+ return 0;
798
+ }
799
+
800
+ size_t len = strlen(g_json_prefix_func_span)
801
+ + strlen(k_sid) + strlen(sid_esc)
802
+ + strlen(k_spanid) + strlen(spanid_esc)
803
+ + (pspanid_esc ? (strlen(k_pspanid) + strlen(pspanid_esc)) : strlen(k_pspanid_null))
804
+ + strlen(k_file) + strlen(file_esc)
805
+ + strlen(k_line) + strlen(line_buf)
806
+ + strlen(k_col) + strlen(col_buf)
807
+ + strlen(k_func) + strlen(func_esc)
808
+ + strlen(k_args) + strlen(args_esc)
809
+ + (ret_esc ? (strlen(k_ret) + strlen(ret_esc)) : strlen(k_ret_null))
810
+ + strlen(k_start) + strlen(start_buf) + 1 // +1 for closing quote
811
+ + strlen(k_dur) + strlen(dur_buf) + 1 // +1 for closing quote
812
+ + strlen(k_ts) + strlen(ts_buf) + 1 // +1 for closing quote
813
+ + strlen(JSON_SUFFIX) + 10;
814
+
815
+ char *body = (char*)malloc(len);
816
+ if (!body) {
817
+ free(sid_esc); free(spanid_esc); free(pspanid_esc); free(file_esc);
818
+ free(func_esc); free(args_esc); free(ret_esc);
819
+ return 0;
820
+ }
821
+
822
+ char *o = body;
823
+ o += sprintf(o, "%s", g_json_prefix_func_span);
824
+ o += sprintf(o, "%s%s\"", k_sid, sid_esc);
825
+ o += sprintf(o, "%s%s\"", k_spanid, spanid_esc);
826
+ if (pspanid_esc) {
827
+ o += sprintf(o, "%s%s\"", k_pspanid, pspanid_esc);
828
+ } else {
829
+ o += sprintf(o, "%s", k_pspanid_null);
830
+ }
831
+ o += sprintf(o, "%s%s\"", k_file, file_esc);
832
+ o += sprintf(o, "%s%s", k_line, line_buf);
833
+ o += sprintf(o, "%s%s", k_col, col_buf);
834
+ o += sprintf(o, "%s%s\"", k_func, func_esc);
835
+ o += sprintf(o, "%s%s\"", k_args, args_esc);
836
+ if (ret_esc) {
837
+ o += sprintf(o, "%s%s\"", k_ret, ret_esc);
838
+ } else {
839
+ o += sprintf(o, "%s", k_ret_null);
840
+ }
841
+ o += sprintf(o, "%s%s\"", k_start, start_buf);
842
+ o += sprintf(o, "%s%s\"", k_dur, dur_buf);
843
+ o += sprintf(o, "%s%s\"", k_ts, ts_buf);
844
+ o += sprintf(o, "%s", JSON_SUFFIX);
845
+ *o = '\0';
846
+
847
+ *out_body = body;
848
+ *out_len = (size_t)(o - body);
849
+
850
+ free(sid_esc); free(spanid_esc); free(pspanid_esc); free(file_esc);
851
+ free(func_esc); free(args_esc); free(ret_esc);
852
+ return 1;
853
+ }
854
+
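
Putting the prefix and the per-span fields together, a finished body should look roughly like the comment below; all values are invented placeholders and the field order follows the sprintf sequence above.

    /*
     * {"query":"<escaped query>","variables":{"apiKey":"<api-key>",
     *  "serviceUuid":"<service-uuid>","library":"<library>","version":"<version>",
     *  "sessionId":"<session-id>","spanId":"<uuid4>","parentSpanId":null,
     *  "filePath":"/app/app.py","lineNumber":42,"columnNumber":0,
     *  "functionName":"handler","arguments":"{\"x\": 1}","returnValue":null,
     *  "startTimeNs":"1700000000000000000","durationNs":"12345",
     *  "timestampMs":"1700000000000"}}
     *
     * "arguments" (and "returnValue" when present) carry JSON that has been
     * escaped into a string, and the numeric timing fields are quoted.
     */
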
855
+ // ---------- Sampling ----------
856
+ static inline int should_sample(void) {
857
+ // Special case: sample_rate=0 means "never capture" (hard disable)
858
+ // This is checked via g_sample_rate which stores (1.0 / sample_rate)
859
+ // When sample_rate=0, g_sample_rate is set to a very large value (see configure)
860
+ // We use UINT64_MAX as a sentinel value for sample_rate=0
861
+ if (g_sample_rate == UINT64_MAX) {
862
+ atomic_fetch_add(&g_spans_sampled_out, 1);
863
+ return 0; // Never capture when sample_rate=0
864
+ }
865
+
866
+ if (!g_enable_sampling) {
867
+ return 1; // Sampling disabled, capture all
868
+ }
869
+
870
+ // Fast path: atomic increment and modulo check
871
+ uint64_t counter = atomic_fetch_add(&g_sample_counter, 1);
872
+ if (counter % g_sample_rate == 0) {
873
+ return 1; // This one gets sampled
874
+ }
875
+
876
+ atomic_fetch_add(&g_spans_sampled_out, 1);
877
+ return 0; // Skip this one
878
+ }
879
+
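
A worked example of the divisor-based sampling above, assuming the configure path (not shown in this excerpt) stores roughly 1/sample_rate as an integer:

    /*
     *   sample_rate 1.0   -> g_sample_rate = 1          -> every call captured
     *   sample_rate 0.01  -> g_sample_rate = 100        -> ~1 in 100 captured
     *   sample_rate 0.0   -> g_sample_rate = UINT64_MAX -> never captured (sentinel)
     */
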
880
+ // ---------- ring ops ----------
881
+ static inline size_t ring_count(void) {
882
+ size_t h = atomic_load_explicit(&g_head, memory_order_acquire);
883
+ size_t t = atomic_load_explicit(&g_tail, memory_order_acquire);
884
+ return t - h;
885
+ }
886
+ static inline int ring_empty(void) { return ring_count() == 0; }
887
+
888
+ static int ring_push(char *body, size_t len) {
889
+ while (atomic_flag_test_and_set_explicit(&g_push_lock, memory_order_acquire)) {
890
+ // brief spin
891
+ }
892
+ size_t t = atomic_load_explicit(&g_tail, memory_order_relaxed);
893
+ size_t h = atomic_load_explicit(&g_head, memory_order_acquire);
894
+ if ((t - h) >= g_cap) {
895
+ atomic_flag_clear_explicit(&g_push_lock, memory_order_release);
896
+ atomic_fetch_add(&g_spans_dropped, 1); // Track dropped spans
897
+ return 0; // full (drop)
898
+ }
899
+ size_t idx = t % g_cap;
900
+ g_ring[idx].body = body;
901
+ g_ring[idx].len = len;
902
+ atomic_store_explicit(&g_tail, t + 1, memory_order_release);
903
+ atomic_flag_clear_explicit(&g_push_lock, memory_order_release);
904
+
905
+ atomic_fetch_add(&g_spans_recorded, 1); // Track recorded spans
906
+
907
+ pthread_mutex_lock(&g_cv_mtx);
908
+ pthread_cond_signal(&g_cv);
909
+ pthread_mutex_unlock(&g_cv_mtx);
910
+ return 1;
911
+ }
912
+
913
+ static int ring_pop(char **body, size_t *len) {
914
+ size_t h = atomic_load_explicit(&g_head, memory_order_relaxed);
915
+ size_t t = atomic_load_explicit(&g_tail, memory_order_acquire);
916
+ if (h == t) return 0;
917
+ size_t idx = h % g_cap;
918
+ *body = g_ring[idx].body;
919
+ *len = g_ring[idx].len;
920
+ g_ring[idx].body = NULL;
921
+ g_ring[idx].len = 0;
922
+ atomic_store_explicit(&g_head, h + 1, memory_order_release);
923
+ return 1;
924
+ }
925
+
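
The ring above is a fixed-capacity queue with a spinlocked push and a lock-free pop. A stripped-down, self-contained sketch of the same pattern is below (one producer, one consumer, hypothetical payloads); for brevity it polls instead of using the condition-variable wakeup used by the real sender threads.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    #define CAP 8
    static char *ring[CAP];
    static _Atomic size_t head = 0, tail = 0;   /* monotonically increasing counters */
    static atomic_flag push_lock = ATOMIC_FLAG_INIT;

    static int push(char *msg) {
        while (atomic_flag_test_and_set(&push_lock)) { /* spin */ }
        size_t t = atomic_load(&tail), h = atomic_load(&head);
        if (t - h >= CAP) { atomic_flag_clear(&push_lock); return 0; }  /* full: drop */
        ring[t % CAP] = msg;
        atomic_store(&tail, t + 1);
        atomic_flag_clear(&push_lock);
        return 1;
    }

    static char *pop(void) {
        size_t h = atomic_load(&head), t = atomic_load(&tail);
        if (h == t) return NULL;                /* empty */
        char *msg = ring[h % CAP];
        atomic_store(&head, h + 1);
        return msg;
    }

    static void *consumer(void *arg) {
        (void)arg;
        for (int seen = 0; seen < 4; ) {
            char *m = pop();
            if (!m) { usleep(1000); continue; }
            printf("got: %s\n", m);
            free(m);
            seen++;
        }
        return NULL;
    }

    int main(void) {
        pthread_t c;
        pthread_create(&c, NULL, consumer, NULL);
        for (int i = 0; i < 4; i++) push(strdup("hello"));
        pthread_join(c, NULL);
        return 0;
    }
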
926
+ // ---------- curl sink callbacks ----------
927
+ static size_t _sink_write(char *ptr, size_t size, size_t nmemb, void *userdata) {
928
+ (void)ptr; (void)userdata;
929
+ return size * nmemb;
930
+ }
931
+ static size_t _sink_header(char *ptr, size_t size, size_t nmemb, void *userdata) {
932
+ (void)ptr; (void)userdata;
933
+ return size * nmemb;
934
+ }
935
+
936
+ // (no separate pthread cleanup handler; the per-thread curl handle is cleaned up at the end of sender_main)
937
+ // ---------- sender thread ----------
938
+ static void *sender_main(void *arg) {
939
+ (void)arg;
940
+
941
+ pthread_t tid = pthread_self();
942
+ fprintf(stderr, "[_sffuncspan] Sender thread started (tid=%lu)\n", (unsigned long)tid);
943
+ fflush(stderr);
944
+
945
+ // CRITICAL: Set thread-local guard flag to prevent recursive capture
946
+ sf_guard_enter();
947
+
948
+ // Initialize per-thread curl handle
949
+ fprintf(stderr, "[_sffuncspan] Initializing libcurl handle (tid=%lu)...\n", (unsigned long)tid);
950
+ fflush(stderr);
951
+
952
+ g_telem_curl = curl_easy_init();
953
+ if (!g_telem_curl) {
954
+ fprintf(stderr, "[_sffuncspan] ERROR: curl_easy_init() failed (tid=%lu)\n", (unsigned long)tid);
955
+ fflush(stderr);
956
+ sf_guard_leave();
957
+ return NULL;
958
+ }
959
+
960
+ fprintf(stderr, "[_sffuncspan] Curl handle initialized, configuring URL=%s (tid=%lu)\n", g_url ? g_url : "(null)", (unsigned long)tid);
961
+ fflush(stderr);
962
+
963
+ // Configure per-thread curl handle
964
+ curl_easy_setopt(g_telem_curl, CURLOPT_URL, g_url);
965
+ curl_easy_setopt(g_telem_curl, CURLOPT_TCP_KEEPALIVE, 1L);
966
+ curl_easy_setopt(g_telem_curl, CURLOPT_TCP_NODELAY, 1L); // NEW: Disable Nagle for immediate sends
967
+ curl_easy_setopt(g_telem_curl, CURLOPT_HTTPHEADER, g_hdrs);
968
+ #ifdef CURL_HTTP_VERSION_2TLS
969
+ if (g_http2) {
970
+ curl_easy_setopt(g_telem_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS);
971
+ }
972
+ #endif
973
+ // CRITICAL: Disable SSL verification for self-signed certs (dev/test environments)
974
+ curl_easy_setopt(g_telem_curl, CURLOPT_SSL_VERIFYPEER, 0L);
975
+ curl_easy_setopt(g_telem_curl, CURLOPT_SSL_VERIFYHOST, 0L);
976
+ curl_easy_setopt(g_telem_curl, CURLOPT_WRITEFUNCTION, _sink_write);
977
+ curl_easy_setopt(g_telem_curl, CURLOPT_HEADERFUNCTION, _sink_header);
978
+
979
+ fprintf(stderr, "[_sffuncspan] Sender thread entering main loop (tid=%lu)\n", (unsigned long)tid);
980
+ fflush(stderr);
981
+
982
+ static _Atomic int spans_sent = 0;
983
+
984
+ while (atomic_load(&g_running)) {
985
+ if (ring_empty()) {
986
+ pthread_mutex_lock(&g_cv_mtx);
987
+ if (ring_empty() && atomic_load(&g_running))
988
+ pthread_cond_wait(&g_cv, &g_cv_mtx);
989
+ pthread_mutex_unlock(&g_cv_mtx);
990
+ if (!atomic_load(&g_running)) break;
991
+ }
992
+ char *body = NULL; size_t len = 0;
993
+ while (ring_pop(&body, &len)) {
994
+ if (!body) continue;
995
+
996
+ int count = atomic_fetch_add(&spans_sent, 1);
997
+ if (count < 10 || count % 100 == 0) {
998
+ fprintf(stderr, "[_sffuncspan] Sending span #%d (len=%zu, tid=%lu)\n", count, len, (unsigned long)tid);
999
+ fflush(stderr);
1000
+ }
1001
+
1002
+ curl_easy_setopt(g_telem_curl, CURLOPT_POSTFIELDS, body);
1003
+ curl_easy_setopt(g_telem_curl, CURLOPT_POSTFIELDSIZE, (long)len);
1004
+ CURLcode res = curl_easy_perform(g_telem_curl);
1005
+
1006
+ if (count < 10 && res != CURLE_OK) {
1007
+ fprintf(stderr, "[_sffuncspan] ERROR: curl_easy_perform failed: %s (tid=%lu)\n", curl_easy_strerror(res), (unsigned long)tid);
1008
+ fflush(stderr);
1009
+ }
1010
+
1011
+ free(body);
1012
+ if (!atomic_load(&g_running)) break;
1013
+ }
1014
+ }
1015
+
1016
+ if (g_telem_curl) {
1017
+ curl_easy_cleanup(g_telem_curl);
1018
+ g_telem_curl = NULL;
1019
+ }
1020
+ sf_guard_leave();
1021
+ return NULL;
1022
+ }
1023
+
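
For readers unfamiliar with the libcurl calls above, a minimal blocking POST using the same options (URL, headers, POSTFIELDS, discarded response) is sketched below. The endpoint URL and body are placeholders, and unlike sender_main() this sketch leaves TLS verification enabled.

    #include <curl/curl.h>
    #include <stdio.h>
    #include <string.h>

    /* Discard the response body, as the sender threads above do. */
    static size_t discard(char *p, size_t sz, size_t n, void *ud) {
        (void)p; (void)ud;
        return sz * n;
    }

    int main(void) {
        const char *body = "{\"query\":\"...\",\"variables\":{}}";  /* placeholder */
        curl_global_init(CURL_GLOBAL_DEFAULT);

        CURL *h = curl_easy_init();
        if (!h) return 1;

        struct curl_slist *hdrs = curl_slist_append(NULL, "Content-Type: application/json");
        curl_easy_setopt(h, CURLOPT_URL, "https://example.invalid/graphql");  /* placeholder */
        curl_easy_setopt(h, CURLOPT_HTTPHEADER, hdrs);
        curl_easy_setopt(h, CURLOPT_POSTFIELDS, body);
        curl_easy_setopt(h, CURLOPT_POSTFIELDSIZE, (long)strlen(body));
        curl_easy_setopt(h, CURLOPT_WRITEFUNCTION, discard);

        CURLcode rc = curl_easy_perform(h);
        if (rc != CURLE_OK)
            fprintf(stderr, "curl: %s\n", curl_easy_strerror(rc));

        curl_slist_free_all(hdrs);
        curl_easy_cleanup(h);
        curl_global_cleanup();
        return rc == CURLE_OK ? 0 : 1;
    }
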
1024
+ // Forward declaration of serialization function (defined later)
1025
+ static char* serialize_python_object_to_json(PyObject *value, size_t max_size);
1026
+ static char* serialize_python_object_to_json_internal(PyObject *value, size_t max_size);
1027
+
1028
+ // ---------- Ultra-fast C profiler hook ----------
1029
+ // Thread-local storage for call stack (minimal overhead)
1030
+ typedef struct call_frame {
1031
+ uint64_t start_ns;
1032
+ char *span_id;
1033
+ char *function_name; // OWNED (str_dup'd from PyUnicode_AsUTF8), must free
1034
+ char *file_path; // OWNED (str_dup'd from PyUnicode_AsUTF8), must free
1035
+ int line_number;
1036
+ char *arguments_json; // Owned, must free if not NULL
1037
+ PyFrameObject *frame; // Borrowed reference for argument capture
1038
+ sf_funcspan_config_t config; // Config looked up during CALL, reused during RETURN
1039
+ struct call_frame *parent;
1040
+ } call_frame_t;
1041
+
1042
+ static pthread_key_t g_call_stack_key;
1043
+ static pthread_once_t g_call_stack_key_once = PTHREAD_ONCE_INIT;
1044
+
1045
+ static void init_call_stack_key(void) {
1046
+ pthread_key_create(&g_call_stack_key, NULL);
1047
+ }
1048
+
1049
+ static call_frame_t* get_call_stack(void) {
1050
+ pthread_once(&g_call_stack_key_once, init_call_stack_key);
1051
+ return (call_frame_t*)pthread_getspecific(g_call_stack_key);
1052
+ }
1053
+
1054
+ static void set_call_stack(call_frame_t *stack) {
1055
+ pthread_once(&g_call_stack_key_once, init_call_stack_key);
1056
+ pthread_setspecific(g_call_stack_key, stack);
1057
+ }
1058
+
1059
+ // Debug counter for argument capture
1060
+ static _Atomic int g_debug_arg_capture_count = 0;
1061
+
1062
+ // Capture function arguments from a frame (ultra-fast C implementation)
1063
+ static char* capture_arguments_from_frame(PyFrameObject *frame) {
1064
+ int debug_count = atomic_fetch_add(&g_debug_arg_capture_count, 1);
1065
+ int should_debug = (debug_count < 5);
1066
+
1067
+ if (!frame) {
1068
+ if (should_debug) fprintf(stderr, "[_sffuncspan] capture_args: frame is NULL\n");
1069
+ return str_dup("{}");
1070
+ }
1071
+
1072
+ PyCodeObject *code = PyFrame_GetCode(frame);
1073
+ if (!code) {
1074
+ if (should_debug) fprintf(stderr, "[_sffuncspan] capture_args: code is NULL\n");
1075
+ return str_dup("{}");
1076
+ }
1077
+
1078
+ // Get argument count
1079
+ int arg_count = code->co_argcount + code->co_kwonlyargcount;
1080
+ if (should_debug) {
1081
+ fprintf(stderr, "[_sffuncspan] capture_args: arg_count=%d\n", arg_count);
1082
+ }
1083
+
1084
+ if (arg_count == 0) {
1085
+ Py_DECREF(code);
1086
+ if (should_debug) fprintf(stderr, "[_sffuncspan] capture_args: arg_count is 0, returning {}\n");
1087
+ return str_dup("{}");
1088
+ }
1089
+
1090
+ // Build arguments dict as JSON
1091
+ size_t buf_size = 4096;
1092
+ char *buf = (char*)malloc(buf_size);
1093
+ if (!buf) {
1094
+ Py_DECREF(code);
1095
+ return str_dup("{}");
1096
+ }
1097
+
1098
+ size_t pos = 0;
1099
+ buf[pos++] = '{';
1100
+ int added = 0;
1101
+
1102
+ // Get frame locals - CRITICAL: must call PyFrame_FastToLocals first to populate the dict!
1103
+ #if PY_VERSION_HEX >= 0x030B0000 // Python 3.11+
1104
+ PyObject *locals = PyFrame_GetLocals(frame);
1105
+ #else
1106
+ // For Python < 3.11, we must explicitly populate f_locals from the fast locals array
1107
+ PyFrame_FastToLocals(frame);
1108
+ PyObject *locals = frame->f_locals;
1109
+ Py_XINCREF(locals);
1110
+ #endif
1111
+ if (!locals) {
1112
+ Py_DECREF(code);
1113
+ free(buf);
1114
+ if (should_debug) fprintf(stderr, "[_sffuncspan] capture_args: locals is NULL\n");
1115
+ return str_dup("{}");
1116
+ }
1117
+
1118
+ if (should_debug) {
1119
+ Py_ssize_t dict_size = PyDict_Check(locals) ? PyDict_Size(locals) : -1;
1120
+ fprintf(stderr, "[_sffuncspan] capture_args: locals dict size=%zd\n", dict_size);
1121
+ }
1122
+
1123
+ // Get variable names
1124
+ #if PY_VERSION_HEX >= 0x030B0000 // Python 3.11+
1125
+ PyObject *co_varnames = PyCode_GetVarnames(code);
1126
+ #else
1127
+ PyObject *co_varnames = code->co_varnames;
1128
+ Py_XINCREF(co_varnames);
1129
+ #endif
1130
+ if (!co_varnames) {
1131
+ Py_DECREF(locals);
1132
+ Py_DECREF(code);
1133
+ free(buf);
1134
+ return str_dup("{}");
1135
+ }
1136
+
1137
+ // Iterate through argument names
1138
+ for (int i = 0; i < arg_count && i < PyTuple_Size(co_varnames); i++) {
1139
+ PyObject *var_name_obj = PyTuple_GetItem(co_varnames, i);
1140
+ if (!var_name_obj) continue;
1141
+
1142
+ const char *var_name = PyUnicode_AsUTF8(var_name_obj);
1143
+ if (!var_name) continue;
1144
+
1145
+ // Get value from locals
1146
+ PyObject *value = PyDict_GetItemString(locals, var_name);
1147
+ if (!value) continue;
1148
+
1149
+ // Serialize value
1150
+ char *value_json = serialize_python_object_to_json(value, g_arg_limit_bytes / (arg_count > 0 ? arg_count : 1));
1151
+ if (!value_json) continue;
1152
+
1153
+ // Escape variable name
1154
+ char *var_name_esc = json_escape(var_name);
1155
+ if (!var_name_esc) {
1156
+ free(value_json);
1157
+ continue;
1158
+ }
1159
+
1160
+ // Check buffer space
1161
+ size_t needed = strlen(var_name_esc) + strlen(value_json) + 10;
1162
+ if (pos + needed >= buf_size - 10) {
1163
+ free(var_name_esc);
1164
+ free(value_json);
1165
+ break;
1166
+ }
1167
+
1168
+ // Add to JSON
1169
+ if (added > 0) buf[pos++] = ',';
1170
+ buf[pos++] = '"';
1171
+ size_t name_len = strlen(var_name_esc);
1172
+ memcpy(buf + pos, var_name_esc, name_len);
1173
+ pos += name_len;
1174
+ buf[pos++] = '"';
1175
+ buf[pos++] = ':';
1176
+ size_t val_len = strlen(value_json);
1177
+ memcpy(buf + pos, value_json, val_len);
1178
+ pos += val_len;
1179
+
1180
+ free(var_name_esc);
1181
+ free(value_json);
1182
+ added++;
1183
+ }
1184
+
1185
+ buf[pos++] = '}';
1186
+ buf[pos] = '\0';
1187
+
1188
+ Py_DECREF(co_varnames);
1189
+ Py_DECREF(locals);
1190
+ Py_DECREF(code);
1191
+
1192
+ return buf;
1193
+ }
1194
+
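
To make the output of capture_arguments_from_frame() concrete: for a hypothetical call such as handler(7, "ada"), the expected result is sketched in the comment below (the exact value rendering depends on serialize_python_object_to_json, defined later, and on the per-argument budget g_arg_limit_bytes / arg_count).

    /*
     * Hypothetical example:
     *
     *   def handler(user_id, name): ...     # called as handler(7, "ada")
     *
     *   captured arguments JSON -> {"user_id":7,"name":"ada"}
     *
     * Arguments whose serialized form would overflow the 4 KB local buffer
     * cause the loop to break, so very large payloads may yield a partial
     * argument set.
     */
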
1195
+ // Debug counter for profiler callbacks
1196
+ static _Atomic int g_profiler_call_count = 0;
1197
+
1198
+ // Debug counter for accepted functions
1199
+ static _Atomic int g_debug_accepted_count = 0;
1200
+
1201
+ // Profiler ready flag - set to 1 after PyEval_SetProfile() completes successfully
1202
+ // This prevents profiling during profiler installation (when Python may call our
1203
+ // profiler for frames already on the stack, including sf_veritas initialization code)
1204
+ static _Atomic int g_profiler_ready = 0;
1205
+
1206
+ // Interceptors ready flag - set to 1 after setup_interceptors() completes
1207
+ // This prevents profiling during interceptor initialization (which can cause crashes)
1208
+ static _Atomic int g_interceptors_ready = 0;
1209
+
1210
+ // Fast C profiler callback - this replaces the Python _profile_callback
1211
+ static int c_profile_func(PyObject *obj, PyFrameObject *frame, int what, PyObject *arg) {
1212
+ (void)obj;
1213
+ (void)arg;
1214
+
1215
+ // CRITICAL: Recursion guard - prevent profiling the profiler itself!
1216
+ // This prevents infinite recursion when capturing sf_veritas code or calling Python from C
1217
+ if (g_in_profiler) {
1218
+ return 0;
1219
+ }
1220
+ g_in_profiler = 1;
1221
+
1222
+ // CRITICAL: Defensive NULL checks first!
1223
+ if (!frame) {
1224
+ g_in_profiler = 0;
1225
+ return 0;
1226
+ }
1227
+ if (!g_running) {
1228
+ g_in_profiler = 0;
1229
+ return 0;
1230
+ }
1231
+
1232
+ // DEBUG: Log first few calls
1233
+ int call_count = atomic_fetch_add(&g_profiler_call_count, 1);
1234
+ if (call_count < 5) {
1235
+ fprintf(stderr, "[_sffuncspan] PROFILER CALLED #%d, what=%d\n", call_count, what);
1236
+ fflush(stderr);
1237
+ }
1238
+
1239
+ // PROFILER READY CHECK: Skip all calls until profiler is fully initialized
1240
+ // This prevents profiling during PyEval_SetProfile() installation, when Python may
1241
+ // call our profiler for frames already on the stack (including sf_veritas code).
1242
+ if (!atomic_load(&g_profiler_ready)) {
1243
+ if (SF_DEBUG && call_count < 5) {
1244
+ fprintf(stderr, "[_sffuncspan] PROFILER_NOT_READY: Skipping call during initialization\n");
1245
+ fflush(stderr);
1246
+ }
1247
+ g_in_profiler = 0;
1248
+ return 0;
1249
+ }
1250
+
1251
+ // INTERCEPTORS READY CHECK: Skip all calls until interceptors are fully set up
1252
+ // This prevents profiling during setup_interceptors(), which can cause crashes
1253
+ // when profiling code that's in an inconsistent state during initialization.
1254
+ if (!atomic_load(&g_interceptors_ready)) {
1255
+ if (SF_DEBUG && call_count < 5) {
1256
+ fprintf(stderr, "[_sffuncspan] INTERCEPTORS_NOT_READY: Skipping call during interceptor setup\n");
1257
+ fflush(stderr);
1258
+ }
1259
+ g_in_profiler = 0;
1260
+ return 0;
1261
+ }
1262
+
1263
+ // Check for HTTP header override FIRST (highest priority in config hierarchy)
1264
+ // This check is cached per-thread for performance
1265
+ int has_override = has_http_header_override();
1266
+
1267
+ // SPECIAL CASE: sample_rate=0.0 means NEVER capture (unless HTTP header override)
1268
+ // This check happens BEFORE enable_sampling check because 0.0 is an absolute "never"
1269
+ if (!has_override && g_sample_rate == UINT64_MAX) {
1270
+ // sample_rate=0.0 → never capture (UINT64_MAX is sentinel value)
1271
+ atomic_fetch_add(&g_spans_sampled_out, 1);
1272
+ g_in_profiler = 0;
1273
+ return 0;
1274
+ }
1275
+
1276
+ // GLOBAL SAMPLING CHECK: Only apply if there's NO HTTP header override
1277
+ // HTTP headers can bypass global sampling and set their own sample_rate
1278
+ // Note: This only applies when sampling is ENABLED (for probabilistic sampling like 0.1, 0.5, etc)
1279
+ if (!has_override && g_enable_sampling) {
1280
+ if (!should_sample()) {
1281
+ g_in_profiler = 0;
1282
+ return 0;
1283
+ }
1284
+ }
1285
+
1286
+ // MASTER KILL SWITCH: If SF_ENABLE_FUNCTION_SPANS=false, disable by default
1287
+ // BUT: HTTP header overrides have HIGHEST priority and can bypass this
1288
+ // This allows per-request enablement via headers even when globally disabled
1289
+ //
1290
+ // Configuration Hierarchy (highest to lowest):
1291
+ // 1. HTTP Header Override ← We check this first!
1292
+ // 2. Decorator
1293
+ // 3. Function Config (.sailfish)
1294
+ // ...
1295
+ // 8. Environment Variables (SF_ENABLE_FUNCTION_SPANS)
1296
+ if (!has_override && !g_enable_function_spans) {
1297
+ g_in_profiler = 0;
1298
+ return 0;
1299
+ }
1300
+
1301
+ if (call_count < 5) {
1302
+ fprintf(stderr, "[_sffuncspan] DEBUG: Getting code object\n");
1303
+ fflush(stderr);
1304
+ }
1305
+
1306
+ PyCodeObject *code = PyFrame_GetCode(frame);
1307
+ if (!code) {
1308
+ g_in_profiler = 0;
1309
+ return 0;
1310
+ }
1311
+
1312
+ if (call_count < 5) {
1313
+ fprintf(stderr, "[_sffuncspan] DEBUG: Got code object\n");
1314
+ fflush(stderr);
1315
+ }
1316
+
1317
+ // DEFENSIVE: Check for NULL before calling PyUnicode_AsUTF8
1318
+ if (!code->co_filename || !code->co_name) {
1319
+ Py_DECREF(code);
1320
+ g_in_profiler = 0;
1321
+ return 0;
1322
+ }
1323
+
1324
+ if (call_count < 5) {
1325
+ fprintf(stderr, "[_sffuncspan] DEBUG: About to call PyUnicode_AsUTF8\n");
1326
+ fflush(stderr);
1327
+ }
1328
+
1329
+ const char *filename = PyUnicode_AsUTF8(code->co_filename);
1330
+ const char *funcname = PyUnicode_AsUTF8(code->co_name);
1331
+
1332
+ if (call_count < 5) {
1333
+ fprintf(stderr, "[_sffuncspan] DEBUG: Got filename=%s, funcname=%s\n",
1334
+ filename ? filename : "NULL", funcname ? funcname : "NULL");
1335
+ fflush(stderr);
1336
+ }
1337
+
1338
+ // TEMPORARY DEBUG: Log ALL calls to functions in /app/ directory to diagnose filtering
1339
+ static _Atomic int g_app_call_count = 0;
1340
+ if (filename && strstr(filename, "/app/") && what == PyTrace_CALL) {
1341
+ int app_count = atomic_fetch_add(&g_app_call_count, 1);
1342
+ if (app_count < 30) {
1343
+ fprintf(stderr, "[FuncSpanDebug] Profiler sees: %s::%s (what=%d)\n",
1344
+ filename, funcname ? funcname : "NULL", what);
1345
+ fflush(stderr);
1346
+ }
1347
+ }
1348
+
1349
+ // Fast path: Skip if no filename/funcname
1350
+ if (!filename || !funcname) {
1351
+ Py_DECREF(code);
1352
+ g_in_profiler = 0;
1353
+ return 0;
1354
+ }
1355
+
1356
+ if (call_count < 5) {
1357
+ fprintf(stderr, "[_sffuncspan] DEBUG: About to check dunder methods\n");
1358
+ fflush(stderr);
1359
+ }
1360
+
1361
+ // Fast path: Skip dunder methods
1362
+ if (funcname[0] == '_' && funcname[1] == '_') {
1363
+ if (strstr(filename, "/app/app.py")) {
1364
+ fprintf(stderr, "[FuncSpanDebug] FILTERED: %s::%s - dunder method\n", filename, funcname);
1365
+ fflush(stderr);
1366
+ }
1367
+ Py_DECREF(code);
1368
+ g_in_profiler = 0;
1369
+ return 0;
1370
+ }
1371
+
1372
+ if (call_count < 5) {
1373
+ fprintf(stderr, "[_sffuncspan] DEBUG: About to check sf_veritas\n");
1374
+ fflush(stderr);
1375
+ }
1376
+
1377
+ // CRITICAL: Check for sf_veritas code FIRST, before Python stdlib check!
1378
+ // sf_veritas files are often in paths like /lib/python3.9/site-packages/sf_veritas/
1379
+ // which would match the stdlib filter, so we must check this first.
1380
+ int is_sf_veritas = (strstr(filename, "sf_veritas") != NULL);
1381
+
1382
+ if (call_count < 5) {
1383
+ fprintf(stderr, "[_sffuncspan] DEBUG: is_sf_veritas=%d\n", is_sf_veritas);
1384
+ fflush(stderr);
1385
+ }
1386
+
1387
+ // If it's sf_veritas and we don't want to capture it, skip early
1388
+ if (is_sf_veritas && !g_capture_sf_veritas) {
1389
+ // if (SF_DEBUG) {
1390
+ // fprintf(stderr, "[_sffuncspan] FILTERED: %s::%s - sf_veritas code (capture_sf_veritas=false)\n", filename, funcname);
1391
+ // fflush(stderr);
1392
+ // }
1393
+ Py_DECREF(code);
1394
+ g_in_profiler = 0;
1395
+ return 0;
1396
+ }
1397
+
1398
+ // Fast path: ALWAYS skip Python stdlib, frozen modules, bootstrap code
1399
+ // These are Python internals, never capture them
1400
+ // BUT: Skip this check if it's sf_veritas (already handled above)
1401
+ if (!is_sf_veritas) {
1402
+ if (strstr(filename, "/lib/python") ||
1403
+ strstr(filename, "\\lib\\python") ||
1404
+ strstr(filename, "<frozen") ||
1405
+ strstr(filename, "<string>") ||
1406
+ strstr(filename, "importlib") ||
1407
+ strstr(filename, "_bootstrap")) {
1408
+ if (strstr(filename, "/app/app.py")) {
1409
+ fprintf(stderr, "[FuncSpanDebug] FILTERED: %s::%s - Python stdlib/internals\n", filename, funcname);
1410
+ fflush(stderr);
1411
+ }
1412
+ Py_DECREF(code);
1413
+ g_in_profiler = 0;
1414
+ return 0;
1415
+ }
1416
+ }
1417
+
1418
+ // Conditionally skip installed packages (site-packages)
1419
+ // BUT: if it's sf_veritas and we want to capture it, don't skip (already handled above)
1420
+ if (!g_capture_installed_packages && !is_sf_veritas) {
1421
+ if (strstr(filename, "site-packages") ||
1422
+ strstr(filename, "dist-packages")) {
1423
+ if (strstr(filename, "/app/app.py")) {
1424
+ fprintf(stderr, "[FuncSpanDebug] FILTERED: %s::%s - site-packages (capture_installed_packages=false)\n", filename, funcname);
1425
+ fflush(stderr);
1426
+ }
1427
+ Py_DECREF(code);
1428
+ g_in_profiler = 0;
1429
+ return 0;
1430
+ }
1431
+ }
1432
+
1433
+ // Skip Django view functions unless explicitly enabled
1434
+ if (!g_include_django_view_functions) {
1435
+ // Check if this is a Django view function (filename ends with views.py or views/__init__.py)
1436
+ const char *views_py = strstr(filename, "views.py");
1437
+ const char *views_init = strstr(filename, "views/__init__.py");
1438
+ if (views_py || views_init) {
1439
+ // Make sure it's actually the end of the path (not just a substring)
1440
+ size_t flen = strlen(filename);
1441
+ if ((views_py && (views_py == filename + flen - 8)) || // ends with "views.py"
1442
+ (views_init && (views_init == filename + flen - 17))) { // ends with "views/__init__.py"
1443
+ // if (SF_DEBUG) {
1444
+ // fprintf(stderr, "[_sffuncspan] FILTERED: %s::%s - Django view function (include_django_view_functions=false)\n", filename, funcname);
1445
+ // fflush(stderr);
1446
+ // }
1447
+ Py_DECREF(code);
1448
+ g_in_profiler = 0;
1449
+ return 0;
1450
+ }
1451
+ }
1452
+ }
1453
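The pointer arithmetic above restates as this Python sketch (illustrative helper only; it captures the intent that only exact path suffixes count, not substrings elsewhere in the path):

def is_django_view_file(path: str) -> bool:
    # Mirrors the C suffix check for Django view modules.
    return path.endswith("views.py") or path.endswith("views/__init__.py")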
+
1454
+ if (what == PyTrace_CALL) {
1455
+ // Look up config for this function
1456
+ sf_funcspan_config_t func_config = get_function_config(filename, funcname);
1457
+
1458
+ // Debug: Log when a function is ACCEPTED (passed all filters)
1459
+ // ALWAYS log /app/app.py functions to diagnose why they're not being captured
1460
+ if (strstr(filename, "/app/app.py")) {
1461
+ fprintf(stderr, "[FuncSpanDebug] ACCEPTED: %s::%s (event=CALL, capture_installed_packages=%d, capture_sf_veritas=%d)\n",
1462
+ filename, funcname, g_capture_installed_packages, g_capture_sf_veritas);
1463
+ fprintf(stderr, "[FuncSpanDebug] Config: include_arguments=%d, include_return_value=%d, sample_rate=%.2f\n",
1464
+ func_config.include_arguments, func_config.include_return_value, func_config.sample_rate);
1465
+ fflush(stderr);
1466
+ } else if (SF_DEBUG) {
1467
+ int accepted_count = atomic_fetch_add(&g_debug_accepted_count, 1);
1468
+ if (accepted_count < 10) {
1469
+ fprintf(stderr, "[_sffuncspan] ACCEPTED: %s::%s (event=CALL, capture_installed_packages=%d, capture_sf_veritas=%d)\n",
1470
+ filename, funcname, g_capture_installed_packages, g_capture_sf_veritas);
1471
+ fflush(stderr);
1472
+ }
1473
+ }
1474
+
1475
+ // Per-function sampling check (sample_rate is 0.0-1.0)
1476
+ if (func_config.sample_rate < 1.0f) {
1477
+ // Generate a random float between 0.0 and 1.0
1478
+ static _Atomic uint64_t g_per_func_sample_counter = 0;
1479
+ uint64_t sample_val = atomic_fetch_add(&g_per_func_sample_counter, 1);
1480
+ // Simple LCG step (classic rand()-style constants); keep the low 31 bits for a non-negative value
1481
+ sample_val = (sample_val * 1103515245 + 12345) & 0x7FFFFFFF;
1482
+ float rand_val = (float)sample_val / (float)0x7FFFFFFF;
1483
+
1484
+ if (rand_val >= func_config.sample_rate) {
1485
+ if (strstr(filename, "/app/app.py")) {
1486
+ fprintf(stderr, "[FuncSpanDebug] SAMPLED OUT: %s::%s (sample_rate=%.2f, rand=%.2f)\n",
1487
+ filename, funcname, func_config.sample_rate, rand_val);
1488
+ fflush(stderr);
1489
+ }
1490
+ Py_DECREF(code);
1491
+ atomic_fetch_add(&g_spans_sampled_out, 1);
1492
+ g_in_profiler = 0;
1493
+ return 0; // Skip this span
1494
+ }
1495
+ }
1496
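The per-function sampling decision above, restated as a Python sketch using the same constants (`sample_rate` is the 0.0-1.0 value from the function's config; `_counter` stands in for g_per_func_sample_counter):

_counter = 0  # stand-in for the C module's atomic counter

def keep_span(sample_rate: float) -> bool:
    """Pseudo-random keep/drop decision using the same LCG step as the C code."""
    global _counter
    n = _counter
    _counter += 1
    n = (n * 1103515245 + 12345) & 0x7FFFFFFF  # LCG step, low 31 bits
    rand_val = n / 0x7FFFFFFF                  # map to [0.0, 1.0]
    return rand_val < sample_rate              # keep only if below the configured rate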
+
1497
+ // Push frame onto call stack (ultra-minimal allocation)
1498
+ call_frame_t *new_frame = (call_frame_t*)malloc(sizeof(call_frame_t));
1499
+ if (!new_frame) {
1500
+ Py_DECREF(code);
1501
+ g_in_profiler = 0;
1502
+ return 0;
1503
+ }
1504
+
1505
+ new_frame->start_ns = now_epoch_ns();
1506
+ new_frame->span_id = generate_span_id();
1507
+
1508
+ // CRITICAL: OWN the strings! PyUnicode_AsUTF8() returns borrowed pointers owned by the code object.
1509
+ // When we Py_DECREF(code), those pointers become invalid. Duplicate them now while code is alive.
1510
+ new_frame->function_name = funcname ? str_dup(funcname) : str_dup("<unknown>");
1511
+ new_frame->file_path = filename ? str_dup(filename) : str_dup("");
1512
+
1513
+ // Check if str_dup failed (malloc failure)
1514
+ if (!new_frame->function_name || !new_frame->file_path) {
1515
+ if (new_frame->function_name) free(new_frame->function_name);
1516
+ if (new_frame->file_path) free(new_frame->file_path);
1517
+ if (new_frame->span_id) free(new_frame->span_id);
1518
+ free(new_frame);
1519
+ Py_DECREF(code);
1520
+ g_in_profiler = 0;
1521
+ return 0;
1522
+ }
1523
+
1524
+ new_frame->line_number = PyFrame_GetLineNumber(frame);
1525
+ new_frame->frame = frame; // Borrowed! (for argument capture on return)
1526
+
1527
+ // Capture arguments NOW (on function entry) if enabled by THIS function's config
1528
+ if (func_config.include_arguments) {
1529
+ // Use per-function arg limit
1530
+ size_t arg_limit = func_config.arg_limit_mb * 1048576;
1531
+ // Temporarily set global for capture_arguments_from_frame to use
1532
+ size_t saved_limit = g_arg_limit_bytes;
1533
+ g_arg_limit_bytes = arg_limit;
1534
+ new_frame->arguments_json = capture_arguments_from_frame(frame);
1535
+ g_arg_limit_bytes = saved_limit;
1536
+ } else {
1537
+ new_frame->arguments_json = NULL;
1538
+ }
1539
+
1540
+ // Store config in frame so PyTrace_RETURN uses same config (handles HTTP header overrides)
1541
+ new_frame->config = func_config;
1542
+
1543
+ new_frame->parent = get_call_stack();
1544
+ set_call_stack(new_frame);
1545
+
1546
+ // CRITICAL: Sync current span ID to Python ContextVar for async-safety!
1547
+ // This allows Python code to call get_current_function_span_id() and get the correct span
1548
+ if (g_set_current_span_id_func && new_frame->span_id) {
1549
+ PyObject *args = Py_BuildValue("(s)", new_frame->span_id);
1550
+ if (args) {
1551
+ PyObject *result = PyObject_CallObject(g_set_current_span_id_func, args);
1552
+ Py_XDECREF(result);
1553
+ Py_DECREF(args);
1554
+ if (PyErr_Occurred()) {
1555
+ PyErr_Clear(); // Don't let ContextVar errors break profiling
1556
+ }
1557
+ }
1558
+ }
1559
+
1560
+ // Debug: Log when span is created and pushed for /app/app.py
1561
+ if (strstr(filename, "/app/app.py")) {
1562
+ fprintf(stderr, "[FuncSpanDebug] SPAN CREATED: %s::%s (span_id=%s, parent_span_id=%s)\n",
1563
+ filename, funcname,
1564
+ new_frame->span_id ? new_frame->span_id : "NULL",
1565
+ new_frame->parent && new_frame->parent->span_id ? new_frame->parent->span_id : "NULL");
1566
+ fprintf(stderr, "[FuncSpanDebug] Synced to ContextVar: %s\n",
1567
+ g_set_current_span_id_func ? "YES" : "NO (setter not available)");
1568
+ fflush(stderr);
1569
+ }
1570
+
1571
+ } else if (what == PyTrace_RETURN || what == PyTrace_EXCEPTION) {
1572
+ // Pop frame and record span
1573
+ call_frame_t *current = get_call_stack();
1574
+ if (!current) {
1575
+ Py_DECREF(code);
1576
+ g_in_profiler = 0;
1577
+ return 0;
1578
+ }
1579
+
1580
+ uint64_t end_ns = now_epoch_ns();
1581
+ uint64_t duration_ns = end_ns - current->start_ns;
1582
+
1583
+ // Get session ID (trace_id) from ContextVar
1584
+ char session_buf[256];
1585
+ session_buf[0] = '\0';
1586
+ int used_real_trace_id = 0;
1587
+
1588
+ if (g_get_trace_id_func) {
1589
+ PyObject *trace_id_result = PyObject_CallObject(g_get_trace_id_func, NULL);
1590
+ if (trace_id_result && trace_id_result != Py_None) {
1591
+ if (PyUnicode_Check(trace_id_result)) {
1592
+ const char *trace_id_str = PyUnicode_AsUTF8(trace_id_result);
1593
+ if (trace_id_str) {
1594
+ snprintf(session_buf, sizeof(session_buf), "%s", trace_id_str);
1595
+ used_real_trace_id = 1;
1596
+ }
1597
+ }
1598
+ }
1599
+ Py_XDECREF(trace_id_result);
1600
+ if (PyErr_Occurred()) {
1601
+ PyErr_Clear();
1602
+ }
1603
+ }
1604
+
1605
+ // Fallback to thread ID if no trace_id available
1606
+ if (session_buf[0] == '\0') {
1607
+ snprintf(session_buf, sizeof(session_buf), "thread-%lu", (unsigned long)pthread_self());
1608
+ }
1609
+
1610
+ // Get parent span ID
1611
+ const char *parent_span_id = current->parent ? current->parent->span_id : NULL;
1612
+
1613
+ // Use config stored during PyTrace_CALL (ensures consistent config even if HTTP headers change)
1614
+ sf_funcspan_config_t func_config = current->config;
1615
+
1616
+ // Capture return value if enabled by config and it's a normal return (not exception)
1617
+ char *return_value_json = NULL;
1618
+ if (func_config.include_return_value && what == PyTrace_RETURN && arg) {
1619
+ size_t return_limit = func_config.return_limit_mb * 1048576;
1620
+ return_value_json = serialize_python_object_to_json(arg, return_limit);
1621
+ }
1622
+
1623
+ // Get arguments JSON (already captured on function entry, or NULL if disabled)
1624
+ const char *arguments_json = current->arguments_json ? current->arguments_json : "{}";
1625
+
1626
+ // Build span body and push to ring - RELEASE GIL for both!
1627
+ // This is the KEY optimization from Opportunity #2
1628
+ char *body = NULL;
1629
+ size_t len = 0;
1630
+ int ok = 0;
1631
+
1632
+ // Debug: Log what span is being created (BEFORE GIL release)
1633
+ static _Atomic int g_span_send_count = 0;
1634
+ int span_count = atomic_fetch_add(&g_span_send_count, 1);
1635
+ if (span_count < 20 || (current->file_path && strstr(current->file_path, "/app/"))) {
1636
+ fprintf(stderr, "[FuncSpanDebug] Creating span for %s::%s (span_id=%s, parent=%s)\n",
1637
+ current->file_path ? current->file_path : "NULL",
1638
+ current->function_name ? current->function_name : "NULL",
1639
+ current->span_id ? current->span_id : "NULL",
1640
+ parent_span_id ? parent_span_id : "NULL");
1641
+ fflush(stderr);
1642
+ }
1643
+
1644
+ // OPPORTUNITY #2 OPTIMIZATION: Release GIL during JSON build + ring push
1645
+ Py_BEGIN_ALLOW_THREADS
1646
+ if (build_body_func_span(
1647
+ session_buf,
1648
+ current->span_id,
1649
+ parent_span_id,
1650
+ current->file_path,
1651
+ current->line_number,
1652
+ 0, // column_number,
1653
+ current->function_name,
1654
+ arguments_json,
1655
+ return_value_json,
1656
+ current->start_ns,
1657
+ duration_ns,
1658
+ &body,
1659
+ &len)) {
1660
+
1661
+ // Push to ring buffer (still GIL-free)
1662
+ ok = ring_push(body, len);
1663
+ }
1664
+ Py_END_ALLOW_THREADS
1665
+
1666
+ if (!ok) {
1667
+ free(body);
1668
+ }
1669
+
1670
+ // Pop from stack
1671
+ set_call_stack(current->parent);
1672
+
1673
+ // CRITICAL: Sync parent span ID (or None) to Python ContextVar for async-safety!
1674
+ // This ensures Python code sees the correct span ID after this function returns
1675
+ if (g_set_current_span_id_func) {
1676
+ const char *parent_span_id = current->parent ? current->parent->span_id : NULL;
1677
+ PyObject *args = parent_span_id ? Py_BuildValue("(s)", parent_span_id) : Py_BuildValue("(O)", Py_None);
1678
+ if (args) {
1679
+ PyObject *result = PyObject_CallObject(g_set_current_span_id_func, args);
1680
+ Py_XDECREF(result);
1681
+ Py_DECREF(args);
1682
+ if (PyErr_Occurred()) {
1683
+ PyErr_Clear(); // Don't let ContextVar errors break profiling
1684
+ }
1685
+ }
1686
+ }
1687
+
1688
+ free(current->span_id);
1689
+ // CRITICAL: Free owned strings (we duplicated them on CALL to prevent UAF)
1690
+ if (current->function_name) free(current->function_name);
1691
+ if (current->file_path) free(current->file_path);
1692
+ if (current->arguments_json) free(current->arguments_json);
1693
+ if (return_value_json) free(return_value_json);
1694
+ free(current);
1695
+ }
1696
+
1697
+ Py_DECREF(code);
1698
+ g_in_profiler = 0;
1699
+ return 0;
1700
+ }
1701
+
1702
+ // ---------- Python API ----------
1703
+ static PyObject *py_init(PyObject *self, PyObject *args, PyObject *kw) {
1704
+ const char *url, *query, *api_key, *service_uuid, *library, *version;
1705
+ int http2 = 0;
1706
+ static char *kwlist[] = {"url","query","api_key","service_uuid","library","version","http2", NULL};
1707
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "ssssssi",
1708
+ kwlist, &url, &query, &api_key, &service_uuid, &library, &version, &http2)) {
1709
+ Py_RETURN_FALSE;
1710
+ }
1711
+ if (g_running) Py_RETURN_TRUE;
1712
+
1713
+ g_url = str_dup(url);
1714
+ g_func_span_query_escaped = json_escape_query(query);
1715
+ g_api_key = str_dup(api_key);
1716
+ g_service_uuid = str_dup(service_uuid);
1717
+ g_library = str_dup(library);
1718
+ g_version = str_dup(version);
1719
+ g_http2 = http2 ? 1 : 0;
1720
+ if (!g_url || !g_func_span_query_escaped || !g_api_key || !g_service_uuid || !g_library || !g_version) {
1721
+ Py_RETURN_FALSE;
1722
+ }
1723
+ if (!build_prefix_for_query(g_func_span_query_escaped, &g_json_prefix_func_span)) {
1724
+ Py_RETURN_FALSE;
1725
+ }
1726
+
1727
+ g_cap = SFFS_RING_CAP;
1728
+ g_ring = (sffs_msg_t*)calloc(g_cap, sizeof(sffs_msg_t));
1729
+ if (!g_ring) { Py_RETURN_FALSE; }
1730
+
1731
+ curl_global_init(CURL_GLOBAL_DEFAULT);
1732
+ g_hdrs = NULL;
1733
+ g_hdrs = curl_slist_append(g_hdrs, "Content-Type: application/json");
1734
+
1735
+ // Initialize config system integration
1736
+ init_config_system();
1737
+
1738
+ // Initialize ContextVar sync for async-safety
1739
+ // This allows C thread-local span stack to sync to Python's async-safe ContextVar
1740
+ PyObject *thread_local_module = PyImport_ImportModule("sf_veritas.thread_local");
1741
+ if (thread_local_module) {
1742
+ // Get ContextVar reference (not strictly needed, but kept for completeness)
1743
+ g_current_span_id_contextvar = PyObject_GetAttrString(thread_local_module, "current_span_id_ctx");
1744
+ if (!g_current_span_id_contextvar) {
1745
+ PyErr_Clear();
1746
+ fprintf(stderr, "[_sffuncspan] WARNING: Could not get current_span_id_ctx ContextVar\n");
1747
+ }
1748
+
1749
+ // Get setter function reference (this is what we actually use)
1750
+ g_set_current_span_id_func = PyObject_GetAttrString(thread_local_module, "_set_current_span_id");
1751
+ if (!g_set_current_span_id_func || !PyCallable_Check(g_set_current_span_id_func)) {
1752
+ Py_XDECREF(g_set_current_span_id_func);
1753
+ g_set_current_span_id_func = NULL;
1754
+ fprintf(stderr, "[_sffuncspan] WARNING: Could not get _set_current_span_id function\n");
1755
+ }
1756
+
1757
+ // Get trace_id getter function reference (for getting session ID)
1758
+ g_get_trace_id_func = PyObject_GetAttrString(thread_local_module, "get_sf_trace_id");
1759
+ if (!g_get_trace_id_func || !PyCallable_Check(g_get_trace_id_func)) {
1760
+ Py_XDECREF(g_get_trace_id_func);
1761
+ g_get_trace_id_func = NULL;
1762
+ fprintf(stderr, "[_sffuncspan] WARNING: Could not get get_sf_trace_id function\n");
1763
+ }
1764
+
1765
+ Py_DECREF(thread_local_module);
1766
+
1767
+ if (g_set_current_span_id_func) {
1768
+ fprintf(stderr, "[_sffuncspan] ContextVar sync initialized for async-safety\n");
1769
+ }
1770
+ } else {
1771
+ PyErr_Clear();
1772
+ fprintf(stderr, "[_sffuncspan] WARNING: Could not import sf_veritas.thread_local module\n");
1773
+ }
1774
+
1775
+ // Initialize SF_DEBUG from environment
1776
+ const char *debug_env = getenv("SF_DEBUG");
1777
+ if (debug_env && (strcmp(debug_env, "1") == 0 || strcmp(debug_env, "true") == 0 || strcmp(debug_env, "True") == 0)) {
1778
+ SF_DEBUG = 1;
1779
+ fprintf(stderr, "[_sffuncspan] SF_DEBUG enabled\n");
1780
+ fflush(stderr);
1781
+ }
1782
+
1783
+ // Parse SF_FUNCSPAN_SENDER_THREADS environment variable (default: 4, max: 16)
1784
+ // FuncSpan traffic is expected to be HIGH volume, so default to 4 threads
1785
+ const char *num_threads_env = getenv("SF_FUNCSPAN_SENDER_THREADS");
1786
+ g_num_sender_threads = num_threads_env ? atoi(num_threads_env) : 4;
1787
+ if (g_num_sender_threads < 1) g_num_sender_threads = 1;
1788
+ if (g_num_sender_threads > MAX_SENDER_THREADS) g_num_sender_threads = MAX_SENDER_THREADS;
1789
+
1790
+ atomic_store(&g_running, 1);
1791
+
1792
+ // Start thread pool
1793
+ fprintf(stderr, "[_sffuncspan] Starting %d sender threads...\n", g_num_sender_threads);
1794
+ fflush(stderr);
1795
+
1796
+ for (int i = 0; i < g_num_sender_threads; i++) {
1797
+ if (pthread_create(&g_sender_threads[i], NULL, sender_main, NULL) != 0) {
1798
+ fprintf(stderr, "[_sffuncspan] ERROR: Failed to create sender thread %d\n", i);
1799
+ fflush(stderr);
1800
+ atomic_store(&g_running, 0);
1801
+ // Join any threads that were already created
1802
+ for (int j = 0; j < i; j++) {
1803
+ pthread_join(g_sender_threads[j], NULL);
1804
+ }
1805
+ Py_RETURN_FALSE;
1806
+ }
1807
+ fprintf(stderr, "[_sffuncspan] Created sender thread %d (tid=%lu)\n", i, (unsigned long)g_sender_threads[i]);
1808
+ fflush(stderr);
1809
+ }
1810
+
1811
+ fprintf(stderr, "[_sffuncspan] All %d sender threads created successfully. Libcurl sender initialized.\n", g_num_sender_threads);
1812
+ fflush(stderr);
1813
+
1814
+ // Initialize UUID4 ring buffer for span IDs (pre-generate 10,000 UUIDs)
1815
+ fprintf(stderr, "[_sffuncspan] Initializing UUID4 ring buffer for span IDs...\n");
1816
+ fflush(stderr);
1817
+ init_span_uuid_buffer();
1818
+ fprintf(stderr, "[_sffuncspan] UUID4 ring buffer initialized with %zu UUIDs\n", span_uuid_buffer_available());
1819
+ fflush(stderr);
1820
+
1821
+ Py_RETURN_TRUE;
1822
+ }
1823
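How the Python side of the package presumably calls this entry point: a hedged sketch based only on the kwlist in py_init() above; every value is a placeholder and the real wrapper code lives elsewhere in the package.

from sf_veritas import _sffuncspan

ok = _sffuncspan.init(
    url="<ingest-endpoint>",
    query="<graphql-query>",
    api_key="<api-key>",
    service_uuid="<service-uuid>",
    library="<library-name>",
    version="<library-version>",
    http2=0,
)
assert ok  # True once the ring buffer and sender threads are up (or if already running)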
+
1824
+ static PyObject *py_configure(PyObject *self, PyObject *args, PyObject *kw) {
1825
+ PyObject *capture_from_installed_libraries = NULL;
1826
+ int variable_capture_size_limit_mb = 1; // default 1MB (deprecated, use arg_limit/return_limit)
1827
+ float sample_rate = 1.0f; // default 1.0 = capture all (probabilistic 0.0-1.0)
1828
+ int enable_sampling = 0; // default disabled
1829
+ int parse_json_strings = 1; // default enabled
1830
+ int capture_arguments = 1; // default enabled
1831
+ int capture_return_value = 1; // default enabled
1832
+ int arg_limit_mb = 1; // default 1MB for arguments
1833
+ int return_limit_mb = 1; // default 1MB for return values
1834
+ int include_django_view_functions = 0; // default disabled
1835
+
1836
+ static char *kwlist[] = {
1837
+ "variable_capture_size_limit_mb",
1838
+ "capture_from_installed_libraries",
1839
+ "sample_rate",
1840
+ "enable_sampling",
1841
+ "parse_json_strings",
1842
+ "capture_arguments",
1843
+ "capture_return_value",
1844
+ "arg_limit_mb",
1845
+ "return_limit_mb",
1846
+ "include_django_view_functions",
1847
+ NULL
1848
+ };
1849
+
1850
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "|iOfppppiip", kwlist,
1851
+ &variable_capture_size_limit_mb,
1852
+ &capture_from_installed_libraries,
1853
+ &sample_rate,
1854
+ &enable_sampling,
1855
+ &parse_json_strings,
1856
+ &capture_arguments,
1857
+ &capture_return_value,
1858
+ &arg_limit_mb,
1859
+ &return_limit_mb,
1860
+ &include_django_view_functions)) {
1861
+ Py_RETURN_NONE;
1862
+ }
1863
+
1864
+ // Legacy: if variable_capture_size_limit_mb is set but not arg/return limits,
1865
+ // use it for both
1866
+ if (arg_limit_mb == 1 && return_limit_mb == 1 && variable_capture_size_limit_mb != 1) {
1867
+ arg_limit_mb = variable_capture_size_limit_mb;
1868
+ return_limit_mb = variable_capture_size_limit_mb;
1869
+ }
1870
+
1871
+ g_variable_capture_size_limit_bytes = (size_t)variable_capture_size_limit_mb * 1048576;
1872
+ g_arg_limit_bytes = (size_t)arg_limit_mb * 1048576;
1873
+ g_return_limit_bytes = (size_t)return_limit_mb * 1048576;
1874
+
1875
+ if (capture_from_installed_libraries && PyList_Check(capture_from_installed_libraries)) {
1876
+ Py_XDECREF(g_capture_from_installed_libraries);
1877
+ Py_INCREF(capture_from_installed_libraries);
1878
+ g_capture_from_installed_libraries = capture_from_installed_libraries;
1879
+ }
1880
+
1881
+ // Configure sampling
1882
+ // sample_rate is now a float (0.0-1.0 probability)
1883
+ // Convert to old modulo format: 1.0=capture all(1), 0.5=capture 50%(2), 0.1=capture 10%(10), etc.
1884
+ // Special case: 0.0 = never capture (hard disable)
1885
+ if (sample_rate <= 0.0f) {
1886
+ g_sample_rate = UINT64_MAX; // Sentinel value for "never capture"
1887
+ } else {
1888
+ if (sample_rate > 1.0f) sample_rate = 1.0f; // Maximum 100%
1889
+ g_sample_rate = (uint64_t)(1.0f / sample_rate); // Convert probability to modulo divisor
1890
+ }
1891
+ g_enable_sampling = enable_sampling;
1892
+
1893
+ // Configure JSON parsing
1894
+ g_parse_json_strings = parse_json_strings;
1895
+
1896
+ // Configure capture control
1897
+ g_capture_arguments = capture_arguments;
1898
+ g_capture_return_value = capture_return_value;
1899
+
1900
+ // Configure Django view function filtering
1901
+ g_include_django_view_functions = include_django_view_functions;
1902
+
1903
+ Py_RETURN_NONE;
1904
+ }
1905
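A sketch of calling configure() with the keywords parsed above, plus the probability-to-divisor conversion it applies (values are illustrative):

from sf_veritas import _sffuncspan

_sffuncspan.configure(
    sample_rate=0.1,            # probabilistic: capture ~10% of calls (stored as divisor 10)
    enable_sampling=True,
    parse_json_strings=True,
    capture_arguments=True,
    capture_return_value=True,
    arg_limit_mb=1,
    return_limit_mb=1,
    include_django_view_functions=False,
)

# Conversion performed in C:
#   sample_rate <= 0.0  -> UINT64_MAX sentinel ("never capture")
#   0.0 < rate <= 1.0   -> divisor = int(1.0 / rate), e.g. 1.0 -> 1, 0.5 -> 2, 0.1 -> 10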
+
1906
+ // ---------- Fast C-based object serialization ----------
1907
+ // Serialize any Python object to JSON string, with aggressive introspection
1908
+ // Returns malloc'd JSON string, caller must free()
1909
+ static char* serialize_python_object_to_json_internal(PyObject *value, size_t max_size) {
1910
+ if (!value) {
1911
+ return str_dup("null");
1912
+ }
1913
+
1914
+ // Fast path: Try direct JSON serialization for primitives
1915
+ if (PyUnicode_Check(value)) {
1916
+ const char *str = PyUnicode_AsUTF8(value);
1917
+ if (!str) return str_dup("null");
1918
+
1919
+ // Try to parse as JSON if enabled and string looks like JSON
1920
+ if (g_parse_json_strings && str[0] && (str[0] == '{' || str[0] == '[')) {
1921
+ // Import json module and try to parse
1922
+ PyObject *json_module = PyImport_ImportModule("json");
1923
+ if (json_module) {
1924
+ PyObject *loads_func = PyObject_GetAttrString(json_module, "loads");
1925
+ if (loads_func && PyCallable_Check(loads_func)) {
1926
+ PyObject *args = PyTuple_Pack(1, value);
1927
+ if (args) {
1928
+ PyObject *parsed = PyObject_CallObject(loads_func, args);
1929
+ Py_DECREF(args);
1930
+
1931
+ if (parsed && !PyErr_Occurred()) {
1932
+ // Successfully parsed! Recursively serialize the parsed object
1933
+ char *parsed_json = serialize_python_object_to_json(parsed, max_size);
1934
+ Py_DECREF(parsed);
1935
+ Py_XDECREF(loads_func);
1936
+ Py_DECREF(json_module);
1937
+ return parsed_json;
1938
+ }
1939
+
1940
+ // Failed to parse, clear error and continue with string
1941
+ Py_XDECREF(parsed);
1942
+ if (PyErr_Occurred()) PyErr_Clear();
1943
+ }
1944
+ }
1945
+ Py_XDECREF(loads_func);
1946
+ Py_DECREF(json_module);
1947
+ }
1948
+ if (PyErr_Occurred()) PyErr_Clear();
1949
+ }
1950
+
1951
+ // Regular string serialization (not JSON or parsing disabled)
1952
+ // Check if string length exceeds max_size
1953
+ size_t str_len = strlen(str);
1954
+
1955
+ if (str_len > max_size) {
1956
+ // Truncate the string to max_size
1957
+ char *truncated_str = (char*)malloc(max_size + 20); // Extra space for <<TRIMMED>>
1958
+ if (!truncated_str) return str_dup("null");
1959
+
1960
+ // Copy up to max_size bytes
1961
+ memcpy(truncated_str, str, max_size);
1962
+ strcpy(truncated_str + max_size, "<<TRIMMED>>");
1963
+
1964
+ char *escaped = json_escape(truncated_str);
1965
+ free(truncated_str);
1966
+ if (!escaped) return str_dup("null");
1967
+
1968
+ size_t len = strlen(escaped) + 3; // quotes + null
1969
+ char *result = (char*)malloc(len);
1970
+ if (!result) { free(escaped); return str_dup("null"); }
1971
+ snprintf(result, len, "\"%s\"", escaped);
1972
+ free(escaped);
1973
+ return result;
1974
+ }
1975
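The oversized-string branch above behaves roughly like this Python sketch (same "<<TRIMMED>>" marker; json.dumps stands in for the C json_escape plus quoting, and the C code counts bytes rather than characters):

import json

def serialize_str(s: str, max_size: int) -> str:
    if len(s) > max_size:
        s = s[:max_size] + "<<TRIMMED>>"  # truncate and mark, as in the C path
    return json.dumps(s)                  # escape and wrap in quotes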
+
1976
+ // String is within size limit, serialize normally
1977
+ char *escaped = json_escape(str);
1978
+ if (!escaped) return str_dup("null");
1979
+ size_t len = strlen(escaped) + 3; // quotes + null
1980
+ char *result = (char*)malloc(len);
1981
+ if (!result) { free(escaped); return str_dup("null"); }
1982
+ snprintf(result, len, "\"%s\"", escaped);
1983
+ free(escaped);
1984
+ return result;
1985
+ }
1986
+
1987
+ // Check bool BEFORE int (since bool is a subclass of int in Python)
1988
+ if (PyBool_Check(value)) {
1989
+ return str_dup(value == Py_True ? "true" : "false");
1990
+ }
1991
+
1992
+ if (PyLong_Check(value)) {
1993
+ long long num = PyLong_AsLongLong(value);
1994
+ char *result = (char*)malloc(32);
1995
+ if (!result) return str_dup("null");
1996
+ snprintf(result, 32, "%lld", num);
1997
+ return result;
1998
+ }
1999
+
2000
+ if (PyFloat_Check(value)) {
2001
+ double num = PyFloat_AsDouble(value);
2002
+ char *result = (char*)malloc(32);
2003
+ if (!result) return str_dup("null");
2004
+ snprintf(result, 32, "%.17g", num);
2005
+ return result;
2006
+ }
2007
+
2008
+ if (value == Py_None) {
2009
+ return str_dup("null");
2010
+ }
2011
+
2012
+ // Bytes - try to decode as UTF-8, fallback to repr
2013
+ if (PyBytes_Check(value)) {
2014
+ char *bytes_data = NULL;
2015
+ Py_ssize_t bytes_len = 0;
2016
+
2017
+ if (PyBytes_AsStringAndSize(value, &bytes_data, &bytes_len) == 0 && bytes_data) {
2018
+ // Try to decode as UTF-8
2019
+ PyObject *decoded = PyUnicode_DecodeUTF8(bytes_data, bytes_len, "strict");
2020
+ if (decoded && PyUnicode_Check(decoded)) {
2021
+ // Successfully decoded to string - recursively serialize it
2022
+ // This will trigger JSON parsing if enabled and string contains JSON
2023
+ char *result = serialize_python_object_to_json(decoded, max_size);
2024
+ Py_DECREF(decoded);
2025
+ return result;
2026
+ }
2027
+ Py_XDECREF(decoded);
2028
+ if (PyErr_Occurred()) PyErr_Clear();
2029
+ }
2030
+
2031
+ // Fallback: not UTF-8 or decode failed, use repr (b'...')
2032
+ PyObject *repr_obj = PyObject_Repr(value);
2033
+ if (repr_obj && PyUnicode_Check(repr_obj)) {
2034
+ const char *repr_str = PyUnicode_AsUTF8(repr_obj);
2035
+ if (repr_str) {
2036
+ char *escaped = json_escape(repr_str);
2037
+ Py_DECREF(repr_obj);
2038
+ if (!escaped) return str_dup("null");
2039
+ size_t len = strlen(escaped) + 3;
2040
+ char *result = (char*)malloc(len);
2041
+ if (!result) { free(escaped); return str_dup("null"); }
2042
+ snprintf(result, len, "\"%s\"", escaped);
2043
+ free(escaped);
2044
+ return result;
2045
+ }
2046
+ }
2047
+ Py_XDECREF(repr_obj);
2048
+ if (PyErr_Occurred()) PyErr_Clear();
2049
+ }
2050
+
2051
+ // Tuples - serialize as JSON arrays
2052
+ if (PyTuple_Check(value)) {
2053
+ Py_ssize_t tuple_len = PyTuple_Size(value);
2054
+ if (tuple_len > 100) tuple_len = 100;
2055
+
2056
+ size_t buf_size = 4096;
2057
+ char *buf = (char*)malloc(buf_size);
2058
+ if (!buf) return str_dup("null");
2059
+ size_t pos = 0;
2060
+ buf[pos++] = '[';
2061
+
2062
+ for (Py_ssize_t i = 0; i < tuple_len; i++) {
2063
+ PyObject *item = PyTuple_GetItem(value, i);
2064
+ char *item_json = serialize_python_object_to_json(item, max_size / 10);
2065
+ size_t item_len = strlen(item_json);
2066
+
2067
+ if (pos + item_len + 2 >= buf_size) {
2068
+ free(item_json);
2069
+ break;
2070
+ }
2071
+
2072
+ if (i > 0) buf[pos++] = ',';
2073
+ memcpy(buf + pos, item_json, item_len);
2074
+ pos += item_len;
2075
+ free(item_json);
2076
+ }
2077
+
2078
+ buf[pos++] = ']';
2079
+ buf[pos] = '\0';
2080
+ return buf;
2081
+ }
2082
+
2083
+ // Lists
2084
+ if (PyList_Check(value)) {
2085
+ Py_ssize_t list_len = PyList_Size(value);
2086
+ if (list_len > 100) list_len = 100; // Limit list introspection
2087
+
2088
+ size_t buf_size = 4096;
2089
+ char *buf = (char*)malloc(buf_size);
2090
+ if (!buf) return str_dup("null");
2091
+ size_t pos = 0;
2092
+ buf[pos++] = '[';
2093
+
2094
+ for (Py_ssize_t i = 0; i < list_len; i++) {
2095
+ PyObject *item = PyList_GetItem(value, i);
2096
+ char *item_json = serialize_python_object_to_json(item, max_size / 10);
2097
+ size_t item_len = strlen(item_json);
2098
+
2099
+ if (pos + item_len + 2 >= buf_size) {
2100
+ free(item_json);
2101
+ break; // truncate
2102
+ }
2103
+
2104
+ if (i > 0) buf[pos++] = ',';
2105
+ memcpy(buf + pos, item_json, item_len);
2106
+ pos += item_len;
2107
+ free(item_json);
2108
+ }
2109
+
2110
+ buf[pos++] = ']';
2111
+ buf[pos] = '\0';
2112
+ return buf;
2113
+ }
2114
+
2115
+ // Dicts
2116
+ if (PyDict_Check(value)) {
2117
+ PyObject *key, *val;
2118
+ Py_ssize_t dict_pos = 0;
2119
+ int count = 0;
2120
+
2121
+ size_t buf_size = 8192;
2122
+ char *buf = (char*)malloc(buf_size);
2123
+ if (!buf) return str_dup("null");
2124
+ size_t pos = 0;
2125
+ buf[pos++] = '{';
2126
+
2127
+ while (PyDict_Next(value, &dict_pos, &key, &val) && count < 50) {
2128
+ const char *key_str = PyUnicode_Check(key) ? PyUnicode_AsUTF8(key) : NULL;
2129
+ if (!key_str) continue;
2130
+
2131
+ // Skip private/dunder keys
2132
+ if (key_str[0] == '_') continue;
2133
+
2134
+ char *key_escaped = json_escape(key_str);
2135
+ char *val_json = serialize_python_object_to_json(val, max_size / 10);
2136
+
2137
+ size_t needed = strlen(key_escaped) + strlen(val_json) + 5;
2138
+ if (pos + needed >= buf_size) {
2139
+ free(key_escaped);
2140
+ free(val_json);
2141
+ break; // truncate
2142
+ }
2143
+
2144
+ if (count > 0) buf[pos++] = ',';
2145
+ buf[pos++] = '"';
2146
+ size_t key_len = strlen(key_escaped);
2147
+ memcpy(buf + pos, key_escaped, key_len);
2148
+ pos += key_len;
2149
+ buf[pos++] = '"';
2150
+ buf[pos++] = ':';
2151
+ size_t val_len = strlen(val_json);
2152
+ memcpy(buf + pos, val_json, val_len);
2153
+ pos += val_len;
2154
+
2155
+ free(key_escaped);
2156
+ free(val_json);
2157
+ count++;
2158
+ }
2159
+
2160
+ buf[pos++] = '}';
2161
+ buf[pos] = '\0';
2162
+ return buf;
2163
+ }
2164
+
2165
+ // Complex object introspection - build result dict
2166
+ PyObject *type_obj = PyObject_Type(value);
2167
+ if (!type_obj) {
2168
+ PyErr_Clear();
2169
+ return str_dup("null");
2170
+ }
2171
+
2172
+ PyObject *type_name_obj = PyObject_GetAttrString(type_obj, "__name__");
2173
+ if (!type_name_obj) PyErr_Clear();
2174
+
2175
+ PyObject *module_obj = PyObject_GetAttrString(type_obj, "__module__");
2176
+ if (!module_obj) PyErr_Clear();
2177
+
2178
+ const char *type_name = type_name_obj && PyUnicode_Check(type_name_obj) ? PyUnicode_AsUTF8(type_name_obj) : "unknown";
2179
+ const char *module_name = module_obj && PyUnicode_Check(module_obj) ? PyUnicode_AsUTF8(module_obj) : "builtins";
2180
+
2181
+ // Use a larger buffer to accommodate trimmed large attributes
2182
+ // max_size / 20 per attribute plus overhead can exceed 50KB per attribute
2183
+ // Allow room for ~10 attributes of max size
2184
+ size_t buf_size = (max_size / 2) > 16384 ? (max_size / 2) : 16384;
2185
+ if (buf_size > 1048576) buf_size = 1048576; // Cap at 1MB
2186
+
2187
+ char *buf = (char*)malloc(buf_size);
2188
+ if (!buf) {
2189
+ Py_XDECREF(type_obj);
2190
+ Py_XDECREF(type_name_obj);
2191
+ Py_XDECREF(module_obj);
2192
+ return str_dup("null");
2193
+ }
2194
+
2195
+ size_t pos = 0;
2196
+ buf[pos++] = '{';
2197
+
2198
+ // Add _type field
2199
+ if (strcmp(module_name, "builtins") == 0) {
2200
+ pos += snprintf(buf + pos, buf_size - pos, "\"_type\":\"%s\"", type_name);
2201
+ } else {
2202
+ pos += snprintf(buf + pos, buf_size - pos, "\"_type\":\"%s.%s\"", module_name, type_name);
2203
+ }
2204
+
2205
+ int added_attrs = 0;
2206
+
2207
+ // Try __dict__ introspection
2208
+ PyObject *obj_dict = PyObject_GetAttrString(value, "__dict__");
2209
+ if (PyErr_Occurred()) PyErr_Clear();
2210
+
2211
+ if (obj_dict && PyDict_Check(obj_dict)) {
2212
+ PyObject *key, *val;
2213
+ Py_ssize_t dict_pos = 0;
2214
+ int attr_count = 0;
2215
+
2216
+ if (!added_attrs) {
2217
+ pos += snprintf(buf + pos, buf_size - pos, ",\"attributes\":{");
2218
+ added_attrs = 1;
2219
+ }
2220
+
2221
+ while (PyDict_Next(obj_dict, &dict_pos, &key, &val) && attr_count < 30) {
2222
+ const char *key_str = PyUnicode_Check(key) ? PyUnicode_AsUTF8(key) : NULL;
2223
+ if (!key_str || key_str[0] == '_') continue; // Skip private
2224
+
2225
+ // Skip callables (methods)
2226
+ if (PyCallable_Check(val)) continue;
2227
+
2228
+ char *key_escaped = json_escape(key_str);
2229
+ char *val_json = serialize_python_object_to_json(val, max_size / 20);
2230
+
2231
+ size_t needed = strlen(key_escaped) + strlen(val_json) + 5;
2232
+ if (pos + needed >= buf_size - 100) {
2233
+ free(key_escaped);
2234
+ free(val_json);
2235
+ break;
2236
+ }
2237
+
2238
+ if (attr_count > 0) buf[pos++] = ',';
2239
+ pos += snprintf(buf + pos, buf_size - pos, "\"%s\":%s", key_escaped, val_json);
2240
+
2241
+ free(key_escaped);
2242
+ free(val_json);
2243
+ attr_count++;
2244
+ }
2245
+
2246
+ if (added_attrs) {
2247
+ buf[pos++] = '}';
2248
+ }
2249
+ }
2250
+ Py_XDECREF(obj_dict);
2251
+ if (PyErr_Occurred()) PyErr_Clear();
2252
+
2253
+ // Try common data attributes
2254
+ const char *data_attrs[] = {"data", "value", "content", "body", "result", "message", "text", NULL};
2255
+ for (int i = 0; data_attrs[i]; i++) {
2256
+ PyObject *attr = PyObject_GetAttrString(value, data_attrs[i]);
2257
+ if (PyErr_Occurred()) PyErr_Clear();
2258
+
2259
+ if (attr && !PyCallable_Check(attr)) {
2260
+ char *attr_json = serialize_python_object_to_json(attr, max_size / 20);
2261
+ size_t needed = strlen(data_attrs[i]) + strlen(attr_json) + 5;
2262
+
2263
+ if (pos + needed < buf_size - 100) {
2264
+ pos += snprintf(buf + pos, buf_size - pos, ",\"%s\":%s", data_attrs[i], attr_json);
2265
+ }
2266
+
2267
+ free(attr_json);
2268
+ }
2269
+ Py_XDECREF(attr);
2270
+ }
2271
+ if (PyErr_Occurred()) PyErr_Clear();
2272
+
2273
+ // Note: We removed _repr from the root level for cleaner output
2274
+ // The attributes and common data fields provide enough context
2275
+
2276
+ buf[pos++] = '}';
2277
+ buf[pos] = '\0';
2278
+
2279
+ Py_XDECREF(type_obj);
2280
+ Py_XDECREF(type_name_obj);
2281
+ Py_XDECREF(module_obj);
2282
+
2283
+ // Check size limit
2284
+ if (pos > max_size) {
2285
+ free(buf);
2286
+ char *truncated = (char*)malloc(256);
+ if (!truncated) return str_dup("null");  // defensive: malloc can fail, as checked elsewhere
2287
+ snprintf(truncated, 256, "{\"_truncated\":true,\"_size\":%zu,\"_type\":\"%s.%s\"}",
2288
+ pos, module_name, type_name);
2289
+ return truncated;
2290
+ }
2291
+
2292
+ return buf;
2293
+ }
2294
+
2295
+ static char* serialize_python_object_to_json(PyObject *value, size_t max_size) {
2296
+ if (!value) return str_dup("null");
2297
+
2298
+ // Detect direct self-references to avoid infinite recursion
2299
+ for (int i = 0; i < g_serialize_depth; i++) {
2300
+ if (g_serialize_stack[i] == value) {
2301
+ return str_dup("\"<recursion>\"");
2302
+ }
2303
+ }
2304
+
2305
+ if (g_serialize_depth >= SERIALIZE_MAX_DEPTH) {
2306
+ return str_dup("\"<max_depth_exceeded>\"");
2307
+ }
2308
+
2309
+ g_serialize_stack[g_serialize_depth] = value;
2310
+ g_serialize_depth++;
2311
+ char *result = serialize_python_object_to_json_internal(value, max_size);
2312
+ g_serialize_depth--;
2313
+ g_serialize_stack[g_serialize_depth] = NULL;
2314
+
2315
+ if (!result) {
2316
+ return str_dup("null");
2317
+ }
2318
+ return result;
2319
+ }
2320
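The wrapper above is a standard re-entrancy guard around the serializer; a Python sketch of the same pattern (`MAX_DEPTH`, `_stack`, and `_serialize_internal` are illustrative stand-ins for SERIALIZE_MAX_DEPTH, g_serialize_stack, and the C internal function; the real depth limit is defined elsewhere in the file):

MAX_DEPTH = 10   # stand-in; the actual SERIALIZE_MAX_DEPTH is not shown in this excerpt
_stack: list = []

def _serialize_internal(value, max_size):
    # Hypothetical stand-in for serialize_python_object_to_json_internal().
    return repr(value)

def serialize(value, max_size):
    if any(entry is value for entry in _stack):
        return '"<recursion>"'             # direct self-reference detected
    if len(_stack) >= MAX_DEPTH:
        return '"<max_depth_exceeded>"'
    _stack.append(value)
    try:
        return _serialize_internal(value, max_size)
    finally:
        _stack.pop()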
+
2321
+ static PyObject *py_serialize_value(PyObject *self, PyObject *args) {
2322
+ PyObject *value;
2323
+ size_t max_size = 1048576; // 1MB default
2324
+
2325
+ if (!PyArg_ParseTuple(args, "O|n", &value, &max_size)) {
2326
+ Py_RETURN_NONE;
2327
+ }
2328
+
2329
+ char *json_str = serialize_python_object_to_json(value, max_size);
2330
+ if (!json_str) {
2331
+ Py_RETURN_NONE;
2332
+ }
2333
+
2334
+ PyObject *result = PyUnicode_FromString(json_str);
2335
+ free(json_str);
2336
+ return result;
2337
+ }
2338
+
2339
+ static PyObject *py_record_span(PyObject *self, PyObject *args, PyObject *kw) {
2340
+ const char *session_id, *span_id, *parent_span_id = NULL;
2341
+ const char *file_path, *function_name, *arguments_json, *return_value_json = NULL;
2342
+ int line_number = 0, column_number = 0;
2343
+ unsigned long long start_time_ns = 0, duration_ns = 0;
2344
+
2345
+ static char *kwlist[] = {
2346
+ "session_id", "span_id", "parent_span_id", "file_path", "line_number",
2347
+ "column_number", "function_name", "arguments_json", "return_value_json",
2348
+ "start_time_ns", "duration_ns", NULL
2349
+ };
2350
+
2351
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "sszsiissz|KK", kwlist,
2352
+ &session_id, &span_id, &parent_span_id,
2353
+ &file_path, &line_number, &column_number,
2354
+ &function_name, &arguments_json, &return_value_json,
2355
+ &start_time_ns, &duration_ns)) {
2356
+ Py_RETURN_NONE;
2357
+ }
2358
+ if (!g_running) Py_RETURN_NONE;
2359
+
2360
+ // NOTE: DO NOT apply sampling here. This function is called explicitly by user code
2361
+ // (e.g., Strawberry extension, decorators) to record a span that was already decided
2362
+ // to be captured. If we sample here, we get orphaned child spans that reference
2363
+ // non-existent parent_span_ids. Sampling only happens in c_profile_func() for auto-capture.
2364
+
2365
+ // OPTIMIZATION: Release GIL during JSON building + ring push
2366
+ // All string arguments are already C strings from PyArg_ParseTupleAndKeywords,
2367
+ // so we can safely release GIL for the entire body building + transmission.
2368
+ // This extends GIL-free duration from ~100ns to ~500-2000ns (5-20x improvement).
2369
+ char *body = NULL;
2370
+ size_t len = 0;
2371
+ int ok = 0;
2372
+
2373
+ Py_BEGIN_ALLOW_THREADS
2374
+ // Build JSON body (WITHOUT GIL - pure C string operations)
2375
+ if (build_body_func_span(
2376
+ session_id, span_id, parent_span_id,
2377
+ file_path, line_number, column_number,
2378
+ function_name, arguments_json, return_value_json,
2379
+ (uint64_t)start_time_ns, (uint64_t)duration_ns,
2380
+ &body, &len)) {
2381
+ // Push to ring buffer (WITHOUT GIL)
2382
+ ok = ring_push(body, len);
2383
+ }
2384
+ Py_END_ALLOW_THREADS
2385
+
2386
+ if (!ok) { free(body); }
2387
+ Py_RETURN_NONE;
2388
+ }
2389
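A sketch of an explicit record_span() call (keyword names come from the kwlist above; IDs, paths, and timings are placeholders). As the comment notes, the caller decides whether to sample before calling this; no sampling is applied here.

from sf_veritas import _sffuncspan

span_id = _sffuncspan.generate_span_id()
start_ns = _sffuncspan.get_epoch_ns()
# ... run the work being measured ...
_sffuncspan.record_span(
    session_id="<trace-id>",
    span_id=span_id,
    parent_span_id=None,          # 'z' format: None maps to NULL
    file_path="<file-path>",
    line_number=42,
    column_number=0,
    function_name="<function-name>",
    arguments_json="{}",
    return_value_json=None,
    start_time_ns=start_ns,
    duration_ns=_sffuncspan.get_epoch_ns() - start_ns,
)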
+
2390
+ static PyObject *py_generate_span_id(PyObject *self, PyObject *args) {
2391
+ char *span_id = generate_span_id();
2392
+ if (!span_id) Py_RETURN_NONE;
2393
+ PyObject *result = PyUnicode_FromString(span_id);
2394
+ free(span_id);
2395
+ return result;
2396
+ }
2397
+
2398
+ static PyObject *py_push_span(PyObject *self, PyObject *args) {
2399
+ const char *span_id;
2400
+ if (!PyArg_ParseTuple(args, "s", &span_id)) {
2401
+ Py_RETURN_NONE;
2402
+ }
2403
+ push_span(span_id);
2404
+ Py_RETURN_NONE;
2405
+ }
2406
+
2407
+ static PyObject *py_pop_span(PyObject *self, PyObject *args) {
2408
+ char *span_id = pop_span();
2409
+ if (!span_id) Py_RETURN_NONE;
2410
+ PyObject *result = PyUnicode_FromString(span_id);
2411
+ free(span_id);
2412
+ return result;
2413
+ }
2414
+
2415
+ static PyObject *py_peek_parent_span_id(PyObject *self, PyObject *args) {
2416
+ char *parent_span_id = peek_parent_span_id();
2417
+ if (!parent_span_id) Py_RETURN_NONE;
2418
+ PyObject *result = PyUnicode_FromString(parent_span_id);
2419
+ free(parent_span_id);
2420
+ return result;
2421
+ }
2422
+
2423
+ static PyObject *py_get_current_span_id(PyObject *self, PyObject *args) {
2424
+ (void)self;
2425
+ (void)args;
2426
+
2427
+ // Get current span ID from thread-local stack (C fallback for get_current_function_span_id)
2428
+ span_entry_t *stack = get_span_stack();
2429
+ if (!stack || !stack->span_id) {
2430
+ Py_RETURN_NONE;
2431
+ }
2432
+
2433
+ return PyUnicode_FromString(stack->span_id);
2434
+ }
2435
+
2436
+ static PyObject *py_get_epoch_ns(PyObject *self, PyObject *args) {
2437
+ uint64_t ns = now_epoch_ns();
2438
+ return PyLong_FromUnsignedLongLong(ns);
2439
+ }
2440
+
2441
+ static PyObject *py_get_stats(PyObject *self, PyObject *args) {
2442
+ uint64_t recorded = atomic_load(&g_spans_recorded);
2443
+ uint64_t sampled_out = atomic_load(&g_spans_sampled_out);
2444
+ uint64_t dropped = atomic_load(&g_spans_dropped);
2445
+ size_t buffer_size = ring_count();
2446
+
2447
+ PyObject *dict = PyDict_New();
2448
+ if (!dict) Py_RETURN_NONE;
2449
+
2450
+ PyDict_SetItemString(dict, "spans_recorded", PyLong_FromUnsignedLongLong(recorded));
2451
+ PyDict_SetItemString(dict, "spans_sampled_out", PyLong_FromUnsignedLongLong(sampled_out));
2452
+ PyDict_SetItemString(dict, "spans_dropped", PyLong_FromUnsignedLongLong(dropped));
2453
+ PyDict_SetItemString(dict, "ring_buffer_used", PyLong_FromSize_t(buffer_size));
2454
+ PyDict_SetItemString(dict, "ring_buffer_capacity", PyLong_FromSize_t(g_cap));
2455
+ PyDict_SetItemString(dict, "sample_rate", PyLong_FromUnsignedLongLong(g_sample_rate));
2456
+ PyDict_SetItemString(dict, "sampling_enabled", PyBool_FromLong(g_enable_sampling));
2457
+
2458
+ return dict;
2459
+ }
2460
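The stats dictionary built above can be consumed like this (key names taken from the code; output values are whatever the counters hold at call time):

from sf_veritas import _sffuncspan

stats = _sffuncspan.get_stats()
print(stats["spans_recorded"], stats["spans_sampled_out"], stats["spans_dropped"])
print(f'{stats["ring_buffer_used"]}/{stats["ring_buffer_capacity"]} ring slots used')
print("sampling enabled:", stats["sampling_enabled"], "divisor:", stats["sample_rate"])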
+
2461
+ static PyObject *py_reset_stats(PyObject *self, PyObject *args) {
2462
+ atomic_store(&g_spans_recorded, 0);
2463
+ atomic_store(&g_spans_sampled_out, 0);
2464
+ atomic_store(&g_spans_dropped, 0);
2465
+ atomic_store(&g_sample_counter, 0);
2466
+ Py_RETURN_NONE;
2467
+ }
2468
+
2469
+ // Global flag to enable profiler for new threads
2470
+ static _Atomic int g_profiler_enabled = 0;
2471
+
2472
+ // PID tracking for profiler installation (prevents multiple installations per PID)
2473
+ static _Atomic int g_profiler_installed_pid = 0;
2474
+ static pthread_mutex_t g_profiler_install_mutex = PTHREAD_MUTEX_INITIALIZER;
2475
+
2476
+ // Thread start callback to enable profiler on new threads
2477
+ static void thread_start_callback(PyThreadState *tstate) {
2478
+ if (atomic_load(&g_profiler_enabled)) {
2479
+ PyEval_SetProfile(c_profile_func, NULL);
2480
+ }
2481
+ }
2482
+
2483
+ static PyObject *py_start_c_profiler(PyObject *self, PyObject *args) {
2484
+ if (!g_running) {
2485
+ PyErr_SetString(PyExc_RuntimeError, "Profiler not initialized - call init() first");
2486
+ return NULL;
2487
+ }
2488
+
2489
+ int current_pid = getpid();
2490
+
2491
+ // Check environment variable to detect if profiler is already installed
2492
+ // This handles the case where the C module is loaded multiple times in the same process
2493
+ char env_key[64];
2494
+ snprintf(env_key, sizeof(env_key), "_SF_PROFILER_INSTALLED_%d", current_pid);
2495
+
2496
+ if (getenv(env_key)) {
2497
+ // Already installed for this PID (even in different module instance)
2498
+ fprintf(stderr, "[_sffuncspan] Profiler already installed for PID %d (detected via env var), skipping\n", current_pid);
2499
+ fflush(stderr);
2500
+ Py_RETURN_NONE;
2501
+ }
2502
+
2503
+ // Use a file lock to coordinate across all module instances in the same process
2504
+ // This works even when the C module is loaded twice with separate global variables
2505
+ char lock_path[256];
2506
+ snprintf(lock_path, sizeof(lock_path), "/tmp/sf_profiler_%d.lock", current_pid);
2507
+
2508
+ int lock_fd = open(lock_path, O_CREAT | O_RDWR, 0666);
2509
+ if (lock_fd < 0) {
2510
+ fprintf(stderr, "[_sffuncspan] Failed to open lock file, proceeding anyway (PID=%d)\n", current_pid);
2511
+ fflush(stderr);
2512
+ } else {
2513
+ // Try to acquire exclusive lock (will block if another module instance is installing)
2514
+ struct flock fl;
2515
+ fl.l_type = F_WRLCK;
2516
+ fl.l_whence = SEEK_SET;
2517
+ fl.l_start = 0;
2518
+ fl.l_len = 0; // Lock entire file
2519
+
2520
+ if (fcntl(lock_fd, F_SETLKW, &fl) == 0) {
2521
+ // Got the lock - now check if profiler is already installed
2522
+
2523
+ // Read lock file to see if profiler was already installed
2524
+ char buf[2] = {0};
2525
+ lseek(lock_fd, 0, SEEK_SET);
2526
+ ssize_t bytes_read = read(lock_fd, buf, 1);
2527
+
2528
+ if (bytes_read > 0 && buf[0] == '1') {
2529
+ // Another module instance already installed it
2530
+ fcntl(lock_fd, F_SETLK, &(struct flock){.l_type = F_UNLCK});
2531
+ close(lock_fd);
2532
+
2533
+ fprintf(stderr, "[_sffuncspan] Profiler already installed by another module instance (PID=%d), skipping\n", current_pid);
2534
+ fflush(stderr);
2535
+
2536
+ // Set env var for faster checks
2537
+ setenv(env_key, "1", 1);
2538
+ Py_RETURN_NONE;
2539
+ }
2540
+
2541
+ // Mark that we're about to install (before releasing lock later)
2542
+ // This happens after PyEval_SetProfile succeeds
2543
+ } else {
2544
+ close(lock_fd);
2545
+ fprintf(stderr, "[_sffuncspan] Failed to acquire lock, proceeding anyway (PID=%d)\n", current_pid);
2546
+ fflush(stderr);
2547
+ }
2548
+ }
2549
+
2550
+ // Lock mutex to prevent race conditions (within this module instance)
2551
+ pthread_mutex_lock(&g_profiler_install_mutex);
2552
+
2553
+ // Check if profiler is already installed for this PID (within this module instance)
2554
+ int old_pid = atomic_load(&g_profiler_installed_pid);
2555
+
2556
+ if (old_pid == current_pid) {
2557
+ // Already installed for this PID - skip
2558
+ pthread_mutex_unlock(&g_profiler_install_mutex);
2559
+ Py_RETURN_NONE;
2560
+ }
2561
+
2562
+ // For fork scenario: parent had PID X, child has PID Y
2563
+ // We need to reset and install for the new PID
2564
+ if (old_pid != 0 && old_pid != current_pid) {
2565
+ // Fork detected - reset the profiler state for new PID
2566
+ atomic_store(&g_profiler_ready, 0);
2567
+ atomic_store(&g_profiler_enabled, 0);
2568
+ atomic_store(&g_profiler_installed_pid, 0);
2569
+
2570
+ // Clear parent's env var (child inherited it)
2571
+ char parent_env_key[64];
2572
+ snprintf(parent_env_key, sizeof(parent_env_key), "_SF_PROFILER_INSTALLED_%d", old_pid);
2573
+ unsetenv(parent_env_key);
2574
+ }
2575
+
2576
+ fprintf(stderr, "[_sffuncspan] Installing C profiler (PID=%d)...\n", current_pid);
2577
+ fflush(stderr);
2578
+
2579
+ // Enable profiler flag for new threads
2580
+ atomic_store(&g_profiler_enabled, 1);
2581
+
2582
+ // Set the C-level profiler for current thread (ultra-fast!)
2583
+ PyEval_SetProfile(c_profile_func, NULL);
2584
+
2585
+ // CRITICAL: Mark profiler as ready AFTER PyEval_SetProfile() completes
2586
+ // This ensures any profiler callbacks during installation will skip early
2587
+ atomic_store(&g_profiler_ready, 1);
2588
+
2589
+ // Mark this PID as having profiler installed (in this module instance)
2590
+ atomic_store(&g_profiler_installed_pid, current_pid);
2591
+
2592
+ // Set environment variable to mark profiler as installed globally (across all module instances)
2593
+ setenv(env_key, "1", 1);
2594
+
2595
+ // Write to lock file to mark profiler as installed
2596
+ if (lock_fd >= 0) {
2597
+ lseek(lock_fd, 0, SEEK_SET);
2598
+ write(lock_fd, "1", 1);
2599
+ fsync(lock_fd);
2600
+
2601
+ // Release file lock
2602
+ fcntl(lock_fd, F_SETLK, &(struct flock){.l_type = F_UNLCK});
2603
+ close(lock_fd);
2604
+ }
2605
+
2606
+ fprintf(stderr, "[_sffuncspan] C profiler installed successfully (PID=%d)\n", current_pid);
2607
+ fflush(stderr);
2608
+
2609
+ // Unlock mutex before returning
2610
+ pthread_mutex_unlock(&g_profiler_install_mutex);
2611
+
2612
+ // Note: For Python 3.12+, we'd use PyEval_SetProfileAllThreads
2613
+ // For earlier versions, we rely on threading.setprofile in Python wrapper
2614
+
2615
+ Py_RETURN_NONE;
2616
+ }
2617
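A sketch of the per-PID coordination artifacts start_c_profiler() leaves behind, which a second copy of the module (or an outside observer) could inspect; the env-var pattern and lock-file path come from the code above, the helper is hypothetical.

import os

pid = os.getpid()
env_key = f"_SF_PROFILER_INSTALLED_{pid}"      # set to "1" once installation succeeds
lock_path = f"/tmp/sf_profiler_{pid}.lock"     # flock'd file; first byte is "1" when installed

def _lock_says_installed(path: str) -> bool:
    try:
        with open(path) as fh:
            return fh.read(1) == "1"
    except OSError:
        return False

already_installed = env_key in os.environ or _lock_says_installed(lock_path)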
+
2618
+ static PyObject *py_stop_c_profiler(PyObject *self, PyObject *args) {
2619
+ // Mark profiler as not ready
2620
+ atomic_store(&g_profiler_ready, 0);
2621
+
2622
+ // Disable profiler flag for new threads
2623
+ atomic_store(&g_profiler_enabled, 0);
2624
+
2625
+ // Remove the C-level profiler
2626
+ PyEval_SetProfile(NULL, NULL);
2627
+
2628
+ Py_RETURN_NONE;
2629
+ }
2630
+
2631
+ static PyObject *py_cache_config(PyObject *self, PyObject *args) {
2632
+ const char *file_path;
2633
+ const char *func_name;
2634
+ int include_arguments;
2635
+ int include_return_value;
2636
+ int autocapture_all_children;
2637
+ int arg_limit_mb;
2638
+ int return_limit_mb;
2639
+ float sample_rate;
2640
+
2641
+ if (!PyArg_ParseTuple(args, "ssiiiiff",
2642
+ &file_path, &func_name,
2643
+ &include_arguments, &include_return_value,
2644
+ &autocapture_all_children,
2645
+ &arg_limit_mb, &return_limit_mb, &sample_rate)) {
2646
+ Py_RETURN_NONE;
2647
+ }
2648
+
2649
+ // Build config
2650
+ sf_funcspan_config_t config;
2651
+ config.include_arguments = (uint8_t)include_arguments;
2652
+ config.include_return_value = (uint8_t)include_return_value;
2653
+ config.autocapture_all_children = (uint8_t)autocapture_all_children;
2654
+ config.arg_limit_mb = (uint32_t)arg_limit_mb;
2655
+ config.return_limit_mb = (uint32_t)return_limit_mb;
2656
+ config.sample_rate = sample_rate;
2657
+
2658
+ // Compute hash and cache
2659
+ uint64_t hash = simple_hash(file_path, func_name);
2660
+ uint32_t cache_idx = hash % CONFIG_CACHE_SIZE;
2661
+
2662
+ pthread_mutex_lock(&g_config_cache_mutex);
2663
+ g_config_cache[cache_idx].hash = hash;
2664
+ g_config_cache[cache_idx].config = config;
2665
+ pthread_mutex_unlock(&g_config_cache_mutex);
2666
+
2667
+ Py_RETURN_NONE;
2668
+ }
2669
+
2670
+ static PyObject *py_set_function_spans_enabled(PyObject *self, PyObject *args) {
2671
+ int enabled;
2672
+ if (!PyArg_ParseTuple(args, "p", &enabled)) {
2673
+ return NULL;
2674
+ }
2675
+ g_enable_function_spans = enabled;
2676
+ Py_RETURN_NONE;
2677
+ }
2678
+
2679
+ static PyObject *py_set_capture_installed_packages(PyObject *self, PyObject *args) {
2680
+ int enabled;
2681
+ if (!PyArg_ParseTuple(args, "p", &enabled)) {
2682
+ return NULL;
2683
+ }
2684
+ g_capture_installed_packages = enabled;
2685
+ Py_RETURN_NONE;
2686
+ }
2687
+
2688
+ static PyObject *py_set_capture_sf_veritas(PyObject *self, PyObject *args) {
2689
+ int enabled;
2690
+ if (!PyArg_ParseTuple(args, "p", &enabled)) {
2691
+ return NULL;
2692
+ }
2693
+ g_capture_sf_veritas = enabled;
2694
+ Py_RETURN_NONE;
2695
+ }
2696
+
2697
+ static PyObject *py_set_interceptors_ready(PyObject *self, PyObject *args) {
2698
+ // Mark interceptors as fully initialized - profiling can now begin
2699
+ atomic_store(&g_interceptors_ready, 1);
2700
+ fprintf(stderr, "[_sffuncspan] Interceptors ready - profiling enabled\n");
2701
+ fflush(stderr);
2702
+ Py_RETURN_NONE;
2703
+ }
2704
+
2705
+ static PyObject *py_shutdown(PyObject *self, PyObject *args) {
2706
+ if (!g_running) Py_RETURN_NONE;
2707
+
2708
+ // Stop profiler first
2709
+ PyEval_SetProfile(NULL, NULL);
2710
+
2711
+ atomic_store(&g_running, 0);
2712
+
2713
+ // Wake ALL threads with broadcast (not signal)
2714
+ pthread_mutex_lock(&g_cv_mtx);
2715
+ pthread_cond_broadcast(&g_cv);
2716
+ pthread_mutex_unlock(&g_cv_mtx);
2717
+
2718
+ // Join all sender threads in thread pool
2719
+ for (int i = 0; i < g_num_sender_threads; i++) {
2720
+ if (g_sender_threads[i]) {
2721
+ pthread_join(g_sender_threads[i], NULL);
2722
+ g_sender_threads[i] = 0;
2723
+ }
2724
+ }
2725
+ g_num_sender_threads = 0;
2726
+
2727
+ // Shutdown UUID4 ring buffer worker thread
2728
+ shutdown_span_uuid_buffer();
2729
+
2730
+ // Cleanup curl (per-thread handles cleaned by pthread_cleanup_push)
2731
+ if (g_hdrs) { curl_slist_free_all(g_hdrs); g_hdrs = NULL; }
2732
+ curl_global_cleanup();
2733
+
2734
+ free(g_url); g_url = NULL;
2735
+ free(g_func_span_query_escaped); g_func_span_query_escaped = NULL;
2736
+ free(g_json_prefix_func_span); g_json_prefix_func_span = NULL;
2737
+ free(g_api_key); g_api_key = NULL;
2738
+ free(g_service_uuid); g_service_uuid = NULL;
2739
+ free(g_library); g_library = NULL;
2740
+ free(g_version); g_version = NULL;
2741
+
2742
+ Py_XDECREF(g_capture_from_installed_libraries);
2743
+ g_capture_from_installed_libraries = NULL;
2744
+
2745
+ if (g_ring) {
2746
+ char *b; size_t l;
2747
+ while (ring_pop(&b, &l)) free(b);
2748
+ free(g_ring); g_ring = NULL;
2749
+ }
2750
+ Py_RETURN_NONE;
2751
+ }
2752
+
2753
+ // ---------- Module table ----------
2754
+ static PyMethodDef SFFuncSpanMethods[] = {
2755
+ {"init", (PyCFunction)py_init, METH_VARARGS | METH_KEYWORDS, "Init and start sender"},
2756
+ {"configure", (PyCFunction)py_configure, METH_VARARGS | METH_KEYWORDS, "Configure function span settings"},
2757
+ {"record_span", (PyCFunction)py_record_span, METH_VARARGS | METH_KEYWORDS, "Record a function span"},
2758
+ {"serialize_value", (PyCFunction)py_serialize_value, METH_VARARGS, "Serialize Python object to JSON (ultra-fast C)"},
2759
+ {"generate_span_id", (PyCFunction)py_generate_span_id, METH_NOARGS, "Generate a new span ID"},
2760
+ {"push_span", (PyCFunction)py_push_span, METH_VARARGS, "Push span ID onto stack"},
2761
+ {"pop_span", (PyCFunction)py_pop_span, METH_NOARGS, "Pop span ID from stack"},
2762
+ {"peek_parent_span_id", (PyCFunction)py_peek_parent_span_id, METH_NOARGS, "Peek parent span ID"},
2763
+ {"get_current_span_id", (PyCFunction)py_get_current_span_id, METH_NOARGS, "Get current span ID (C fallback for async-safety)"},
2764
+ {"get_epoch_ns", (PyCFunction)py_get_epoch_ns, METH_NOARGS, "Get current epoch nanoseconds"},
2765
+ {"get_stats", (PyCFunction)py_get_stats, METH_NOARGS, "Get performance statistics"},
2766
+ {"reset_stats", (PyCFunction)py_reset_stats, METH_NOARGS, "Reset performance statistics"},
2767
+ {"start_c_profiler", (PyCFunction)py_start_c_profiler, METH_NOARGS, "Start ultra-fast C profiler (replaces sys.setprofile)"},
2768
+ {"stop_c_profiler", (PyCFunction)py_stop_c_profiler, METH_NOARGS, "Stop ultra-fast C profiler"},
2769
+ {"cache_config", (PyCFunction)py_cache_config, METH_VARARGS, "Cache config for a function (avoids Python calls in profiler)"},
2770
+ {"set_function_spans_enabled", (PyCFunction)py_set_function_spans_enabled, METH_VARARGS, "Enable/disable function span capture and transmission (master kill switch)"},
2771
+ {"set_capture_installed_packages", (PyCFunction)py_set_capture_installed_packages, METH_VARARGS, "Enable/disable capturing spans from installed packages (site-packages, dist-packages)"},
2772
+ {"set_capture_sf_veritas", (PyCFunction)py_set_capture_sf_veritas, METH_VARARGS, "Enable/disable capturing spans from sf_veritas telemetry code itself"},
2773
+ {"set_interceptors_ready", (PyCFunction)py_set_interceptors_ready, METH_NOARGS, "Mark interceptors as ready - enables profiling (call after setup_interceptors completes)"},
2774
+ {"shutdown", (PyCFunction)py_shutdown, METH_NOARGS, "Shutdown sender and free state"},
2775
+ {NULL, NULL, 0, NULL}
2776
+ };
2777
+
2778
+ static struct PyModuleDef sffuncspanmodule = {
2779
+ PyModuleDef_HEAD_INIT,
2780
+ "_sffuncspan",
2781
+ "sf_veritas ultra-fast function span collection",
2782
+ -1,
2783
+ SFFuncSpanMethods
2784
+ };
2785
+
2786
+ PyMODINIT_FUNC PyInit__sffuncspan(void) {
2787
+ fprintf(stderr, "[_sffuncspan] PyInit__sffuncspan called! PID=%d, g_running=%d, g_profiler_installed_pid=%d\n",
2788
+ getpid(), atomic_load(&g_running), atomic_load(&g_profiler_installed_pid));
2789
+ fflush(stderr);
2790
+ return PyModule_Create(&sffuncspanmodule);
2791
+ }
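Taken together, the methods exported above imply roughly this lifecycle; a hedged sketch of the call ordering, not the package's actual bootstrap code (placeholder values throughout).

from sf_veritas import _sffuncspan

# 1. Start the sender threads and ring buffer.
_sffuncspan.init(url="<endpoint>", query="<graphql-query>", api_key="<key>",
                 service_uuid="<uuid>", library="<library>", version="<version>", http2=0)

# 2. Tune capture behaviour.
_sffuncspan.configure(sample_rate=1.0, enable_sampling=False)

# 3. Un-gate the profiler: it stays dormant until both flags below are set.
_sffuncspan.set_interceptors_ready()   # after interceptor setup completes
_sffuncspan.start_c_profiler()         # installs c_profile_func via PyEval_SetProfile

# ... application runs; spans stream to the sender threads ...

print(_sffuncspan.get_stats())
_sffuncspan.shutdown()                 # stops the profiler, joins sender threads, frees state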