openbox-temporal-sdk-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openbox/otel_setup.py ADDED
@@ -0,0 +1,969 @@
1
+ # openbox/otel_setup.py
2
+ """
3
+ Setup OpenTelemetry instrumentors with body capture hooks.
4
+
5
+ Bodies are stored in the span processor buffer, NOT in OTel span attributes.
6
+ This keeps sensitive data out of external tracing systems while still
7
+ capturing it for governance evaluation.
8
+
9
+ Supported HTTP libraries:
10
+ - requests
11
+ - httpx (sync + async)
12
+ - urllib3
13
+ - urllib (standard library - request body only)
14
+
15
+ Supported database libraries:
16
+ - psycopg2 (PostgreSQL)
17
+ - asyncpg (PostgreSQL async)
18
+ - mysql-connector-python
19
+ - pymysql
20
+ - pymongo (MongoDB)
21
+ - redis
22
+ - sqlalchemy (ORM)
23
+ """
24
+
25
+ from typing import TYPE_CHECKING, Optional, Set, List
26
+ import logging
27
+
28
+ if TYPE_CHECKING:
29
+ from .span_processor import WorkflowSpanProcessor
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ # Global reference to span processor for hooks
34
+ _span_processor: Optional["WorkflowSpanProcessor"] = None
35
+
36
+ # URLs to ignore (e.g., OpenBox Core API - we don't want to capture governance events)
37
+ _ignored_url_prefixes: Set[str] = set()
38
+
39
+ # Text content types that are safe to capture as body
40
+ _TEXT_CONTENT_TYPES = (
41
+ "text/",
42
+ "application/json",
43
+ "application/xml",
44
+ "application/javascript",
45
+ "application/x-www-form-urlencoded",
46
+ )
47
+
48
+
49
def _should_ignore_url(url: str) -> bool:
    """Return True when ``url`` starts with one of the ignored URL prefixes.

    An empty (falsy) URL is never ignored.
    """
    if not url:
        return False
    return any(url.startswith(ignored) for ignored in _ignored_url_prefixes)
57
+
58
+
59
def _is_text_content_type(content_type: Optional[str]) -> bool:
    """Tell whether a Content-Type header value denotes text (safe to decode).

    A missing or empty header is treated as text. Parameters after the first
    ``;`` (such as ``charset``) are dropped and the comparison is
    case-insensitive.
    """
    if not content_type:
        return True  # No content-type: assume text
    media_type, _, _ = content_type.lower().partition(";")
    return media_type.strip().startswith(tuple(_TEXT_CONTENT_TYPES))
65
+
66
+
67
def setup_opentelemetry_for_governance(
    span_processor: "WorkflowSpanProcessor",
    ignored_urls: Optional[list] = None,
    instrument_databases: bool = True,
    db_libraries: Optional[Set[str]] = None,
    instrument_file_io: bool = False,
) -> None:
    """
    Setup OpenTelemetry instrumentors with body capture hooks.

    This function instruments HTTP, database, and file I/O libraries to:
    1. Create OTel spans for HTTP requests, database queries, and file operations
    2. Capture request/response bodies (via hooks that store in span_processor)
    3. Register the span processor with the OTel tracer provider

    Args:
        span_processor: The WorkflowSpanProcessor to store bodies in
        ignored_urls: List of URL prefixes to ignore (e.g., OpenBox Core API).
                      A single string is accepted and treated as one prefix.
                      When empty or None, previously configured prefixes are
                      left unchanged.
        instrument_databases: Whether to instrument database libraries (default: True)
        db_libraries: Set of database libraries to instrument (None = all available).
                      Valid values: "psycopg2", "asyncpg", "mysql", "pymysql",
                      "pymongo", "redis", "sqlalchemy"
        instrument_file_io: Whether to instrument file I/O operations (default: False)
    """
    global _span_processor, _ignored_url_prefixes
    _span_processor = span_processor

    # Set ignored URL prefixes
    if ignored_urls:
        # set("http://x") would explode a bare string into single characters,
        # turning almost every URL into a match; treat it as one prefix instead.
        if isinstance(ignored_urls, str):
            ignored_urls = [ignored_urls]
        _ignored_url_prefixes = set(ignored_urls)
        logger.info("Ignoring URLs with prefixes: %s", _ignored_url_prefixes)

    # Register span processor with OTel tracer provider
    # This ensures on_end() is called when spans complete
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    provider = trace.get_tracer_provider()
    if not isinstance(provider, TracerProvider):
        # Create a new TracerProvider if none exists
        provider = TracerProvider()
        trace.set_tracer_provider(provider)

    provider.add_span_processor(span_processor)
    logger.info("Registered WorkflowSpanProcessor with OTel TracerProvider")

    # Track what was instrumented
    instrumented = []

    # 1. requests library
    try:
        from opentelemetry.instrumentation.requests import RequestsInstrumentor

        RequestsInstrumentor().instrument(
            request_hook=_requests_request_hook,
            response_hook=_requests_response_hook,
        )
        instrumented.append("requests")
        logger.info("Instrumented: requests")
    except ImportError:
        logger.debug("requests instrumentation not available")

    # 2. httpx library (sync + async)
    try:
        from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

        HTTPXClientInstrumentor().instrument(
            request_hook=_httpx_request_hook,
            response_hook=_httpx_response_hook,
            async_request_hook=_httpx_async_request_hook,
            async_response_hook=_httpx_async_response_hook,
        )
        instrumented.append("httpx")
        logger.info("Instrumented: httpx")
    except ImportError:
        logger.debug("httpx instrumentation not available")

    # 3. urllib3 library
    try:
        from opentelemetry.instrumentation.urllib3 import URLLib3Instrumentor

        URLLib3Instrumentor().instrument(
            request_hook=_urllib3_request_hook,
            response_hook=_urllib3_response_hook,
        )
        instrumented.append("urllib3")
        logger.info("Instrumented: urllib3")
    except ImportError:
        logger.debug("urllib3 instrumentation not available")

    # 4. urllib (standard library) - request body only, response body cannot be captured
    try:
        from opentelemetry.instrumentation.urllib import URLLibInstrumentor

        URLLibInstrumentor().instrument(
            request_hook=_urllib_request_hook,
        )
        instrumented.append("urllib")
        logger.info("Instrumented: urllib")
    except ImportError:
        logger.debug("urllib instrumentation not available")

    # 5. httpx body capture (separate from OTel - patches Client.send)
    setup_httpx_body_capture(span_processor)

    logger.info("OpenTelemetry HTTP instrumentation complete. Instrumented: %s", instrumented)

    # 6. Database instrumentation (optional)
    if instrument_databases:
        db_instrumented = setup_database_instrumentation(db_libraries)
        if db_instrumented:
            instrumented.extend(db_instrumented)

    # 7. File I/O instrumentation (optional)
    if instrument_file_io:
        if setup_file_io_instrumentation():
            instrumented.append("file_io")

    logger.info("OpenTelemetry governance setup complete. Instrumented: %s", instrumented)
186
+
187
+
188
def setup_file_io_instrumentation() -> bool:
    """
    Setup file I/O instrumentation by patching built-in open().

    File operations will be captured as spans with:
    - file.path: File path
    - file.mode: Open mode (r, w, a, etc.)
    - file.operation: read, write, etc.
    - file.bytes: Length of the data read/written (len() of the str/bytes)

    The original open() is kept on ``builtins._openbox_original_open`` so that
    uninstrument_file_io() can restore it; if that attribute already exists the
    function returns immediately.

    Returns:
        True if instrumentation was successful
    """
    import builtins
    from opentelemetry import trace

    # Check if already instrumented
    if hasattr(builtins, '_openbox_original_open'):
        logger.debug("File I/O already instrumented")
        return True

    _original_open = builtins.open
    builtins._openbox_original_open = _original_open  # Store for uninstrumentation
    _tracer = trace.get_tracer("openbox.file_io")

    # Paths to skip (noisy system files)
    _skip_patterns = ('/dev/', '/proc/', '/sys/', '__pycache__', '.pyc', '.pyo', '.so', '.dylib')

    class TracedFile:
        """Wrapper around file object to trace read/write operations."""

        def __init__(self, file_obj, file_path: str, mode: str, parent_span):
            self._file = file_obj
            self._file_path = file_path
            self._mode = mode
            self._parent_span = parent_span
            self._bytes_read = 0
            self._bytes_written = 0

        def read(self, size=-1):
            with _tracer.start_as_current_span("file.read") as span:
                span.set_attribute("file.path", self._file_path)
                span.set_attribute("file.operation", "read")
                data = self._file.read(size)
                bytes_count = len(data) if isinstance(data, (str, bytes)) else 0
                self._bytes_read += bytes_count
                span.set_attribute("file.bytes", bytes_count)
                return data

        def readline(self, size=-1):
            # Accept the optional size argument like real file objects do, so
            # callers using readline(n) keep working once open() is patched.
            with _tracer.start_as_current_span("file.readline") as span:
                span.set_attribute("file.path", self._file_path)
                span.set_attribute("file.operation", "readline")
                data = self._file.readline(size)
                bytes_count = len(data) if isinstance(data, (str, bytes)) else 0
                self._bytes_read += bytes_count
                span.set_attribute("file.bytes", bytes_count)
                return data

        def readlines(self, hint=-1):
            # Accept the optional hint argument like real file objects do.
            with _tracer.start_as_current_span("file.readlines") as span:
                span.set_attribute("file.path", self._file_path)
                span.set_attribute("file.operation", "readlines")
                data = self._file.readlines(hint)
                bytes_count = sum(len(line) for line in data) if data else 0
                self._bytes_read += bytes_count
                span.set_attribute("file.bytes", bytes_count)
                span.set_attribute("file.lines", len(data) if data else 0)
                return data

        def write(self, data):
            with _tracer.start_as_current_span("file.write") as span:
                span.set_attribute("file.path", self._file_path)
                span.set_attribute("file.operation", "write")
                bytes_count = len(data) if isinstance(data, (str, bytes)) else 0
                span.set_attribute("file.bytes", bytes_count)
                self._bytes_written += bytes_count
                return self._file.write(data)

        def writelines(self, lines):
            # writelines() accepts any iterable. Materialize one-shot iterables
            # first: measuring them would otherwise exhaust them before the
            # real write, and len() is not defined for generators.
            if lines is not None and not isinstance(lines, (list, tuple)):
                lines = list(lines)
            with _tracer.start_as_current_span("file.writelines") as span:
                span.set_attribute("file.path", self._file_path)
                span.set_attribute("file.operation", "writelines")
                bytes_count = sum(len(line) for line in lines) if lines else 0
                span.set_attribute("file.bytes", bytes_count)
                span.set_attribute("file.lines", len(lines) if lines else 0)
                self._bytes_written += bytes_count
                return self._file.writelines(lines)

        def close(self):
            # Detach the span before ending it so a second close() (for example
            # an explicit close() inside a ``with`` block) does not touch or end
            # an already finished span.
            span, self._parent_span = self._parent_span, None
            if span:
                span.set_attribute("file.total_bytes_read", self._bytes_read)
                span.set_attribute("file.total_bytes_written", self._bytes_written)
                span.end()
            return self._file.close()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            self.close()
            return False

        def __iter__(self):
            # Iteration is delegated to the wrapped file and is not traced.
            return iter(self._file)

        def __next__(self):
            return next(self._file)

        def __getattr__(self, name):
            # Anything not wrapped above is forwarded to the underlying file.
            return getattr(self._file, name)

    def traced_open(file, mode='r', *args, **kwargs):
        file_str = str(file)

        # Skip system/noisy paths
        if any(p in file_str for p in _skip_patterns):
            return _original_open(file, mode, *args, **kwargs)

        # The "file.open" span stays open until TracedFile.close() ends it.
        span = _tracer.start_span("file.open")
        span.set_attribute("file.path", file_str)
        span.set_attribute("file.mode", mode)

        try:
            file_obj = _original_open(file, mode, *args, **kwargs)
            return TracedFile(file_obj, file_str, mode, span)
        except Exception as e:
            span.set_attribute("error", True)
            span.set_attribute("error.type", type(e).__name__)
            span.set_attribute("error.message", str(e))
            span.end()
            raise

    builtins.open = traced_open
    logger.info("Instrumented: file I/O (builtins.open)")
    return True
324
+
325
+
326
def uninstrument_file_io() -> None:
    """Put the original built-in open() back, if file I/O was instrumented.

    Does nothing when ``builtins._openbox_original_open`` is absent.
    """
    import builtins

    try:
        original_open = builtins._openbox_original_open
    except AttributeError:
        return  # Never instrumented (or already restored)

    builtins.open = original_open
    del builtins._openbox_original_open
    logger.info("Uninstrumented: file I/O")
333
+
334
+
335
def setup_database_instrumentation(
    db_libraries: Optional[Set[str]] = None,
) -> List[str]:
    """
    Setup OpenTelemetry database instrumentors.

    Database spans will be captured by the WorkflowSpanProcessor (already registered
    with the TracerProvider) and included in governance events.

    Args:
        db_libraries: Set of library names to instrument. If None, instruments all
                      available libraries. Valid values:
                      - "psycopg2" (PostgreSQL sync)
                      - "asyncpg" (PostgreSQL async)
                      - "mysql" (mysql-connector-python)
                      - "pymysql"
                      - "pymongo" (MongoDB)
                      - "redis"
                      - "sqlalchemy" (ORM)
                      Names outside this list are skipped with a warning.

    Returns:
        List of successfully instrumented library names
    """
    import importlib

    # (library name, instrumentation module, instrumentor class), in the order
    # in which the libraries are instrumented.
    specs = (
        ("psycopg2", "opentelemetry.instrumentation.psycopg2", "Psycopg2Instrumentor"),
        ("asyncpg", "opentelemetry.instrumentation.asyncpg", "AsyncPGInstrumentor"),
        ("mysql", "opentelemetry.instrumentation.mysql", "MySQLInstrumentor"),
        ("pymysql", "opentelemetry.instrumentation.pymysql", "PyMySQLInstrumentor"),
        ("pymongo", "opentelemetry.instrumentation.pymongo", "PymongoInstrumentor"),
        ("redis", "opentelemetry.instrumentation.redis", "RedisInstrumentor"),
        ("sqlalchemy", "opentelemetry.instrumentation.sqlalchemy", "SQLAlchemyInstrumentor"),
    )

    if db_libraries is not None:
        # Surface typos such as "postgres" instead of silently instrumenting nothing.
        known = {name for name, _, _ in specs}
        unknown = sorted(str(name) for name in db_libraries if name not in known)
        if unknown:
            logger.warning("Ignoring unknown database libraries: %s", unknown)

    instrumented = []
    for name, module_path, class_name in specs:
        if db_libraries is not None and name not in db_libraries:
            continue
        try:
            module = importlib.import_module(module_path)
            instrumentor_cls = getattr(module, class_name, None)
            if instrumentor_cls is None:
                # Mirror what ``from module import Name`` raises for a missing name.
                raise ImportError(f"cannot import name {class_name!r} from {module_path!r}")
            instrumentor_cls().instrument()
        except ImportError:
            logger.debug("%s instrumentation not available", name)
            continue
        instrumented.append(name)
        logger.info("Instrumented: %s", name)

    if instrumented:
        logger.info("Database instrumentation complete. Instrumented: %s", instrumented)
    else:
        logger.debug("No database libraries instrumented (none available or installed)")

    return instrumented
443
+
444
+
445
def uninstrument_databases() -> None:
    """Uninstrument all database libraries.

    Best effort: instrumentation packages that are not installed are skipped,
    and any other failure is logged at debug level instead of being raised.
    """
    import importlib

    # (instrumentation module, instrumentor class) for every supported library.
    specs = (
        ("opentelemetry.instrumentation.psycopg2", "Psycopg2Instrumentor"),
        ("opentelemetry.instrumentation.asyncpg", "AsyncPGInstrumentor"),
        ("opentelemetry.instrumentation.mysql", "MySQLInstrumentor"),
        ("opentelemetry.instrumentation.pymysql", "PyMySQLInstrumentor"),
        ("opentelemetry.instrumentation.pymongo", "PymongoInstrumentor"),
        ("opentelemetry.instrumentation.redis", "RedisInstrumentor"),
        ("opentelemetry.instrumentation.sqlalchemy", "SQLAlchemyInstrumentor"),
    )

    for module_path, class_name in specs:
        try:
            module = importlib.import_module(module_path)
            getattr(module, class_name)().uninstrument()
        except ImportError:
            # Instrumentation package not installed: nothing to undo.
            continue
        except Exception as exc:
            # Keep going with the remaining libraries, but leave a trace.
            logger.debug("Failed to uninstrument %s: %s", class_name, exc)
495
+
496
+
497
def uninstrument_all() -> None:
    """Uninstrument all HTTP and database libraries, and file I/O.

    Clears the module-level span processor reference first, so hooks that are
    still installed become no-ops. Best effort: instrumentation packages that
    are not installed are skipped and other failures are logged at debug level.

    NOTE: the httpx ``Client.send`` / ``AsyncClient.send`` patch installed by
    setup_httpx_body_capture() is not reverted here.
    """
    global _span_processor
    _span_processor = None

    import importlib

    # (instrumentation module, instrumentor class) for the HTTP libraries.
    http_specs = (
        ("opentelemetry.instrumentation.requests", "RequestsInstrumentor"),
        ("opentelemetry.instrumentation.httpx", "HTTPXClientInstrumentor"),
        ("opentelemetry.instrumentation.urllib3", "URLLib3Instrumentor"),
        ("opentelemetry.instrumentation.urllib", "URLLibInstrumentor"),
    )

    # Uninstrument HTTP libraries
    for module_path, class_name in http_specs:
        try:
            module = importlib.import_module(module_path)
            getattr(module, class_name)().uninstrument()
        except ImportError:
            # Instrumentation package not installed: nothing to undo.
            continue
        except Exception as exc:
            logger.debug("Failed to uninstrument %s: %s", class_name, exc)

    # Uninstrument database libraries
    uninstrument_databases()

    # Uninstrument file I/O
    uninstrument_file_io()
536
+
537
+
538
+ # ═══════════════════════════════════════════════════════════════════════════════
539
+ # requests hooks
540
+ # ═══════════════════════════════════════════════════════════════════════════════
541
+
542
+
543
def _requests_request_hook(span, request) -> None:
    """
    Hook called before requests library sends a request.

    Stores the request body (bytes are decoded as UTF-8, undecodable bytes
    dropped) in the span processor under the span's id. Requests whose URL
    matches an ignored prefix are skipped, as in the httpx hooks. Failures are
    logged at debug level and never propagate into the caller's HTTP request.

    Args:
        span: OTel span
        request: requests.PreparedRequest
    """
    if _span_processor is None:
        return

    try:
        # Honour the ignored URL prefixes (e.g. OpenBox Core API).
        url = getattr(request, "url", None)
        if url and _should_ignore_url(str(url)):
            return

        body = request.body
        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="ignore")

        if body:
            _span_processor.store_body(span.context.span_id, request_body=body)
    except Exception as exc:
        logger.debug("Failed to capture requests request body: %s", exc)
565
+
566
+
567
def _requests_response_hook(span, request, response) -> None:
    """
    Hook called after requests library receives a response.

    Stores the response text in the span processor when the content type is
    textual. Responses whose body has not been downloaded yet (``stream=True``)
    are skipped, and so are requests whose URL matches an ignored prefix.
    Failures are logged at debug level and never raised.

    Args:
        span: OTel span
        request: requests.PreparedRequest
        response: requests.Response
    """
    if _span_processor is None:
        return

    try:
        # Honour the ignored URL prefixes (e.g. OpenBox Core API).
        url = getattr(request, "url", None)
        if url and _should_ignore_url(str(url)):
            return

        # With stream=True the body is still on the socket; reading
        # response.text here would download it in full (and block on an
        # endless stream) before the caller ever sees the response.
        # NOTE(review): relies on requests' private ``_content_consumed`` flag;
        # objects without it are treated as already consumed.
        if not getattr(response, "_content_consumed", True):
            return

        content_type = response.headers.get("content-type", "")
        if _is_text_content_type(content_type):
            _span_processor.store_body(span.context.span_id, response_body=response.text)
    except Exception as exc:
        logger.debug("Failed to capture requests response body: %s", exc)
585
+
586
+
587
+ # ═══════════════════════════════════════════════════════════════════════════════
588
+ # httpx hooks
589
+ #
590
+ # These hooks are called by the OTel httpx instrumentation.
591
+ # We capture request/response bodies here for governance evaluation.
592
+ # ═══════════════════════════════════════════════════════════════════════════════
593
+
594
+
595
def _httpx_request_hook(span, request) -> None:
    """
    Record request headers and body for an outgoing httpx request.

    Does nothing when no span processor is installed or the URL is ignored.
    The body is looked up, in order, on the request's ``stream`` (``body``,
    ``_body``, or the stream itself when it is bytes), then ``_content``, then
    ``content``. Any error is swallowed (best effort).

    Args:
        span: OTel span
        request: RequestInfo namedtuple with (method, url, headers, stream, extensions)
    """
    if _span_processor is None:
        return

    # Skip ignored URLs
    target_url = str(request.url) if hasattr(request, 'url') else None
    if target_url and _should_ignore_url(target_url):
        return

    try:
        # Headers from the RequestInfo namedtuple
        if hasattr(request, 'headers') and request.headers:
            _span_processor.store_body(
                span.context.span_id, request_headers=dict(request.headers)
            )

        # First choice: the payload carried by the request stream
        payload = None
        if hasattr(request, 'stream'):
            body_stream = request.stream
            for attr_name in ('body', '_body'):
                if hasattr(body_stream, attr_name):
                    payload = getattr(body_stream, attr_name)
                    break
            else:
                if isinstance(body_stream, bytes):
                    payload = body_stream

        # Fallback: private content attribute (for httpx.Request objects)
        if not payload:
            cached = getattr(request, '_content', None)
            if cached:
                payload = cached

        # Fallback: public content property, which may raise
        if not payload and hasattr(request, 'content'):
            try:
                public_content = request.content
            except Exception:
                public_content = None
            if public_content:
                payload = public_content

        if not payload:
            return

        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", errors="ignore")
        elif not isinstance(payload, str):
            payload = str(payload)
        _span_processor.store_body(span.context.span_id, request_body=payload)

    except Exception:
        pass  # Best effort
649
+
650
+
651
def _httpx_response_hook(span, request, response) -> None:
    """
    Record response headers and, for text content types, the response body.

    NOTE: At this point the response may not have been fully read yet.
    The body is taken from ``_content`` when present, otherwise from the
    ``content`` property; if neither yields data nothing is stored, and body
    capture may need to happen via the patched send method instead.

    Args:
        span: OTel span
        request: httpx.Request
        response: httpx.Response
    """
    if _span_processor is None:
        return

    # Skip ignored URLs
    target_url = str(request.url) if hasattr(request, 'url') else None
    if target_url and _should_ignore_url(target_url):
        return

    try:
        # Headers first: available even for streaming responses
        if hasattr(response, 'headers') and response.headers:
            _span_processor.store_body(
                span.context.span_id, response_headers=dict(response.headers)
            )

        if not _is_text_content_type(response.headers.get("content-type", "")):
            return

        payload = None
        if hasattr(response, '_content') and response._content:
            # Response already read
            payload = response._content
        elif hasattr(response, 'content'):
            try:
                payload = response.content
            except Exception:
                pass

        if not payload:
            return

        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", errors="ignore")
        _span_processor.store_body(span.context.span_id, response_body=payload)
    except Exception:
        pass  # Best effort
698
+
699
+
700
async def _httpx_async_request_hook(span, request) -> None:
    """Async request hook; delegates to the synchronous ``_httpx_request_hook``."""
    return _httpx_request_hook(span, request)
703
+
704
+
705
async def _httpx_async_response_hook(span, request, response) -> None:
    """
    Async response hook: record response headers/body and the request body.

    For text content types the body is taken from ``_content`` when already
    read; otherwise ``aread()`` is awaited (when available) and ``_content`` is
    checked again. The request body is then looked up on ``request.stream``.
    Any error is swallowed (best effort).
    """
    if _span_processor is None:
        return

    # Skip ignored URLs
    target_url = str(request.url) if hasattr(request, 'url') else None
    if target_url and _should_ignore_url(target_url):
        return

    def _as_text(raw):
        """Decode bytes as UTF-8 (dropping bad bytes); pass anything else through."""
        return raw.decode("utf-8", errors="ignore") if isinstance(raw, bytes) else raw

    try:
        # Response headers
        if hasattr(response, 'headers') and response.headers:
            _span_processor.store_body(
                span.context.span_id, response_headers=dict(response.headers)
            )

        if _is_text_content_type(response.headers.get("content-type", "")):
            text = None

            if hasattr(response, '_content') and response._content:
                # Already read
                text = _as_text(response._content)
            elif hasattr(response, 'aread'):
                # Reading consumes the response, but httpx caches the result
                # in _content after the first read
                try:
                    await response.aread()
                    if hasattr(response, '_content') and response._content:
                        text = _as_text(response._content)
                except Exception:
                    pass

            if text:
                _span_processor.store_body(span.context.span_id, response_body=text)

        # Also try to get the request body from the stream
        sent = None
        if hasattr(request, 'stream'):
            body_stream = request.stream
            for attr_name in ('body', '_body'):
                if hasattr(body_stream, attr_name):
                    sent = getattr(body_stream, attr_name)
                    break

        if sent:
            _span_processor.store_body(span.context.span_id, request_body=_as_text(sent))

    except Exception:
        pass  # Best effort
761
+
762
+
763
+ # ═══════════════════════════════════════════════════════════════════════════════
764
+ # httpx body capture (patches Client.send)
765
+ # ═══════════════════════════════════════════════════════════════════════════════
766
+
767
+
768
def setup_httpx_body_capture(span_processor: "WorkflowSpanProcessor") -> None:
    """
    Patch ``httpx.Client.send`` and ``httpx.AsyncClient.send`` to capture bodies.

    This is separate from OTel instrumentation because OTel hooks
    receive streams that cannot be safely consumed. Captured data is stored in
    ``span_processor`` under the id of the span that is current around the
    send call. If httpx is not importable, nothing is patched.
    """
    try:
        import httpx
    except ImportError:
        logger.debug("httpx not available for body capture")
        return

    _original_send = httpx.Client.send
    _original_async_send = httpx.AsyncClient.send

    def _is_ignored(request) -> bool:
        """True when the request URL matches an ignored prefix."""
        url = str(request.url) if hasattr(request, 'url') else None
        return bool(url and _should_ignore_url(url))

    def _request_body_text(request):
        """Return the request payload (bytes decoded as UTF-8), or None."""
        if hasattr(request, '_content') and request._content:
            payload = request._content
        elif hasattr(request, 'content') and request.content:
            payload = request.content
        else:
            return None
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", errors="ignore")
        return payload

    def _patched_send(self, request, *args, **kwargs):
        if _is_ignored(request):
            return _original_send(self, request, *args, **kwargs)

        # Request body is read BEFORE sending
        request_body = None
        try:
            request_body = _request_body_text(request)
        except Exception as e:
            logger.debug(f"Failed to capture request body: {e}")

        response = _original_send(self, request, *args, **kwargs)

        # Response body is read AFTER receiving (binary content is skipped)
        response_body = None
        if _is_text_content_type(response.headers.get("content-type", "")):
            try:
                response_body = response.text
            except Exception as e:
                logger.debug(f"Failed to capture response body: {e}")

        # Store bodies if we have an active span
        try:
            from opentelemetry import trace

            span = trace.get_current_span()
            if span and hasattr(span, 'context') and span.context.span_id:
                if request_body:
                    span_processor.store_body(span.context.span_id, request_body=request_body)
                    logger.debug(f"Stored request body for span {span.context.span_id}")
                if response_body:
                    span_processor.store_body(span.context.span_id, response_body=response_body)
                    logger.debug(f"Stored response body for span {span.context.span_id}")
        except Exception as e:
            logger.debug(f"Failed to store body: {e}")

        return response

    async def _patched_async_send(self, request, *args, **kwargs):
        if _is_ignored(request):
            return await _original_async_send(self, request, *args, **kwargs)

        # Request body and headers are read BEFORE sending
        request_body = None
        request_headers = None
        try:
            request_body = _request_body_text(request)
            if hasattr(request, 'headers') and request.headers:
                request_headers = dict(request.headers)
        except Exception as e:
            logger.debug(f"Failed to capture request body/headers: {e}")

        # Grab the current span BEFORE calling the original send: the OTel
        # httpx instrumentation creates a child span for the HTTP call itself
        from opentelemetry import trace
        parent_span = trace.get_current_span()

        response = await _original_async_send(self, request, *args, **kwargs)

        # Response headers are always captured; the body only for text content
        response_body = None
        response_headers = None
        content_type = response.headers.get("content-type", "")
        try:
            if hasattr(response, 'headers') and response.headers:
                response_headers = dict(response.headers)
            if _is_text_content_type(content_type):
                response_body = response.text
        except Exception as e:
            logger.debug(f"Failed to capture response body: {e}")

        # Store everything against the parent span (activity span); the HTTP
        # span may have ended by now, but it was covered via the hooks
        try:
            if parent_span and hasattr(parent_span, 'context') and parent_span.context.span_id:
                span_id = parent_span.context.span_id
                captured = (
                    ("request_body", request_body),
                    ("response_body", response_body),
                    ("request_headers", request_headers),
                    ("response_headers", response_headers),
                )
                for field, value in captured:
                    if value:
                        span_processor.store_body(span_id, **{field: value})
        except Exception:
            pass  # Best effort

        return response

    httpx.Client.send = _patched_send
    httpx.AsyncClient.send = _patched_async_send
    logger.info("Patched httpx for body capture")
898
+
899
+
900
+ # ═══════════════════════════════════════════════════════════════════════════════
901
+ # urllib3 hooks
902
+ # ═══════════════════════════════════════════════════════════════════════════════
903
+
904
+
905
def _urllib3_request_hook(span, pool, request_info) -> None:
    """
    Hook called before urllib3 sends a request.

    Stores the request body (bytes are decoded as UTF-8, undecodable bytes
    dropped) in the span processor. Requests whose URL matches an ignored
    prefix are skipped, as in the httpx hooks. Failures are logged at debug
    level and never raised.

    Args:
        span: OTel span
        pool: urllib3.HTTPConnectionPool
        request_info: RequestInfo namedtuple
    """
    if _span_processor is None:
        return

    try:
        # Honour the ignored URL prefixes (e.g. OpenBox Core API).
        # NOTE(review): assumes request_info.url, when present, is the full URL.
        url = getattr(request_info, "url", None)
        if url and _should_ignore_url(str(url)):
            return

        body = getattr(request_info, "body", None)
        if body:
            if isinstance(body, bytes):
                body = body.decode("utf-8", errors="ignore")
            _span_processor.store_body(span.context.span_id, request_body=body)
    except Exception as exc:
        logger.debug("Failed to capture urllib3 request body: %s", exc)
925
+
926
+
927
def _urllib3_response_hook(span, pool, response) -> None:
    """
    Hook called after urllib3 receives a response.

    Stores the response body in the span processor when the content type is
    textual and the body has already been buffered. Responses that were opened
    without preloading are left untouched so the caller can still read or
    stream them. Failures are logged at debug level and never raised.

    Args:
        span: OTel span
        pool: urllib3.HTTPConnectionPool
        response: urllib3.HTTPResponse
    """
    if _span_processor is None:
        return

    try:
        content_type = response.headers.get("content-type", "")
        if not _is_text_content_type(content_type):
            return

        # Reading ``response.data`` on a response opened with
        # preload_content=False drains the socket, so the caller's later
        # read()/stream() would come back empty. Only use a body that urllib3
        # has already buffered.
        # NOTE(review): relies on urllib3's private ``_body`` cache; objects
        # without that attribute fall back to ``.data`` as before.
        missing = object()
        body = getattr(response, "_body", missing)
        if body is missing:
            body = response.data

        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="ignore")
        if body:
            _span_processor.store_body(span.context.span_id, response_body=body)
    except Exception as exc:
        logger.debug("Failed to capture urllib3 response body: %s", exc)
949
+
950
+
951
+ # ═══════════════════════════════════════════════════════════════════════════════
952
+ # urllib hooks (standard library)
953
+ # NOTE: Response body capture is NOT supported - read() consumes the socket stream
954
+ # ═══════════════════════════════════════════════════════════════════════════════
955
+
956
+
957
def _urllib_request_hook(span, request) -> None:
    """
    Hook called before urllib sends a request.

    Stores the request data (bytes are decoded as UTF-8, undecodable bytes
    dropped) in the span processor. Requests whose URL matches an ignored
    prefix are skipped, as in the httpx hooks. Failures are logged at debug
    level and never raised.

    Args:
        span: OTel span
        request: the outgoing request object (its ``data`` and ``full_url``
            attributes are read)
    """
    if _span_processor is None:
        return

    try:
        # Honour the ignored URL prefixes (e.g. OpenBox Core API).
        url = getattr(request, "full_url", None)
        if url and _should_ignore_url(str(url)):
            return

        body = request.data
        if body:
            if isinstance(body, bytes):
                body = body.decode("utf-8", errors="ignore")
            _span_processor.store_body(span.context.span_id, request_body=body)
    except Exception as exc:
        logger.debug("Failed to capture urllib request body: %s", exc)