openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openbox_langgraph/__init__.py +130 -0
- openbox_langgraph/client.py +358 -0
- openbox_langgraph/config.py +264 -0
- openbox_langgraph/db_governance_hooks.py +897 -0
- openbox_langgraph/errors.py +114 -0
- openbox_langgraph/file_governance_hooks.py +413 -0
- openbox_langgraph/hitl.py +88 -0
- openbox_langgraph/hook_governance.py +397 -0
- openbox_langgraph/http_governance_hooks.py +695 -0
- openbox_langgraph/langgraph_handler.py +1616 -0
- openbox_langgraph/otel_setup.py +468 -0
- openbox_langgraph/span_processor.py +253 -0
- openbox_langgraph/tracing.py +352 -0
- openbox_langgraph/types.py +485 -0
- openbox_langgraph/verdict_handler.py +203 -0
- openbox_langgraph_sdk_python-0.1.0.dist-info/METADATA +492 -0
- openbox_langgraph_sdk_python-0.1.0.dist-info/RECORD +18 -0
- openbox_langgraph_sdk_python-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
# openbox_langgraph/http_governance_hooks.py
|
|
2
|
+
"""HTTP governance hooks for requests, httpx, urllib3, and urllib.
|
|
3
|
+
|
|
4
|
+
Captures request/response bodies and sends hook-level governance
|
|
5
|
+
evaluations (started/completed) for every HTTP operation during
|
|
6
|
+
activity execution.
|
|
7
|
+
|
|
8
|
+
Each library's OTel instrumentor calls these hooks. The hooks:
|
|
9
|
+
1. Extract request/response metadata
|
|
10
|
+
2. Build a span_data dict via _build_http_span_data()
|
|
11
|
+
3. Call hook_governance.evaluate_sync/async() for governance evaluation
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import contextvars
|
|
17
|
+
import logging
|
|
18
|
+
import time as _time
|
|
19
|
+
from collections import OrderedDict
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
from . import hook_governance as _hook_gov
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from .span_processor import WorkflowSpanProcessor
|
|
26
|
+
|
|
27
|
+
# Late import: otel_setup imports us, so we get a partially-loaded module ref.
|
|
28
|
+
# That's fine — we only access _otel._span_processor and _otel._ignored_url_prefixes
|
|
29
|
+
# at function call time, when both modules are fully loaded.
|
|
30
|
+
from . import otel_setup as _otel
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# ContextVar to pass HTTP child span from OTel request hooks to _patched_send.
|
|
35
|
+
# Request hooks receive the correct HTTP span; we store it here so _patched_send
|
|
36
|
+
# can use it after _original_send() returns (when the HTTP span has ended and
|
|
37
|
+
# trace.get_current_span() would return the parent activity span).
|
|
38
|
+
_httpx_http_span: contextvars.ContextVar = contextvars.ContextVar(
|
|
39
|
+
'_httpx_http_span', default=None
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Timing for HTTP hooks: span_id → perf_counter start time
|
|
43
|
+
# Used by request_hook (started) to pass timing to response_hook (completed)
|
|
44
|
+
# OrderedDict for FIFO eviction — .clear() would drop in-flight timings
|
|
45
|
+
_http_hook_timings: OrderedDict[int, float] = OrderedDict()
|
|
46
|
+
_HTTP_HOOK_TIMINGS_MAX = 1000
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _record_timing(span_id: int) -> None:
|
|
50
|
+
"""Record start time for a span, evicting oldest entries if at capacity."""
|
|
51
|
+
if len(_http_hook_timings) >= _HTTP_HOOK_TIMINGS_MAX:
|
|
52
|
+
# Evict oldest 10% to amortize eviction cost
|
|
53
|
+
for _ in range(max(1, _HTTP_HOOK_TIMINGS_MAX // 10)):
|
|
54
|
+
if _http_hook_timings:
|
|
55
|
+
_http_hook_timings.popitem(last=False)
|
|
56
|
+
_http_hook_timings[span_id] = _time.perf_counter()
|
|
57
|
+
|
|
58
|
+
# Text content types that are safe to capture as body
|
|
59
|
+
_TEXT_CONTENT_TYPES = (
|
|
60
|
+
"text/",
|
|
61
|
+
"application/json",
|
|
62
|
+
"application/xml",
|
|
63
|
+
"application/javascript",
|
|
64
|
+
"application/x-www-form-urlencoded",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
69
|
+
# Shared HTTP utilities
|
|
70
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _should_ignore_url(url: str) -> bool:
    """Return True when ``url`` starts with one of the configured ignore prefixes.

    The prefixes are read from ``otel_setup._ignored_url_prefixes`` at call
    time (e.g. the OpenBox Core API). An empty URL is never ignored.
    """
    if url:
        return any(url.startswith(ignored) for ignored in _otel._ignored_url_prefixes)
    return False
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _is_text_content_type(content_type: str | None) -> bool:
    """Tell whether a Content-Type header value denotes text that is safe to decode.

    A missing or empty header is assumed to be text. Otherwise the parameters
    after ``;`` are dropped, and the lowercased media type must start with one
    of the ``_TEXT_CONTENT_TYPES`` prefixes.
    """
    if content_type:
        media_type = content_type.lower().partition(";")[0].strip()
        # str.startswith accepts a tuple of candidate prefixes.
        return media_type.startswith(_TEXT_CONTENT_TYPES)
    return True
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _build_http_span_data(
    span,
    http_method: str,
    http_url: str,
    stage: str,
    request_body: str | None = None,
    request_headers: dict | None = None,
    response_body: str | None = None,
    response_headers: dict | None = None,
    http_status_code: int | None = None,
    duration_ms: float | None = None,
) -> dict:
    """Build span data dict for an HTTP request (used by governance hooks).

    attributes: OTel-original only. All custom data at root level.

    Args:
        span: OTel span; its ids come from ``hook_governance.extract_span_context``
            and its ``attributes``/``name`` are copied when present.
        http_method: HTTP method, also used for the fallback span name.
        http_url: Request URL.
        stage: Hook stage; ``end_time`` is only set when this is "completed".
        request_body: Request body text, if captured.
        request_headers: Request headers, if captured.
        response_body: Response body text, if captured.
        response_headers: Response headers, if captured.
        http_status_code: Response status; values >= 400 mark the span as ERROR.
        duration_ms: Elapsed milliseconds, used to back-date ``start_time``.

    Returns:
        A dict with the span identity, timing, status and HTTP-specific fields.
    """
    span_id_hex, trace_id_hex, parent_span_id = _hook_gov.extract_span_context(span)
    attrs = dict(span.attributes) if hasattr(span, 'attributes') and span.attributes else {}

    now_ns = _time.time_ns()
    # Compare against None rather than truthiness so that a measured duration
    # of 0 is reported as 0 instead of being dropped.
    duration_ns = int(duration_ms * 1_000_000) if duration_ms is not None else None
    end_time = now_ns if stage == "completed" else None
    start_time = (now_ns - duration_ns) if duration_ns is not None else now_ns

    error = None
    if http_status_code is not None and http_status_code >= 400:
        error = f"HTTP {http_status_code}"

    return {
        "span_id": span_id_hex,
        "trace_id": trace_id_hex,
        "parent_span_id": parent_span_id,
        "name": span.name if hasattr(span, 'name') and span.name else f"HTTP {http_method}",
        "kind": "CLIENT",
        "stage": stage,
        "start_time": start_time,
        "end_time": end_time,
        "duration_ns": duration_ns,
        "attributes": attrs,
        "status": {"code": "ERROR" if error else "UNSET", "description": error},
        "events": [],
        # Hook type identification
        "hook_type": "http_request",
        # HTTP-specific root fields
        "http_method": http_method,
        "http_url": http_url,
        "request_body": request_body,
        "request_headers": request_headers,
        "response_body": response_body,
        "response_headers": response_headers,
        "http_status_code": http_status_code,
        "error": error,
    }
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
149
|
+
# requests hooks
|
|
150
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _requests_request_hook(span, request) -> None:
    """Hook called before requests library sends a request.

    Sends a "started" governance evaluation for the request unless no span
    processor is installed, governance is not configured, or the URL is
    ignored.

    Args:
        span: OTel span
        request: requests.PreparedRequest
    """
    if _otel._span_processor is None or not _hook_gov.is_configured():
        return

    url = str(request.url) if hasattr(request, 'url') else None
    if not url or _should_ignore_url(url):
        return

    # Only textual bodies are forwarded. Other body objects (file-like
    # objects, generators) are left out, matching the httpx and urllib3 hooks.
    body = None
    try:
        raw_body = request.body
        if isinstance(raw_body, bytes):
            raw_body = raw_body.decode("utf-8", errors="ignore")
        if isinstance(raw_body, str) and raw_body:
            body = raw_body
    except Exception as exc:
        # Best effort: a body that cannot be read must not block the request.
        logger.debug("Failed to capture requests request body: %s", exc)

    # Remember the start time so the response hook can compute the duration.
    if hasattr(span, 'context') and hasattr(span.context, 'span_id'):
        _record_timing(span.context.span_id)

    headers = dict(request.headers) if hasattr(request, "headers") and request.headers else None
    method = request.method or "UNKNOWN"
    span_data = _build_http_span_data(
        span, method, url, "started", request_body=body, request_headers=headers
    )
    _hook_gov.evaluate_sync(
        span,
        identifier=url,
        span_data=span_data,
    )
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _requests_response_hook(span, request, response) -> None:
    """Hook called after requests library receives a response.

    Sends a "completed" governance evaluation carrying request and response
    data unless no span processor is installed, governance is not configured,
    or the URL is ignored.

    Args:
        span: OTel span
        request: requests.PreparedRequest
        response: requests.Response
    """
    if _otel._span_processor is None or not _hook_gov.is_configured():
        return

    url = str(request.url) if hasattr(request, 'url') else None
    if not url or _should_ignore_url(url):
        return

    # The response is only inspected once we know an evaluation will be sent,
    # so response.text is not read needlessly.
    # NOTE(review): response.text presumably forces a full download for
    # stream=True responses — confirm that is acceptable.
    resp_body = None
    resp_headers = None
    try:
        resp_headers = (
            dict(response.headers) if hasattr(response, "headers") and response.headers else None
        )
        content_type = response.headers.get("content-type", "")
        if _is_text_content_type(content_type):
            resp_body = response.text
    except Exception as exc:
        logger.debug("Failed to capture requests response data: %s", exc)

    # Compute duration from the timing recorded by the request hook.
    _dur_ms = None
    if hasattr(span, 'context') and hasattr(span.context, 'span_id'):
        _start = _http_hook_timings.pop(span.context.span_id, None)
        if _start is not None:
            _dur_ms = (_time.perf_counter() - _start) * 1000

    req_headers = (
        dict(request.headers) if hasattr(request, "headers") and request.headers else None
    )

    # Only textual request bodies are forwarded, matching the other hooks.
    req_body = None
    try:
        raw_body = request.body
        if isinstance(raw_body, bytes):
            raw_body = raw_body.decode("utf-8", errors="ignore")
        if isinstance(raw_body, str) and raw_body:
            req_body = raw_body
    except Exception as exc:
        logger.debug("Failed to capture requests request body: %s", exc)

    method = request.method or "UNKNOWN"
    span_data = _build_http_span_data(
        span, method, url, "completed",
        request_body=req_body, request_headers=req_headers,
        response_body=resp_body, response_headers=resp_headers,
        http_status_code=getattr(response, 'status_code', None),
        duration_ms=_dur_ms,
    )
    _hook_gov.evaluate_sync(
        span,
        identifier=url,
        span_data=span_data,
    )
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
254
|
+
# httpx hooks
|
|
255
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _httpx_request_hook(span, request) -> None:
    """Hook called before httpx sends a request.

    Captures headers and, on a best-effort basis, the request body, stores
    ``span`` in ``_httpx_http_span`` for the patched ``send``, and sends a
    "started" governance evaluation when governance is configured.

    Args:
        span: OTel span
        request: RequestInfo namedtuple with (method, url, headers, stream, extensions)
    """
    if _otel._span_processor is None:
        return

    url = None
    if hasattr(request, 'url'):
        url = str(request.url)
    # Requests to ignored URLs are skipped entirely.
    if url and _should_ignore_url(url):
        return

    def _body_from_stream(payload_stream):
        # httpx ByteStream stores the body in _stream (not body or _body);
        # the remaining probes cover other stream shapes.
        if hasattr(payload_stream, '_stream') and isinstance(payload_stream._stream, bytes):
            return payload_stream._stream
        if hasattr(payload_stream, 'body'):
            return payload_stream.body
        if hasattr(payload_stream, '_body'):
            return payload_stream._body
        if isinstance(payload_stream, bytes):
            return payload_stream
        return None

    body = None
    request_headers = None
    try:
        if hasattr(request, 'headers') and request.headers:
            request_headers = dict(request.headers)

        if hasattr(request, 'stream'):
            body = _body_from_stream(request.stream)

        # Fallbacks for httpx.Request objects: cached content first, then the
        # public content property.
        if not body:
            cached_content = getattr(request, '_content', None)
            if cached_content:
                body = cached_content

        if not body and hasattr(request, 'content'):
            try:
                public_content = request.content
                if public_content:
                    body = public_content
            except Exception:
                pass

        if body and not isinstance(body, str):
            if isinstance(body, bytes):
                body = body.decode("utf-8", errors="ignore")
            else:
                body = str(body)

    except Exception:
        pass  # Best effort

    # Store HTTP child span so _patched_send can use it for governance
    _httpx_http_span.set(span)

    if not (_hook_gov.is_configured() and url):
        return

    if hasattr(request, "method"):
        method = str(request.method)
    else:
        method = "UNKNOWN"
    text_body = body if isinstance(body, str) else None
    started_span_data = _build_http_span_data(
        span,
        method,
        url,
        "started",
        request_body=text_body,
        request_headers=request_headers,
    )
    _hook_gov.evaluate_sync(span, identifier=url, span_data=started_span_data)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _httpx_response_hook(span, request, response) -> None:
|
|
332
|
+
"""No-op — completed governance is handled in _patched_send."""
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
async def _httpx_async_request_hook(span, request) -> None:
    """Async version of request hook with async governance evaluation.

    Captures headers and, on a best-effort basis, the request body, stores
    ``span`` in ``_httpx_http_span`` for the patched async ``send``, and awaits
    a "started" governance evaluation when governance is configured.
    """
    if _otel._span_processor is None:
        return

    url = None
    if hasattr(request, 'url'):
        url = str(request.url)
    # Requests to ignored URLs are skipped entirely.
    if url and _should_ignore_url(url):
        return

    def _body_from_stream(payload_stream):
        # httpx ByteStream stores the body in _stream; the remaining probes
        # cover other stream shapes.
        if hasattr(payload_stream, '_stream') and isinstance(payload_stream._stream, bytes):
            return payload_stream._stream
        if hasattr(payload_stream, 'body'):
            return payload_stream.body
        if hasattr(payload_stream, '_body'):
            return payload_stream._body
        if isinstance(payload_stream, bytes):
            return payload_stream
        return None

    body = None
    request_headers = None
    try:
        if hasattr(request, 'headers') and request.headers:
            request_headers = dict(request.headers)

        if hasattr(request, 'stream'):
            body = _body_from_stream(request.stream)

        # Fallbacks: cached content first, then the public content property.
        if not body:
            cached_content = getattr(request, '_content', None)
            if cached_content:
                body = cached_content

        if not body and hasattr(request, 'content'):
            try:
                public_content = request.content
                if public_content:
                    body = public_content
            except Exception:
                pass

        if body and not isinstance(body, str):
            if isinstance(body, bytes):
                body = body.decode("utf-8", errors="ignore")
            else:
                body = str(body)

    except Exception:
        pass  # Best effort

    # Store HTTP child span so _patched_async_send can use it for governance
    _httpx_http_span.set(span)

    if not (_hook_gov.is_configured() and url):
        return

    if hasattr(request, "method"):
        method = str(request.method)
    else:
        method = "UNKNOWN"
    text_body = body if isinstance(body, str) else None
    started_span_data = _build_http_span_data(
        span,
        method,
        url,
        "started",
        request_body=text_body,
        request_headers=request_headers,
    )
    await _hook_gov.evaluate_async(span, identifier=url, span_data=started_span_data)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
async def _httpx_async_response_hook(span, request, response) -> None:
|
|
402
|
+
"""No-op — completed governance is handled in _patched_async_send."""
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
406
|
+
# httpx body capture (patches Client.send)
|
|
407
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _capture_httpx_request_data(request) -> tuple:
|
|
411
|
+
"""Extract request body and headers from an httpx request.
|
|
412
|
+
|
|
413
|
+
Returns:
|
|
414
|
+
(request_body, request_headers) tuple. Either may be None.
|
|
415
|
+
"""
|
|
416
|
+
request_body = None
|
|
417
|
+
request_headers = None
|
|
418
|
+
try:
|
|
419
|
+
if hasattr(request, '_content') and request._content:
|
|
420
|
+
request_body = request._content
|
|
421
|
+
if isinstance(request_body, bytes):
|
|
422
|
+
request_body = request_body.decode("utf-8", errors="ignore")
|
|
423
|
+
elif hasattr(request, 'content') and request.content:
|
|
424
|
+
request_body = request.content
|
|
425
|
+
if isinstance(request_body, bytes):
|
|
426
|
+
request_body = request_body.decode("utf-8", errors="ignore")
|
|
427
|
+
if hasattr(request, 'headers') and request.headers:
|
|
428
|
+
request_headers = dict(request.headers)
|
|
429
|
+
except Exception as e:
|
|
430
|
+
logger.debug(f"Failed to capture request body/headers: {e}")
|
|
431
|
+
return request_body, request_headers
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _capture_httpx_response_data(response) -> tuple:
    """Extract response body and headers from an httpx response.

    The body is only captured for text content types, and only when the
    response stream has already been consumed.

    Returns:
        (response_body, response_headers) tuple. Either may be None.
    """
    response_body = None
    response_headers = None

    content_type = ""
    try:
        if hasattr(response, "headers") and response.headers:
            content_type = response.headers.get("content-type", "")
    except Exception:
        content_type = ""

    try:
        if hasattr(response, 'headers') and response.headers:
            response_headers = dict(response.headers)

        if _is_text_content_type(content_type):
            # Only access body if stream has been read (avoid StreamNotRead error)
            stream_pending = (
                hasattr(response, 'is_stream_consumed') and not response.is_stream_consumed
            )
            if not stream_pending:
                if hasattr(response, '_content') and response._content:
                    cached = response._content
                    if isinstance(cached, bytes):
                        response_body = cached.decode("utf-8", errors="ignore")
                    else:
                        response_body = str(cached)
                else:
                    response_body = response.text
    except Exception as e:
        logger.debug(f"Failed to capture response body: {e}")

    return response_body, response_headers
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _get_httpx_http_span():
    """Return the span stored by the httpx request hooks and clear the slot.

    When no span was stored, the current OTel span is returned instead.
    """
    stored_span = _httpx_http_span.get(None)
    # Always reset the slot so the span is not picked up again.
    _httpx_http_span.set(None)
    if stored_span is not None:
        return stored_span

    from opentelemetry import trace

    return trace.get_current_span()
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _prepare_completed_governance(http_span, request, url, request_body, request_headers,
                                  response_body, response_headers, status_code, duration_ms=None):
    """Assemble the arguments for a 'completed' governance evaluation.

    Returns:
        ``(http_span, url, span_data)``, or None when governance is not
        configured or either ``url`` or ``http_span`` is missing.
    """
    if not _hook_gov.is_configured() or not url or not http_span:
        return None

    if hasattr(request, 'method'):
        method = str(request.method)
    else:
        method = "UNKNOWN"

    completed_span_data = _build_http_span_data(
        http_span,
        method,
        url,
        "completed",
        request_body=request_body,
        request_headers=request_headers,
        response_body=response_body,
        response_headers=response_headers,
        http_status_code=status_code,
        duration_ms=duration_ms,
    )
    return http_span, url, completed_span_data
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def setup_httpx_body_capture(span_processor: WorkflowSpanProcessor) -> None:
    """Setup httpx body capture using Client.send patching.

    This is separate from OTel instrumentation because OTel hooks
    receive streams that cannot be safely consumed.

    The patch is idempotent: a ``send`` method that already carries the patch
    marker is left alone, so calling this function again does not stack
    wrappers and duplicate "completed" governance evaluations.

    Args:
        span_processor: Accepted for interface compatibility; not referenced
            by this function.
    """
    try:
        import httpx
    except ImportError:
        logger.debug("httpx not available for body capture")
        return

    # Attribute set on our wrappers so that repeated setup calls can detect them.
    patch_marker = "_openbox_body_capture_patched"

    if not getattr(httpx.Client.send, patch_marker, False):
        _original_send = httpx.Client.send

        def _patched_send(self, request, *args, **kwargs):
            url = str(request.url) if hasattr(request, 'url') else None
            if url and _should_ignore_url(url):
                return _original_send(self, request, *args, **kwargs)

            request_body, request_headers = _capture_httpx_request_data(request)
            _start = _time.perf_counter()
            response = _original_send(self, request, *args, **kwargs)
            _dur_ms = (_time.perf_counter() - _start) * 1000
            # The span stored by the request hook is used here.
            http_span = _get_httpx_http_span()
            response_body, response_headers = _capture_httpx_response_data(response)

            gov_args = _prepare_completed_governance(
                http_span, request, url, request_body, request_headers,
                response_body, response_headers, getattr(response, 'status_code', None),
                duration_ms=_dur_ms,
            )
            if gov_args:
                _hook_gov.evaluate_sync(gov_args[0], identifier=gov_args[1], span_data=gov_args[2])
            return response

        setattr(_patched_send, patch_marker, True)
        httpx.Client.send = _patched_send

    if not getattr(httpx.AsyncClient.send, patch_marker, False):
        _original_async_send = httpx.AsyncClient.send

        async def _patched_async_send(self, request, *args, **kwargs):
            url = str(request.url) if hasattr(request, 'url') else None
            if url and _should_ignore_url(url):
                return await _original_async_send(self, request, *args, **kwargs)

            request_body, request_headers = _capture_httpx_request_data(request)
            _start = _time.perf_counter()
            response = await _original_async_send(self, request, *args, **kwargs)
            _dur_ms = (_time.perf_counter() - _start) * 1000
            http_span = _get_httpx_http_span()
            response_body, response_headers = _capture_httpx_response_data(response)

            gov_args = _prepare_completed_governance(
                http_span, request, url, request_body, request_headers,
                response_body, response_headers, getattr(response, 'status_code', None),
                duration_ms=_dur_ms,
            )
            if gov_args:
                await _hook_gov.evaluate_async(
                    gov_args[0], identifier=gov_args[1], span_data=gov_args[2]
                )
            return response

        setattr(_patched_async_send, patch_marker, True)
        httpx.AsyncClient.send = _patched_async_send

    logger.info("Patched httpx for body capture")
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
564
|
+
# urllib3 hooks
|
|
565
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def _urllib3_request_hook(span, pool, request_info) -> None:
    """Hook called before urllib3 sends a request.

    Rebuilds the URL from the pool's scheme/host/port plus the request path,
    records the start time for the span, and sends a "started" governance
    evaluation when governance is configured and the URL is not ignored.

    Args:
        span: OTel span
        pool: urllib3.HTTPConnectionPool
        request_info: RequestInfo namedtuple
    """
    if _otel._span_processor is None:
        return

    body = None
    try:
        raw_body = getattr(request_info, "body", None)
        if raw_body:
            if isinstance(raw_body, bytes):
                body = raw_body.decode("utf-8", errors="ignore")
            else:
                body = raw_body
    except Exception:
        pass

    if not _hook_gov.is_configured():
        return

    # Reconstruct URL from pool and request_info
    scheme = getattr(pool, 'scheme', 'http')
    host = getattr(pool, 'host', 'unknown')
    port = getattr(pool, 'port', None)
    url_path = getattr(request_info, 'url', getattr(request_info, 'request_url', '/'))
    # The port is only spelled out when it is not 80 or 443.
    authority = f"{host}:{port}" if port and port not in (80, 443) else f"{host}"
    url = f"{scheme}://{authority}{url_path}"

    if _should_ignore_url(url):
        return

    if hasattr(span, 'context') and hasattr(span.context, 'span_id'):
        _record_timing(span.context.span_id)

    method = getattr(request_info, 'method', 'UNKNOWN')
    headers = None
    if hasattr(request_info, "headers") and request_info.headers:
        headers = dict(request_info.headers)
    # Only textual bodies are forwarded.
    text_body = body if isinstance(body, str) else None
    started_span_data = _build_http_span_data(
        span,
        method,
        url,
        "started",
        request_body=text_body,
        request_headers=headers,
    )
    _hook_gov.evaluate_sync(span, identifier=url, span_data=started_span_data)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _urllib3_response_hook(span, pool, response) -> None:
    """Hook called after urllib3 receives a response.

    Sends a "completed" governance evaluation unless no span processor is
    installed, governance is not configured, or the URL is ignored. The hook
    receives no request info, so the URL has no path ("/" only) and the method
    is reported as "UNKNOWN".

    Args:
        span: OTel span
        pool: urllib3.HTTPConnectionPool
        response: urllib3.HTTPResponse
    """
    if _otel._span_processor is None or not _hook_gov.is_configured():
        return

    # Reconstruct URL from pool
    scheme = getattr(pool, 'scheme', 'http')
    host = getattr(pool, 'host', 'unknown')
    port = getattr(pool, 'port', None)
    if port and port not in (80, 443):
        url = f"{scheme}://{host}:{port}/"
    else:
        url = f"{scheme}://{host}/"

    if _should_ignore_url(url):
        return

    resp_body = None
    resp_headers = None
    try:
        resp_headers = (
            dict(response.headers) if hasattr(response, "headers") and response.headers else None
        )
        content_type = response.headers.get("content-type", "")
        if _is_text_content_type(content_type):
            # Use the already-cached body when the response exposes one, so a
            # response opened with preload_content=False is not drained before
            # the caller streams it.
            # NOTE(review): relies on urllib3's private HTTPResponse._body
            # cache — confirm against the urllib3 versions in use.
            if hasattr(response, "_body"):
                body = response._body
            else:
                body = response.data
            if isinstance(body, bytes):
                body = body.decode("utf-8", errors="ignore")
            if body:
                resp_body = body
    except Exception as exc:
        logger.debug("Failed to capture urllib3 response data: %s", exc)

    # Compute duration from the timing recorded by the request hook.
    _dur_ms = None
    if hasattr(span, 'context') and hasattr(span.context, 'span_id'):
        _start = _http_hook_timings.pop(span.context.span_id, None)
        if _start is not None:
            _dur_ms = (_time.perf_counter() - _start) * 1000

    status_code = getattr(response, 'status', None)
    span_data = _build_http_span_data(
        span, "UNKNOWN", url, "completed",
        response_body=resp_body, response_headers=resp_headers,
        http_status_code=status_code, duration_ms=_dur_ms,
    )
    _hook_gov.evaluate_sync(
        span,
        identifier=url,
        span_data=span_data,
    )
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
679
|
+
# urllib hooks (standard library)
|
|
680
|
+
# NOTE: Response body capture is NOT supported - read() consumes the socket stream
|
|
681
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def _urllib_request_hook(span, request) -> None:
    """Hook called before urllib sends a request.

    NOTE(review): in the visible code the decoded body is only bound to a
    local and never forwarded — confirm whether a governance evaluation for
    urllib is intentionally absent.
    """
    if _otel._span_processor is None:
        return

    try:
        body = request.data if request.data else None
        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="ignore")
    except Exception:
        pass
|