openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,468 @@
1
+ # openbox/otel_setup.py
2
+ """
3
+ Setup OpenTelemetry instrumentors with body capture hooks.
4
+
5
+ Bodies are stored in the span processor buffer, NOT in OTel span attributes.
6
+ This keeps sensitive data out of external tracing systems while still
7
+ capturing it for governance evaluation.
8
+
9
+ Supported HTTP libraries:
10
+ - requests
11
+ - httpx (sync + async)
12
+ - urllib3
13
+ - urllib (standard library - request body only)
14
+
15
+ Supported database libraries:
16
+ - psycopg2 (PostgreSQL)
17
+ - asyncpg (PostgreSQL async)
18
+ - mysql-connector-python
19
+ - pymysql
20
+ - sqlite3 (SQLite, stdlib)
21
+ - pymongo (MongoDB)
22
+ - redis
23
+ - sqlalchemy (ORM)
24
+ """
25
+
26
+ import logging
27
+ from typing import TYPE_CHECKING, Any, Optional
28
+
29
+ from . import db_governance_hooks as _db_gov
30
+ from . import hook_governance as _hook_gov
31
+ from .file_governance_hooks import (
32
+ setup_file_io_instrumentation,
33
+ uninstrument_file_io,
34
+ )
35
+ from .http_governance_hooks import (
36
+ _httpx_async_request_hook,
37
+ _httpx_async_response_hook,
38
+ _httpx_request_hook,
39
+ _httpx_response_hook,
40
+ _requests_request_hook,
41
+ _requests_response_hook,
42
+ _urllib3_request_hook,
43
+ _urllib3_response_hook,
44
+ _urllib_request_hook,
45
+ setup_httpx_body_capture,
46
+ )
47
+
48
+ if TYPE_CHECKING:
49
+ from .span_processor import WorkflowSpanProcessor
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+ # Global state — hooks in sub-modules reference these via late import of this module
54
+ _span_processor: Optional["WorkflowSpanProcessor"] = None
55
+ _ignored_url_prefixes: set[str] = set()
56
+
57
+
58
def _instrument_http_clients() -> list[str]:
    """Instrument every available HTTP client library with the governance hooks.

    Each OTel instrumentor is optional: when its package cannot be imported the
    library is skipped and a debug message is logged.

    Returns:
        Names of the HTTP libraries that were instrumented, in the order
        requests, httpx, urllib3, urllib.
    """
    instrumented: list[str] = []

    # 1. requests library
    try:
        from opentelemetry.instrumentation.requests import RequestsInstrumentor

        RequestsInstrumentor().instrument(
            request_hook=_requests_request_hook,
            response_hook=_requests_response_hook,
        )
        instrumented.append("requests")
        logger.info("Instrumented: requests")
    except ImportError:
        logger.debug("requests instrumentation not available")

    # 2. httpx library (sync + async) - hooks for metadata only
    try:
        from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

        HTTPXClientInstrumentor().instrument(
            request_hook=_httpx_request_hook,
            response_hook=_httpx_response_hook,
            async_request_hook=_httpx_async_request_hook,
            async_response_hook=_httpx_async_response_hook,
        )
        instrumented.append("httpx")
        logger.info("Instrumented: httpx")
    except ImportError:
        logger.debug("httpx instrumentation not available")

    # 3. urllib3 library
    try:
        from opentelemetry.instrumentation.urllib3 import URLLib3Instrumentor

        URLLib3Instrumentor().instrument(
            request_hook=_urllib3_request_hook,
            response_hook=_urllib3_response_hook,
        )
        instrumented.append("urllib3")
        logger.info("Instrumented: urllib3")
    except ImportError:
        logger.debug("urllib3 instrumentation not available")

    # 4. urllib (standard library) - request body only, response body cannot be captured
    try:
        from opentelemetry.instrumentation.urllib import URLLibInstrumentor

        URLLibInstrumentor().instrument(
            request_hook=_urllib_request_hook,
        )
        instrumented.append("urllib")
        logger.info("Instrumented: urllib")
    except ImportError:
        logger.debug("urllib instrumentation not available")

    return instrumented


def setup_opentelemetry_for_governance(
    span_processor: "WorkflowSpanProcessor",
    api_url: str,
    api_key: str,
    *,
    ignored_urls: list | None = None,
    instrument_databases: bool = True,
    db_libraries: set[str] | None = None,
    instrument_file_io: bool = False,
    sqlalchemy_engine: Any | None = None,
    api_timeout: float = 30.0,
    on_api_error: str = "fail_open",
) -> None:
    """
    Setup OpenTelemetry instrumentors with body capture hooks.

    This function instruments HTTP, database, and file I/O libraries to:
    1. Create OTel spans for HTTP requests, database queries, and file operations
    2. Capture request/response bodies (via hooks that store in span_processor)
    3. Register the span processor with the OTel tracer provider

    It also stores ``span_processor`` and the ignored URL prefixes in this
    module's globals and configures the ``hook_governance`` and
    ``db_governance_hooks`` modules.

    NOTE(review): ``span_processor`` is added to the tracer provider on every
    call, so calling this function repeatedly registers it repeatedly.

    Args:
        span_processor: The WorkflowSpanProcessor to store bodies in
        api_url: Base URL of the governance API. Forwarded to
            ``hook_governance.configure`` and always added (without trailing
            slashes) to the ignored URL prefixes to prevent recursion.
        api_key: API key forwarded to ``hook_governance.configure``.
        ignored_urls: List of URL prefixes to ignore (e.g., OpenBox Core API).
            A single string is accepted and treated as one prefix.
        instrument_databases: Whether to instrument database libraries (default: True)
        db_libraries: Set of database libraries to instrument (None = all available).
                      Valid values: "psycopg2", "asyncpg", "mysql", "pymysql",
                      "sqlite3", "pymongo", "redis", "sqlalchemy"
        instrument_file_io: Whether to instrument file I/O operations (default: False)
        sqlalchemy_engine: Optional SQLAlchemy Engine instance to instrument. Required
                           when the engine is created before instrumentation runs (e.g.,
                           at module import time). If not provided, only future engines
                           created via create_engine() will be instrumented.
        api_timeout: Timeout forwarded to ``hook_governance.configure``
            (default: 30.0).
        on_api_error: Error policy forwarded to ``hook_governance.configure``
            (default: "fail_open").
    """
    global _span_processor, _ignored_url_prefixes
    _span_processor = span_processor

    # A bare string would be split by set() into single-character "prefixes";
    # treat it as one prefix instead.
    if isinstance(ignored_urls, str):
        ignored_urls = [ignored_urls]

    # Set ignored URL prefixes (always include api_url to prevent recursion)
    _ignored_url_prefixes = set(ignored_urls) if ignored_urls else set()
    _ignored_url_prefixes.add(api_url.rstrip("/"))
    logger.info(f"Ignoring URLs with prefixes: {_ignored_url_prefixes}")

    # Configure governance modules
    _hook_gov.configure(
        api_url,
        api_key,
        span_processor,
        api_timeout=api_timeout,
        on_api_error=on_api_error,
    )
    _db_gov.configure(span_processor)

    # Register span processor with OTel tracer provider
    # This ensures on_end() is called when spans complete
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    provider = trace.get_tracer_provider()
    if not isinstance(provider, TracerProvider):
        # Create a new TracerProvider if none exists
        provider = TracerProvider()
        trace.set_tracer_provider(provider)

    provider.add_span_processor(span_processor)
    logger.info("Registered WorkflowSpanProcessor with OTel TracerProvider")

    # 1-4. HTTP client libraries; the list tracks what was instrumented
    instrumented = _instrument_http_clients()

    # 5. httpx body capture (separate from OTel - patches Client.send)
    setup_httpx_body_capture(span_processor)

    logger.info(f"OpenTelemetry HTTP instrumentation complete. Instrumented: {instrumented}")

    # 6. Database instrumentation (optional)
    if sqlalchemy_engine is not None and not instrument_databases:
        logger.warning(
            "sqlalchemy_engine was provided but instrument_databases=False; "
            "engine will not be instrumented"
        )
    if instrument_databases:
        db_instrumented = setup_database_instrumentation(db_libraries, sqlalchemy_engine)
        if db_instrumented:
            instrumented.extend(db_instrumented)

    # 7. File I/O instrumentation (optional)
    if instrument_file_io:
        if setup_file_io_instrumentation():
            instrumented.append("file_io")

    logger.info(f"OpenTelemetry governance setup complete. Instrumented: {instrumented}")
199
+
200
+
201
def setup_database_instrumentation(
    db_libraries: set[str] | None = None,
    sqlalchemy_engine: Any | None = None,
) -> list[str]:
    """
    Setup OpenTelemetry database instrumentors.

    Database spans will be captured by the WorkflowSpanProcessor (already registered
    with the TracerProvider) and included in governance events.

    Args:
        db_libraries: Set of library names to instrument. If None, instruments all
                      available libraries. A single string is treated as one
                      library name. Unrecognized names are ignored with a
                      warning. Valid values:
                      - "psycopg2" (PostgreSQL sync)
                      - "asyncpg" (PostgreSQL async)
                      - "mysql" (mysql-connector-python)
                      - "pymysql"
                      - "sqlite3" (SQLite, stdlib)
                      - "pymongo" (MongoDB)
                      - "redis"
                      - "sqlalchemy" (ORM)
        sqlalchemy_engine: Optional SQLAlchemy Engine instance to instrument. When
                           provided, registers event listeners on this engine to capture
                           queries. Without this, only engines created after this call
                           (via patched create_engine) will be instrumented.

    Returns:
        List of successfully instrumented library names

    Raises:
        TypeError: If ``sqlalchemy_engine`` is given but sqlalchemy is not
            installed, or it is not a ``sqlalchemy.engine.Engine`` instance.
    """
    known_libraries = {
        "psycopg2",
        "asyncpg",
        "mysql",
        "pymysql",
        "sqlite3",
        "pymongo",
        "redis",
        "sqlalchemy",
    }

    # Normalize the selection once. A bare string must become a one-element
    # set: substring matching would otherwise make "mysql" match "pymysql".
    if db_libraries is None:
        requested = None
    elif isinstance(db_libraries, str):
        requested = {db_libraries}
    else:
        requested = set(db_libraries)

    if requested is not None:
        unknown = requested - known_libraries
        if unknown:
            # A typo would otherwise silently instrument nothing.
            logger.warning(
                "Ignoring unrecognized db_libraries entries: %s",
                sorted(map(str, unknown)),
            )

    def _wanted(library: str) -> bool:
        """Return True when *library* was selected (None selects everything)."""
        return requested is None or library in requested

    instrumented = []

    # ── pymongo CommandListener first (must register before MongoClient creation) ──
    if _wanted("pymongo"):
        _db_gov.setup_pymongo_hooks()

    # ── OTel dbapi instrumentors (governance via CursorTracer patch below) ──
    if _wanted("psycopg2"):
        try:
            from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor

            Psycopg2Instrumentor().instrument()
            instrumented.append("psycopg2")
            logger.info("Instrumented: psycopg2")
        except ImportError:
            logger.debug("psycopg2 OTel instrumentation not available")

    if _wanted("asyncpg"):
        try:
            from opentelemetry.instrumentation.asyncpg import AsyncPGInstrumentor

            AsyncPGInstrumentor().instrument()
            instrumented.append("asyncpg")
            logger.info("Instrumented: asyncpg")
        except ImportError:
            logger.debug("asyncpg OTel instrumentation not available")

    if _wanted("mysql"):
        try:
            from opentelemetry.instrumentation.mysql import MySQLInstrumentor

            MySQLInstrumentor().instrument()
            instrumented.append("mysql")
            logger.info("Instrumented: mysql")
        except ImportError:
            logger.debug("mysql OTel instrumentation not available")

    if _wanted("pymysql"):
        try:
            from opentelemetry.instrumentation.pymysql import PyMySQLInstrumentor

            PyMySQLInstrumentor().instrument()
            instrumented.append("pymysql")
            logger.info("Instrumented: pymysql")
        except ImportError:
            logger.debug("pymysql OTel instrumentation not available")

    if _wanted("sqlite3"):
        try:
            from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor

            SQLite3Instrumentor().instrument()
            instrumented.append("sqlite3")
            logger.info("Instrumented: sqlite3")
        except ImportError:
            logger.debug("sqlite3 OTel instrumentation not available")

    # pymongo OTel (CommandListener already registered above)
    if _wanted("pymongo"):
        try:
            from opentelemetry.instrumentation.pymongo import PymongoInstrumentor

            PymongoInstrumentor().instrument()
            instrumented.append("pymongo")
            logger.info("Instrumented: pymongo")
        except ImportError:
            logger.debug("pymongo OTel instrumentation not available")

    # redis — pass governance hooks to OTel instrumentor (native support)
    if _wanted("redis"):
        try:
            from opentelemetry.instrumentation.redis import RedisInstrumentor

            req_hook, resp_hook = _db_gov.setup_redis_hooks()
            RedisInstrumentor().instrument(
                request_hook=req_hook,
                response_hook=resp_hook,
            )
            instrumented.append("redis")
            logger.info("Instrumented: redis")
        except ImportError:
            logger.debug("redis instrumentation not available")

    # sqlalchemy (ORM)
    if sqlalchemy_engine is not None and not _wanted("sqlalchemy"):
        logger.warning(
            "sqlalchemy_engine was provided but 'sqlalchemy' is not in db_libraries; "
            "engine will not be instrumented"
        )
    if _wanted("sqlalchemy"):
        try:
            from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor

            if sqlalchemy_engine is not None:
                # Validate engine type before passing to instrumentor
                try:
                    from sqlalchemy.engine import Engine as _SAEngine
                except ImportError as exc:
                    # TypeError is not caught by the outer ImportError handler,
                    # so this propagates to the caller.
                    raise TypeError(
                        "sqlalchemy_engine was provided but sqlalchemy is not installed"
                    ) from exc
                if not isinstance(sqlalchemy_engine, _SAEngine):
                    raise TypeError(
                        f"sqlalchemy_engine must be a sqlalchemy.engine.Engine instance, "
                        f"got {type(sqlalchemy_engine).__name__}"
                    )
                # Governance hooks on engine events
                _db_gov.setup_sqlalchemy_hooks(sqlalchemy_engine)
                # Instrument the existing engine directly (registers event listeners)
                SQLAlchemyInstrumentor().instrument(engine=sqlalchemy_engine)
                logger.info("Instrumented: sqlalchemy (existing engine)")
            else:
                # Patch create_engine() for future engines only
                SQLAlchemyInstrumentor().instrument()
                logger.info("Instrumented: sqlalchemy (future engines)")
            instrumented.append("sqlalchemy")
        except ImportError:
            logger.debug("sqlalchemy instrumentation not available")

    # ── Governance hooks for dbapi libs (must be AFTER instrumentors) ──
    # OTel dbapi instrumentors silently discard request_hook/response_hook kwargs.
    # Instead, we patch CursorTracer.traced_execution to inject governance hooks
    # around the query_method call (runs inside the OTel span context).
    dbapi_libs = {"psycopg2", "mysql", "pymysql", "sqlite3"}
    if any(lib in instrumented for lib in dbapi_libs):
        if _db_gov.install_cursor_tracer_hooks():
            logger.info("CursorTracer governance hooks installed for dbapi libs")

    # asyncpg uses its own _do_execute (not CursorTracer) — needs separate wrapt hooks
    if "asyncpg" in instrumented:
        _db_gov.install_asyncpg_hooks()

    if instrumented:
        logger.info(f"Database instrumentation complete. Instrumented: {instrumented}")
    else:
        logger.debug("No database libraries instrumented (none available or installed)")

    return instrumented
365
+
366
+
367
def uninstrument_databases() -> None:
    """Uninstrument all database libraries.

    Calls ``uninstrument()`` on every OTel database instrumentor that can be
    imported, then removes the governance hooks via
    ``db_governance_hooks.uninstrument_all()``.

    Teardown of the OTel instrumentors is best-effort: a missing package or a
    failing ``uninstrument()`` is logged at debug level and does not stop the
    remaining libraries from being processed.
    """
    import importlib

    # (module path, instrumentor class name), in the original teardown order.
    otel_instrumentors = (
        ("opentelemetry.instrumentation.psycopg2", "Psycopg2Instrumentor"),
        ("opentelemetry.instrumentation.asyncpg", "AsyncPGInstrumentor"),
        ("opentelemetry.instrumentation.mysql", "MySQLInstrumentor"),
        ("opentelemetry.instrumentation.pymysql", "PyMySQLInstrumentor"),
        ("opentelemetry.instrumentation.sqlite3", "SQLite3Instrumentor"),
        ("opentelemetry.instrumentation.pymongo", "PymongoInstrumentor"),
        ("opentelemetry.instrumentation.redis", "RedisInstrumentor"),
        ("opentelemetry.instrumentation.sqlalchemy", "SQLAlchemyInstrumentor"),
    )

    for module_name, class_name in otel_instrumentors:
        try:
            instrumentor_cls = getattr(importlib.import_module(module_name), class_name)
            instrumentor_cls().uninstrument()
        except Exception:
            # Deliberately broad: ImportError (package not installed) and any
            # error raised by uninstrument() must not abort the teardown.
            logger.debug("Could not uninstrument %s", class_name, exc_info=True)

    # Clean up DB governance hooks
    _db_gov.uninstrument_all()
427
+
428
+
429
def uninstrument_all() -> None:
    """Uninstrument all HTTP, database, and file I/O instrumentation.

    Resets this module's global state (span processor reference and ignored
    URL prefixes), then calls ``uninstrument()`` on each OTel HTTP instrumentor
    that can be imported, and finally delegates to ``uninstrument_databases()``
    and ``uninstrument_file_io()``.

    Teardown of the HTTP instrumentors is best-effort: a missing package or a
    failing ``uninstrument()`` is logged at debug level and skipped.

    NOTE(review): this function does not itself revert the patch installed by
    ``setup_httpx_body_capture`` and does not remove the span processor from
    the tracer provider — confirm whether that is handled elsewhere.
    """
    global _span_processor, _ignored_url_prefixes
    import importlib

    _span_processor = None
    _ignored_url_prefixes = set()

    # Uninstrument HTTP libraries: (module path, instrumentor class name).
    http_instrumentors = (
        ("opentelemetry.instrumentation.requests", "RequestsInstrumentor"),
        ("opentelemetry.instrumentation.httpx", "HTTPXClientInstrumentor"),
        ("opentelemetry.instrumentation.urllib3", "URLLib3Instrumentor"),
        ("opentelemetry.instrumentation.urllib", "URLLibInstrumentor"),
    )

    for module_name, class_name in http_instrumentors:
        try:
            instrumentor_cls = getattr(importlib.import_module(module_name), class_name)
            instrumentor_cls().uninstrument()
        except Exception:
            # Deliberately broad: ImportError (package not installed) and any
            # error raised by uninstrument() must not abort the teardown.
            logger.debug("Could not uninstrument %s", class_name, exc_info=True)

    # Uninstrument database libraries
    uninstrument_databases()

    # Uninstrument file I/O
    uninstrument_file_io()