openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,897 @@
1
+ # openbox/db_governance_hooks.py
2
+ """Hook-level governance for database operations.
3
+
4
+ Intercepts DB queries at 'started' (pre-query) and 'completed' (post-query)
5
+ stages, sending governance evaluations to OpenBox Core via hook_governance.
6
+
7
+ Supported libraries:
8
+ - dbapi-based (psycopg2, mysql, pymysql) via CursorTracer patch
+ - asyncpg (wrapt wrappers on Connection methods; it does not use CursorTracer)
9
+ - pymongo (CommandListener monitoring API)
10
+ - redis (native OTel request_hook/response_hook)
11
+ - sqlalchemy (before/after_cursor_execute events)
12
+
13
+ Architecture for dbapi libs: After OTel instrumentors wrap psycopg2.connect()
14
+ etc., we monkey-patch CursorTracer.traced_execution to inject governance
15
+ hooks around the query_method call (which runs inside the OTel span context).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import threading
22
+ import time
23
+ from collections.abc import Callable
24
+ from typing import TYPE_CHECKING, Any
25
+
26
+ from opentelemetry import trace as otel_trace
27
+
28
+ if TYPE_CHECKING:
29
+ from .span_processor import WorkflowSpanProcessor
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
# (module, attribute) pairs of wrapt patches applied by this module.
# Informational only — wrapt patches can't be cleanly removed.
_installed_patches: list[tuple[str, str]] = []

# SQLAlchemy event listeners kept for cleanup: (engine, event_name, listener_fn)
_sqlalchemy_listeners: list[tuple[Any, str, Callable]] = []

# pymongo dedup: thread-local nesting depth of the wrapt Collection wrappers.
# find_one() internally calls find() and both are wrapped, so a counter (not a
# boolean) is needed: the inner wrapper's finally must not re-enable the
# CommandListener early. The CommandListener skips its work while depth > 0.
_pymongo_wrapt_depth = threading.local()

# pymongo: command string captured at the started event, keyed by request_id,
# so succeeded/failed report the same db_statement. The size cap prevents
# unbounded growth if succeeded/failed events are missed.
_PYMONGO_PENDING_MAX = 1000
_pymongo_pending_commands: dict[int, str] = {}

# Set by configure(); None until then.
_span_processor: WorkflowSpanProcessor | None = None
54
+
55
+
56
def configure(span_processor: WorkflowSpanProcessor) -> None:
    """Remember the span processor used by the DB governance hooks.

    Args:
        span_processor: WorkflowSpanProcessor instance; stored in the
            module-level ``_span_processor`` slot.
    """
    global _span_processor

    _span_processor = span_processor
    logger.info("DB governance hooks configured")
65
+
66
+
67
+ # ═══════════════════════════════════════════════════════════════════════════════
68
+ # Shared helpers
69
+ # ═══════════════════════════════════════════════════════════════════════════════
70
+
71
def _classify_sql(query: Any) -> str:
    """Return the leading SQL verb of *query*, or ``"UNKNOWN"``.

    Args:
        query: The statement about to be executed. Usually a ``str``; bytes
            are decoded as UTF-8, anything else is converted with ``str()``.

    Returns:
        One of SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, ALTER, TRUNCATE,
        BEGIN, COMMIT, ROLLBACK, EXPLAIN when the statement starts with that
        keyword (case-insensitive, leading whitespace ignored); otherwise
        ``"UNKNOWN"`` (including for empty / ``None`` input).
    """
    if not query:
        return "UNKNOWN"

    if isinstance(query, (bytes, bytearray)):
        # str(b"SELECT 1") yields "b'SELECT 1'", which would never match a verb.
        text = bytes(query).decode("utf-8", errors="replace")
    else:
        text = str(query)
    text = text.strip().upper()

    for verb in ("SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP",
                 "ALTER", "TRUNCATE", "BEGIN", "COMMIT", "ROLLBACK", "EXPLAIN"):
        if not text.startswith(verb):
            continue
        # Require a word boundary after the keyword so that identifiers such
        # as "SELECTED_ROWS()" are not reported as SELECT. An empty remainder
        # (e.g. "COMMIT") or punctuation ("SELECT*", "BEGIN;") is a boundary.
        following = text[len(verb):len(verb) + 1]
        if not (following.isalnum() or following == "_"):
            return verb
    return "UNKNOWN"
81
+
82
+
83
def _generate_span_id() -> str:
    """Return a random 64-bit span ID as 16 zero-padded lowercase hex chars.

    Used for pymongo governance spans.
    """
    import random

    return f"{random.getrandbits(64):016x}"
87
+
88
+
89
def _build_db_span_data(
    span: Any,
    db_system: str,
    db_name: str | None,
    db_operation: str,
    db_statement: str,
    server_address: str | None,
    server_port: int | None,
    stage: str,
    duration_ms: float | None = None,
    error: str | None = None,
    rowcount: int | None = None,
    gov_span_id: str | None = None,
) -> dict:
    """Build the span data dict for one stage of a DB operation.

    The entry carries ``stage`` at root level for OpenBox Core, plus
    DB-specific root fields and ``hook_type="db_query"``.

    Args:
        span: Span whose context (and attributes/name, when present) is used.
        db_system: Database system label, e.g. "postgresql", "mongodb", "redis".
        db_name: Database name, or None.
        db_operation: Operation / verb being executed.
        db_statement: Statement text to report.
        server_address: Host, or None.
        server_port: Port, or None (falsy values are reported as None).
        stage: "started" or "completed". Only "completed" gets an ``end_time``.
        duration_ms: Elapsed time in milliseconds, or None when unknown.
        error: Error description; when truthy the status code is "ERROR".
        rowcount: Affected-row count; reported only if a non-negative int.
        gov_span_id: When given, used as ``span_id`` and the current span
            becomes ``parent_span_id`` (used by pymongo to avoid span_id
            collisions).

    Returns:
        A dict in the same format as ``_extract_span_data``.
    """
    from collections.abc import Mapping

    from . import hook_governance as _hook_gov

    current_span_id, trace_id_hex, default_parent = _hook_gov.extract_span_context(span)

    if gov_span_id:
        # pymongo: use generated span_id, current span becomes parent
        span_id_hex = gov_span_id
        parent_span_id = current_span_id
    else:
        # Default: use current span_id, extract parent normally
        span_id_hex = current_span_id
        parent_span_id = default_parent

    # Accept any Mapping, not just dict: OpenTelemetry SDK spans expose their
    # attributes as a mapping type that is not a dict subclass, so a dict-only
    # check silently dropped every attribute. Non-mapping values (e.g. mock
    # objects) are still ignored.
    raw_attrs = getattr(span, "attributes", None)
    attrs = dict(raw_attrs) if isinstance(raw_attrs, Mapping) else {}

    span_name = getattr(span, "name", None)
    if not span_name or not isinstance(span_name, str):
        span_name = f"{db_operation} {db_system}"

    now_ns = time.time_ns()
    # `is not None` rather than truthiness: a measured duration of 0.0 ms is
    # a real value and must be reported as 0, not as "unknown".
    duration_ns = int(duration_ms * 1_000_000) if duration_ms is not None else None
    valid_rowcount = isinstance(rowcount, int) and rowcount >= 0

    return {
        "span_id": span_id_hex,
        "trace_id": trace_id_hex,
        "parent_span_id": parent_span_id,
        "name": span_name,
        "kind": "CLIENT",
        "stage": stage,
        "start_time": now_ns,
        "end_time": now_ns if stage == "completed" else None,
        "duration_ns": duration_ns,
        "attributes": attrs,
        "status": {"code": "ERROR" if error else "UNSET", "description": error},
        "events": [],
        # Hook type identification
        "hook_type": "db_query",
        # DB-specific root fields
        "db_system": db_system,
        "db_name": str(db_name) if db_name else None,
        "db_operation": db_operation,
        "db_statement": db_statement,
        "server_address": server_address,
        "server_port": int(server_port) if server_port else None,
        "rowcount": rowcount if valid_rowcount else None,
        "error": error,
    }
162
+
163
+
164
def _db_identifier(
    db_system: str, server_address: str | None, server_port: int | None, db_name: str | None
) -> str:
    """Return ``<db_system>://<host>:<port>/<db_name>`` for governance evaluations.

    Missing parts fall back to ``unknown`` (host), ``0`` (port) and an empty
    database name.
    """
    host = server_address or "unknown"
    port = server_port or 0
    database = db_name or ""
    return f"{db_system}://{host}:{port}/{database}"
169
+
170
+
171
def _evaluate_db_sync(
    identifier: str,
    span_data: dict,
    *,
    is_completed: bool = False,
) -> None:
    """Send one DB governance evaluation synchronously.

    Does nothing when hook governance is not configured. The evaluation is
    made against the current OTel span.

    Args:
        identifier: Identifier string for the database being accessed.
        span_data: Span data dict describing the operation stage.
        is_completed: False for the started stage, where any exception from
            the evaluation propagates (this is how a query gets blocked).
            True for the completed stage, where exceptions are logged at
            debug level and swallowed because the query already executed.
    """
    from . import hook_governance as _hook_gov

    if not _hook_gov.is_configured():
        return

    span = otel_trace.get_current_span()

    if not is_completed:
        # Started stage: let the evaluation's exceptions reach the caller.
        _hook_gov.evaluate_sync(span, identifier=identifier, span_data=span_data)
        return

    try:
        _hook_gov.evaluate_sync(span, identifier=identifier, span_data=span_data)
    except Exception as e:
        logger.debug(f"DB governance completed evaluation error (non-blocking): {e}")
193
+
194
+
195
async def _evaluate_db_async(
    identifier: str,
    span_data: dict,
    *,
    is_completed: bool = False,
) -> None:
    """Send one DB governance evaluation from async code.

    Does nothing when hook governance is not configured. The evaluation is
    made against the current OTel span.

    Args:
        identifier: Identifier string for the database being accessed.
        span_data: Span data dict describing the operation stage.
        is_completed: False for the started stage, where any exception from
            the evaluation propagates (this is how a query gets blocked).
            True for the completed stage, where exceptions are logged at
            debug level and swallowed because the query already executed.
    """
    from . import hook_governance as _hook_gov

    if not _hook_gov.is_configured():
        return

    span = otel_trace.get_current_span()

    if not is_completed:
        # Started stage: let the evaluation's exceptions reach the caller.
        await _hook_gov.evaluate_async(span, identifier=identifier, span_data=span_data)
        return

    try:
        await _hook_gov.evaluate_async(span, identifier=identifier, span_data=span_data)
    except Exception as e:
        logger.debug(f"DB governance completed evaluation error (non-blocking): {e}")
217
+
218
+
219
+ # ═══════════════════════════════════════════════════════════════════════════════
220
+ # CursorTracer patch — intercepts ALL dbapi query execution
221
+ # ═══════════════════════════════════════════════════════════════════════════════
222
+ #
223
+ # OTel dbapi instrumentors (psycopg2, mysql, pymysql) silently
224
+ # discard request_hook/response_hook kwargs. Instead, we monkey-patch
225
+ # CursorTracer.traced_execution AFTER OTel instruments, injecting governance
226
+ # hooks around the query_method call (which runs inside the OTel span context).
227
+ # ═══════════════════════════════════════════════════════════════════════════════
228
+
229
# Saved originals for uninstrumentation
_orig_traced_execution: Callable | None = None
_orig_traced_execution_async: Callable | None = None


def install_cursor_tracer_hooks() -> bool:
    """Monkey-patch OTel CursorTracer to inject governance hooks.

    Must be called AFTER OTel dbapi instrumentors are set up. Replaces
    ``CursorTracer.traced_execution`` (and ``traced_execution_async`` when the
    installed OTel version provides it) with wrappers that hand OTel a
    governed ``query_method``; that callable runs inside the OTel span
    context, so governance evaluations see the DB span as current.

    For each query the wrapper sends a "started" evaluation (which may raise
    to block the query), runs the query, then sends a "completed" evaluation
    carrying duration and either rowcount or the error text.

    Returns:
        True if the patch is in place (newly applied or already present),
        False if the OTel dbapi instrumentation is unavailable.
    """
    global _orig_traced_execution, _orig_traced_execution_async

    try:
        from opentelemetry.instrumentation.dbapi import CursorTracer
    except ImportError:
        logger.debug("OTel dbapi not available for CursorTracer patching")
        return False

    # Guard against double-patching
    if _orig_traced_execution is not None:
        logger.debug("CursorTracer already patched — skipping")
        return True

    # Resolve both originals BEFORE touching module state. Reading a missing
    # attribute after the sync original was already stored used to raise and
    # leave this module claiming "already patched" with nothing patched.
    orig_sync = getattr(CursorTracer, "traced_execution", None)
    orig_async = getattr(CursorTracer, "traced_execution_async", None)
    if orig_sync is None:
        logger.debug("CursorTracer.traced_execution missing — governance hooks not installed")
        return False

    def _query_context(tracer, args):
        """Collect (db_system, db_name, operation, stmt, host, port) for a call."""
        integration = tracer._db_api_integration
        db_system = integration.database_system
        db_name = integration.database
        query = args[0] if args else ""
        props = integration.connection_props
        return (
            db_system,
            db_name,
            _classify_sql(query),
            str(query)[:2000],  # cap the statement size sent to governance
            props.get("host", "unknown"),
            props.get("port"),
        )

    def _safe_rowcount(cursor):
        """Return cursor.rowcount if it is a usable non-negative value, else None."""
        try:
            rc = getattr(cursor, "rowcount", -1)
            if rc is None or rc < 0:
                return None
            return rc
        except Exception:
            # rowcount is best-effort metadata; never fail the query over it.
            return None

    def _gov_traced_execution(self, cursor, query_method, *args, **kwargs):
        """Wrapped traced_execution with governance hooks."""
        db_system, db_name, operation, stmt, host, port = _query_context(self, args)

        def _governed_query(*qargs, **qkwargs):
            # Runs inside OTel span context — get_current_span() returns DB span
            current_span = otel_trace.get_current_span()
            ident = _db_identifier(db_system, host, port, db_name)

            # Build & send "started" span data entry (may raise to block)
            started_sd = _build_db_span_data(
                current_span, db_system, db_name, operation, stmt, host, port, "started",
            )
            _evaluate_db_sync(ident, started_sd)

            start = time.perf_counter()
            try:
                result = query_method(*qargs, **qkwargs)
                duration_ms = (time.perf_counter() - start) * 1000

                # Build & send "completed" span data entry
                completed_sd = _build_db_span_data(
                    current_span, db_system, db_name, operation, stmt, host, port,
                    "completed", duration_ms=duration_ms, rowcount=_safe_rowcount(cursor),
                )
                _evaluate_db_sync(ident, completed_sd, is_completed=True)
                return result
            except Exception as e:
                from openbox_langgraph.errors import GovernanceBlockedError
                # A governance block is not a DB failure: re-raise untouched.
                if isinstance(e, GovernanceBlockedError):
                    raise
                duration_ms = (time.perf_counter() - start) * 1000
                completed_sd = _build_db_span_data(
                    current_span, db_system, db_name, operation, stmt, host, port,
                    "completed", duration_ms=duration_ms, error=str(e),
                )
                _evaluate_db_sync(ident, completed_sd, is_completed=True)
                raise

        return orig_sync(self, cursor, _governed_query, *args, **kwargs)

    async def _gov_traced_execution_async(self, cursor, query_method, *args, **kwargs):
        """Wrapped traced_execution_async with governance hooks."""
        db_system, db_name, operation, stmt, host, port = _query_context(self, args)

        async def _governed_query_async(*qargs, **qkwargs):
            current_span = otel_trace.get_current_span()
            ident = _db_identifier(db_system, host, port, db_name)

            started_sd = _build_db_span_data(
                current_span, db_system, db_name, operation, stmt, host, port, "started",
            )
            await _evaluate_db_async(ident, started_sd)

            start = time.perf_counter()
            try:
                result = await query_method(*qargs, **qkwargs)
                duration_ms = (time.perf_counter() - start) * 1000
                completed_sd = _build_db_span_data(
                    current_span, db_system, db_name, operation, stmt, host, port,
                    "completed", duration_ms=duration_ms, rowcount=_safe_rowcount(cursor),
                )
                await _evaluate_db_async(ident, completed_sd, is_completed=True)
                return result
            except Exception as e:
                from openbox_langgraph.errors import GovernanceBlockedError
                # A governance block is not a DB failure: re-raise untouched.
                if isinstance(e, GovernanceBlockedError):
                    raise
                duration_ms = (time.perf_counter() - start) * 1000
                completed_sd = _build_db_span_data(
                    current_span, db_system, db_name, operation, stmt, host, port,
                    "completed", duration_ms=duration_ms, error=str(e),
                )
                await _evaluate_db_async(ident, completed_sd, is_completed=True)
                raise

        return await orig_async(self, cursor, _governed_query_async, *args, **kwargs)

    _orig_traced_execution = orig_sync
    CursorTracer.traced_execution = _gov_traced_execution
    if orig_async is not None:
        # Only patch the async path when this OTel version actually has one.
        _orig_traced_execution_async = orig_async
        CursorTracer.traced_execution_async = _gov_traced_execution_async
    logger.info("CursorTracer patched with governance hooks (all dbapi libs)")
    return True
371
+
372
+
373
def _uninstall_cursor_tracer_hooks() -> None:
    """Restore the original CursorTracer methods saved at install time.

    No-op when no sync original was saved. The saved references are cleared
    afterwards so the hooks can be installed again.
    """
    global _orig_traced_execution, _orig_traced_execution_async

    if _orig_traced_execution is None:
        return

    try:
        from opentelemetry.instrumentation.dbapi import CursorTracer
    except ImportError:
        pass
    else:
        CursorTracer.traced_execution = _orig_traced_execution
        # Restore the async method only if one was saved; otherwise we would
        # overwrite (or create) the attribute with None.
        if _orig_traced_execution_async is not None:
            CursorTracer.traced_execution_async = _orig_traced_execution_async

    _orig_traced_execution = None
    _orig_traced_execution_async = None
    logger.debug("CursorTracer governance hooks removed")
390
+
391
+
392
+ # ═══════════════════════════════════════════════════════════════════════════════
393
+ # asyncpg — wrapt wrapper AFTER OTel (asyncpg doesn't use CursorTracer)
394
+ # ═══════════════════════════════════════════════════════════════════════════════
395
+
396
_asyncpg_patched = False


def install_asyncpg_hooks() -> bool:
    """Install governance hooks on asyncpg via wrapt wrapping.

    asyncpg's OTel instrumentor uses its own _do_execute (not CursorTracer),
    so we wrap Connection methods with wrapt AFTER OTel instruments. Our
    wrapper is outermost: governance → OTel → raw asyncpg method.

    Must be called AFTER AsyncPGInstrumentor().instrument().

    Returns:
        True if at least one Connection method is wrapped (now or by an
        earlier call), False if asyncpg/wrapt are missing or nothing could
        be wrapped.
    """
    global _asyncpg_patched
    if _asyncpg_patched:
        return True

    try:
        import asyncpg  # noqa: F401 — verify asyncpg is installed
        import wrapt
    except ImportError:
        logger.debug("asyncpg or wrapt not available for governance hooks")
        return False

    def _connection_address(conn):
        """Return (host, port) for an asyncpg connection, best-effort.

        ``_addr`` is treated as a (host, port) pair when it is a sequence and
        as a socket path when it is a string (presumably a unix-domain socket
        connection — there is no TCP port in that case). Indexing a string
        path used to yield its first two characters as host and port.
        """
        addr = getattr(conn, "_addr", None)
        if isinstance(addr, (tuple, list)) and len(addr) >= 2:
            return addr[0], addr[1]
        if isinstance(addr, str) and addr:
            return addr, None
        return "unknown", 5432

    async def _asyncpg_governance_wrapper(wrapped, instance, args, kwargs):
        """Wrapt wrapper for asyncpg Connection methods with governance."""
        query = args[0] if args else ""
        operation = _classify_sql(query)
        stmt = str(query)[:2000]

        # Extract connection metadata
        params = getattr(instance, "_params", None)
        host, port = _connection_address(instance)
        db_name = getattr(params, "database", None) if params else None

        current_span = otel_trace.get_current_span()
        ident = _db_identifier("postgresql", host, port, db_name)

        # "started" evaluation — may raise to block the query.
        started_sd = _build_db_span_data(
            current_span, "postgresql", db_name, operation, stmt, host, port, "started",
        )
        await _evaluate_db_async(ident, started_sd)
        start = time.perf_counter()
        try:
            result = await wrapped(*args, **kwargs)
            duration_ms = (time.perf_counter() - start) * 1000
            completed_sd = _build_db_span_data(
                current_span, "postgresql", db_name, operation, stmt, host, port,
                "completed", duration_ms=duration_ms,
            )
            await _evaluate_db_async(ident, completed_sd, is_completed=True)
            return result
        except Exception as e:
            from openbox_langgraph.errors import GovernanceBlockedError
            # A governance block is not a DB failure: re-raise untouched.
            if isinstance(e, GovernanceBlockedError):
                raise
            duration_ms = (time.perf_counter() - start) * 1000
            completed_sd = _build_db_span_data(
                current_span, "postgresql", db_name, operation, stmt, host, port,
                "completed", duration_ms=duration_ms, error=str(e),
            )
            await _evaluate_db_async(ident, completed_sd, is_completed=True)
            raise

    methods = [
        ("asyncpg.connection", "Connection.execute"),
        ("asyncpg.connection", "Connection.executemany"),
        ("asyncpg.connection", "Connection.fetch"),
        ("asyncpg.connection", "Connection.fetchval"),
        ("asyncpg.connection", "Connection.fetchrow"),
    ]
    patched = 0
    for module, method in methods:
        try:
            wrapt.wrap_function_wrapper(module, method, _asyncpg_governance_wrapper)
            _installed_patches.append((module, method))
            patched += 1
        except (AttributeError, TypeError, ImportError) as e:
            logger.debug(f"asyncpg governance hook failed for {method}: {e}")

    if patched > 0:
        _asyncpg_patched = True
        logger.info(f"asyncpg governance hooks installed: {patched}/{len(methods)} methods")
        return True

    logger.debug("No asyncpg methods patched for governance")
    return False
485
+
486
+
487
def _uninstall_asyncpg_hooks() -> None:
    """Mark the asyncpg governance hooks as no longer installed.

    The wrappers themselves stay in place: wrapt patches can't be cleanly
    unwrapped, so only the tracking flag is cleared.
    """
    global _asyncpg_patched

    if _asyncpg_patched:
        _asyncpg_patched = False
494
+
495
+
496
+ # ═══════════════════════════════════════════════════════════════════════════════
497
+ # pymongo (CommandListener — reliable monitoring for all pymongo versions)
498
+ # ═══════════════════════════════════════════════════════════════════════════════
499
+
500
# Track pymongo listener reference for cleanup
_pymongo_listener: Any = None


def setup_pymongo_hooks() -> None:
    """Install governance hooks on pymongo via monitoring.CommandListener.

    Uses pymongo's native monitoring API instead of wrapt wrapping, which is
    more reliable across pymongo versions and C extension boundaries.

    Note: CommandListener can monitor but cannot block operations (pymongo
    swallows listener exceptions). For blocking support, we also wrap
    Collection methods with wrapt where possible.

    The listener is registered at most once: while ``_pymongo_listener`` is
    set, a repeated call does not register a second listener (which would
    evaluate every command twice).
    NOTE(review): assumes cleanup code resets ``_pymongo_listener`` to None
    when it removes the listener — confirm against the uninstall path.

    IMPORTANT: pymongo.monitoring.register() must be called BEFORE creating
    MongoClient instances. Ensure setup_opentelemetry_for_governance() is
    called early in application startup.
    """
    global _pymongo_listener

    if _pymongo_listener is not None:
        logger.debug("pymongo CommandListener already registered — skipping")
    else:
        try:
            import pymongo.monitoring
        except ImportError:
            logger.debug("pymongo not available for governance hooks")
        else:
            class _GovernanceCommandListener(pymongo.monitoring.CommandListener):
                """Pymongo CommandListener that sends governance evaluations.

                Skips operations already governed by wrapt wrappers (dedup,
                signalled by the thread-local depth counter being > 0).
                Stores the command string from the started event so
                succeeded/failed reuse the same db_statement.
                """

                def started(self, event):
                    # Skip if wrapt wrapper is already handling this operation
                    if getattr(_pymongo_wrapt_depth, 'value', 0) > 0:
                        return
                    try:
                        span = otel_trace.get_current_span()
                        host, port = _extract_pymongo_address(event)
                        cmd_str = str(event.command)[:2000]
                        # Store command string for reuse in succeeded/failed.
                        # At capacity, evict the oldest tenth (dicts keep
                        # insertion order) rather than dropping everything.
                        if len(_pymongo_pending_commands) >= _PYMONGO_PENDING_MAX:
                            keys = list(_pymongo_pending_commands.keys())
                            for k in keys[:max(1, _PYMONGO_PENDING_MAX // 10)]:
                                _pymongo_pending_commands.pop(k, None)
                        _pymongo_pending_commands[event.request_id] = cmd_str
                        started_sd = _build_db_span_data(
                            span, "mongodb", event.database_name, event.command_name,
                            cmd_str, host, port, "started",
                        )
                        ident = _db_identifier("mongodb", host, port, event.database_name)
                        _evaluate_db_sync(ident, started_sd)
                    except Exception as e:
                        logger.debug(f"pymongo governance started error: {e}")

                def _report_completed(self, event, error=None):
                    """Send the 'completed' evaluation for a finished command."""
                    span = otel_trace.get_current_span()
                    host, port = _extract_pymongo_address(event)
                    duration_ms = event.duration_micros / 1000.0
                    # Reuse command string from started event for consistency;
                    # fall back to the command name if it was never stored.
                    cmd_str = _pymongo_pending_commands.pop(
                        event.request_id, event.command_name
                    )
                    completed_sd = _build_db_span_data(
                        span, "mongodb", event.database_name, event.command_name,
                        cmd_str, host, port, "completed",
                        duration_ms=duration_ms, error=error,
                    )
                    ident = _db_identifier("mongodb", host, port, event.database_name)
                    _evaluate_db_sync(ident, completed_sd, is_completed=True)

                def succeeded(self, event):
                    # Skip if wrapt wrapper is already handling this operation
                    if getattr(_pymongo_wrapt_depth, 'value', 0) > 0:
                        _pymongo_pending_commands.pop(event.request_id, None)
                        return
                    try:
                        self._report_completed(event)
                    except Exception as e:
                        logger.debug(f"pymongo governance completed error: {e}")

                def failed(self, event):
                    # Skip if wrapt wrapper is already handling this operation
                    if getattr(_pymongo_wrapt_depth, 'value', 0) > 0:
                        _pymongo_pending_commands.pop(event.request_id, None)
                        return
                    try:
                        self._report_completed(event, error=str(event.failure))
                    except Exception as e:
                        logger.debug(f"pymongo governance failed error: {e}")

            _pymongo_listener = _GovernanceCommandListener()
            pymongo.monitoring.register(_pymongo_listener)
            logger.info("DB governance hooks installed: pymongo (CommandListener)")

    # Also try wrapt wrapping for blocking support (best-effort)
    _setup_pymongo_wrapt_hooks()
605
+
606
+
607
def _extract_pymongo_address(event) -> tuple[str, int]:
    """Extract (host, port) from a pymongo monitoring event.

    Reads ``event.connection_id`` and treats it as a (host, port) sequence.

    Returns:
        ``(host, port)``. Falls back to ``("unknown", 27017)`` when no usable
        address is present, and to port 27017 alone when the host is usable
        but the port is missing or not numeric.
    """
    default_port = 27017
    try:
        addr = event.connection_id  # (host, port) tuple
        if not addr or len(addr) < 2:
            return "unknown", default_port
        host = str(addr[0])
        raw_port = addr[1]
    except (AttributeError, TypeError, IndexError, KeyError):
        return "unknown", default_port

    try:
        return host, int(raw_port)
    except (TypeError, ValueError):
        # Keep the host even when the port is unusable; a non-numeric port
        # previously escaped as ValueError and a None port discarded the host.
        return host, default_port
616
+
617
+
618
def _setup_pymongo_wrapt_hooks() -> None:
    """Best-effort wrapt wrapping of pymongo Collection methods for blocking.

    Each wrapped call sends a "started" evaluation (which may raise to block
    the operation) and a "completed" evaluation. A thread-local depth counter
    makes only the outermost wrapped call fire governance and tells the
    CommandListener to stay silent while a wrapper is active.
    """
    try:
        import wrapt

        from openbox_langgraph.errors import GovernanceBlockedError

        def _collection_wrapper(wrapped, instance, args, kwargs):
            # Increment depth counter — tracks nesting (find_one → find internally).
            # Only the outermost call (depth 0→1) fires governance.
            # Inner calls and CommandListener are suppressed when depth > 0.
            depth = getattr(_pymongo_wrapt_depth, 'value', 0)
            _pymongo_wrapt_depth.value = depth + 1

            # Everything after the increment sits inside try/finally. The
            # "started" evaluation can raise to block the operation; without
            # the decrement the counter stayed > 0 on this thread and every
            # later operation was treated as nested, i.e. never governed.
            try:
                # Nested call — just pass through, outer wrapper handles governance
                if depth > 0:
                    return wrapped(*args, **kwargs)

                # Outermost call — fire governance
                db_name = instance.database.name
                operation = wrapped.__name__
                try:
                    address = instance.database.client.address
                    host, port = address[0], address[1]
                except Exception:
                    # Address is best-effort metadata; resolving it must not
                    # fail the user's operation.
                    host, port = "unknown", 27017
                statement = f"{instance.name}.{operation}"

                current_span = otel_trace.get_current_span()

                # Unique span_id for this pymongo operation (shared by started+completed)
                gov_sid = _generate_span_id()

                ident = _db_identifier("mongodb", host, port, db_name)
                started_sd = _build_db_span_data(
                    current_span, "mongodb", db_name, operation, statement, host, port,
                    "started", gov_span_id=gov_sid,
                )
                _evaluate_db_sync(ident, started_sd)
                start = time.perf_counter()
                try:
                    result = wrapped(*args, **kwargs)
                    duration_ms = (time.perf_counter() - start) * 1000
                    completed_sd = _build_db_span_data(
                        current_span, "mongodb", db_name, operation, statement, host, port,
                        "completed", duration_ms=duration_ms, gov_span_id=gov_sid,
                    )
                    _evaluate_db_sync(ident, completed_sd, is_completed=True)
                    return result
                except GovernanceBlockedError:
                    raise
                except Exception as e:
                    duration_ms = (time.perf_counter() - start) * 1000
                    completed_sd = _build_db_span_data(
                        current_span, "mongodb", db_name, operation, statement, host, port,
                        "completed", duration_ms=duration_ms, error=str(e),
                        gov_span_id=gov_sid,
                    )
                    _evaluate_db_sync(ident, completed_sd, is_completed=True)
                    raise
            finally:
                _pymongo_wrapt_depth.value = getattr(_pymongo_wrapt_depth, 'value', 1) - 1

        methods = ("find", "find_one", "insert_one", "insert_many",
                   "update_one", "update_many", "delete_one", "delete_many",
                   "aggregate", "count_documents")
        patched = 0
        for method in methods:
            try:
                wrapt.wrap_function_wrapper(
                    "pymongo.collection", f"Collection.{method}", _collection_wrapper
                )
                _installed_patches.append(("pymongo.collection", f"Collection.{method}"))
                patched += 1
            except (AttributeError, TypeError):
                pass
        if patched > 0:
            logger.info(f"pymongo wrapt hooks installed: {patched}/{len(methods)} methods")
        else:
            logger.debug("pymongo Collection wrapt hooks failed (C extension or immutable)")
    except ImportError:
        logger.debug("wrapt not available for pymongo blocking hooks")
702
+
703
+
704
+ # ═══════════════════════════════════════════════════════════════════════════════
705
+ # redis (native OTel hooks — returns callables for RedisInstrumentor)
706
+ # ═══════════════════════════════════════════════════════════════════════════════
707
+
708
# Per-span metadata captured by the request hook and consumed by the response
# hook, keyed by id(span):
# (start perf_counter, command, statement, host, port, db_name)
_redis_span_meta: dict[int, tuple[float, str, str, str, int, str]] = {}
_REDIS_META_MAX = 1000


def setup_redis_hooks() -> tuple[Callable, Callable]:
    """Return (request_hook, response_hook) for RedisInstrumentor.instrument().

    request_hook fires at 'started' stage (can raise GovernanceBlockedError).
    response_hook fires at 'completed' stage.
    """

    def _request_hook(span, instance, args, kwargs):
        """OTel Redis request hook — 'started' stage."""
        command = str(args[0]) if args else "UNKNOWN"
        # Cap the statement at 2000 chars like the other DB hooks in this
        # module; Redis arguments can carry arbitrarily large values.
        statement = " ".join(str(a) for a in args)[:2000] if args else ""
        try:
            conn_kwargs = instance.connection_pool.connection_kwargs
            host = conn_kwargs.get("host", "localhost")
            port = conn_kwargs.get("port", 6379)
            db_name = str(conn_kwargs.get("db", 0))
        except AttributeError:
            host, port, db_name = "localhost", 6379, "0"

        ident = _db_identifier("redis", host, port, db_name)
        started_sd = _build_db_span_data(
            span, "redis", db_name, command, statement, host, port, "started"
        )
        _evaluate_db_sync(ident, started_sd)

        # Recorded only after the started evaluation returned, so a blocked
        # command leaves no entry behind. At capacity, evict the oldest tenth.
        if len(_redis_span_meta) >= _REDIS_META_MAX:
            keys = list(_redis_span_meta.keys())
            for k in keys[:max(1, _REDIS_META_MAX // 10)]:
                _redis_span_meta.pop(k, None)
        _redis_span_meta[id(span)] = (
            time.perf_counter(), command, statement, host, port, db_name
        )

    def _response_hook(span, instance, response):
        """OTel Redis response hook — 'completed' stage."""
        meta = _redis_span_meta.pop(id(span), None)
        if meta is None:
            # No matching request entry (e.g. evicted): report defaults with
            # an effectively zero duration.
            meta = (time.perf_counter(), "UNKNOWN", "", "localhost", 6379, "0")
        start_time, command, statement, host, port, db_name = meta
        duration_ms = (time.perf_counter() - start_time) * 1000

        ident = _db_identifier("redis", host, port, db_name)
        completed_sd = _build_db_span_data(
            span, "redis", db_name, command, statement, host, port,
            "completed", duration_ms=duration_ms,
        )
        _evaluate_db_sync(ident, completed_sd, is_completed=True)

    return _request_hook, _response_hook
761
+
762
+
763
+ # ═══════════════════════════════════════════════════════════════════════════════
764
+ # sqlalchemy (native SQLAlchemy before/after_cursor_execute events)
765
+ # ═══════════════════════════════════════════════════════════════════════════════
766
+
767
# Start timestamps of in-flight SQLAlchemy statements,
# keyed by (conn_id, cursor_id).
_SA_TIMINGS_MAX = 1000
_sa_timings: dict[tuple[int, int], float] = {}


def _get_sa_db_system(engine) -> str:
    """Return the db_system label for a SQLAlchemy engine.

    Uses ``engine.dialect.name``; names outside the known table are passed
    through unchanged, and a missing or empty name yields "unknown".
    """
    dialect = getattr(engine, "dialect", None)
    if dialect:
        name = getattr(dialect, "name", "")
    else:
        name = ""

    known = {"postgresql": "postgresql", "mysql": "mysql", "sqlite": "sqlite",
             "oracle": "oracle", "mssql": "mssql"}
    if name in known:
        return known[name]
    return name or "unknown"
779
+
780
+
781
def setup_sqlalchemy_hooks(engine) -> None:
    """Register SQLAlchemy event listeners for governance on the given engine.

    Three listeners are attached to ``engine``:

    - ``before_cursor_execute``: records a start time in ``_sa_timings`` and
      passes a "started" span payload to ``_evaluate_db_sync``.
    - ``after_cursor_execute``: pops the start time and passes a "completed"
      payload with the measured duration.
    - ``handle_error``: pops the start time (if any) and passes a "completed"
      payload carrying the error message.

    Each registered listener is recorded in ``_sqlalchemy_listeners`` so that
    ``uninstrument_all()`` can remove it later.

    Args:
        engine: The object to instrument. Anything that is not a real
            ``sqlalchemy.engine.Engine`` instance is skipped with a debug log.

    Returns:
        None. If sqlalchemy is not importable, or registration fails, the
        function logs at debug level and returns without raising.
    """
    try:
        from sqlalchemy import event
        from sqlalchemy.engine import Engine as _SAEngine
    except ImportError:
        logger.debug("sqlalchemy not available for governance hooks")
        return

    # Only register on real SQLAlchemy Engine instances (not mocks in tests)
    if not isinstance(engine, _SAEngine):
        logger.debug("Skipping SQLAlchemy governance hooks: not a real Engine instance")
        return

    def _describe_target(sa_engine):
        """Return (db_system, db_name, host, port) read from the engine and its URL."""
        url = sa_engine.url
        return _get_sa_db_system(sa_engine), url.database, url.host, url.port

    def _pop_duration_ms(key) -> float:
        """Pop the start time stored under ``key``; return elapsed ms, or 0.0 if absent."""
        start = _sa_timings.pop(key, None)
        # Compare against None explicitly: a 0.0 timestamp is a valid reading.
        if start is None:
            return 0.0
        return (time.perf_counter() - start) * 1000

    def _before_execute(conn, cursor, statement, parameters, context, executemany):
        # Bound the dict: entries whose after/error event never fired would
        # otherwise accumulate. Statements in flight at this moment lose their
        # start time and will report a 0.0 ms duration.
        if len(_sa_timings) >= _SA_TIMINGS_MAX:
            _sa_timings.clear()
        _sa_timings[(id(conn), id(cursor))] = time.perf_counter()

        db_system, db_name, host, port = _describe_target(conn.engine)
        operation = _classify_sql(statement)
        current_span = otel_trace.get_current_span()

        ident = _db_identifier(db_system, host, port, db_name)
        started_sd = _build_db_span_data(
            current_span, db_system, db_name, operation, str(statement), host, port, "started",
        )
        _evaluate_db_sync(ident, started_sd)

    def _after_execute(conn, cursor, statement, parameters, context, executemany):
        duration_ms = _pop_duration_ms((id(conn), id(cursor)))

        db_system, db_name, host, port = _describe_target(conn.engine)
        operation = _classify_sql(statement)
        current_span = otel_trace.get_current_span()

        ident = _db_identifier(db_system, host, port, db_name)
        completed_sd = _build_db_span_data(
            current_span, db_system, db_name, operation, str(statement), host, port,
            "completed", duration_ms=duration_ms,
        )
        _evaluate_db_sync(ident, completed_sd, is_completed=True)

    def _on_error(context):
        """Handle DB errors — clean up timing and send completed with error."""
        cursor = getattr(context, "cursor", None)
        conn = getattr(context, "connection", None)
        # Without both a connection and a cursor there is no timing entry to
        # look up (e.g. the error happened before any cursor existed).
        if conn is not None and cursor is not None:
            duration_ms = _pop_duration_ms((id(conn), id(cursor)))
        else:
            duration_ms = 0.0

        db_system, db_name, host, port = _describe_target(context.engine)

        # The context's statement may be None; str(None) would report the
        # literal text "None" as the SQL, so map it to an empty statement.
        raw_statement = getattr(context, "statement", None)
        statement = "" if raw_statement is None else str(raw_statement)
        operation = _classify_sql(statement)

        error_msg = (
            str(context.original_exception)
            if hasattr(context, "original_exception")
            else "Unknown error"
        )

        current_span = otel_trace.get_current_span()
        ident = _db_identifier(db_system, host, port, db_name)
        completed_sd = _build_db_span_data(
            current_span, db_system, db_name, operation, statement, host, port,
            "completed", duration_ms=duration_ms, error=error_msg,
        )
        _evaluate_db_sync(ident, completed_sd, is_completed=True)

    listeners = (
        ("before_cursor_execute", _before_execute),
        ("after_cursor_execute", _after_execute),
        ("handle_error", _on_error),
    )
    try:
        for event_name, listener_fn in listeners:
            event.listen(engine, event_name, listener_fn)
            # Track each listener as soon as it is attached, so a failure
            # part-way through still leaves the attached ones removable.
            _sqlalchemy_listeners.append((engine, event_name, listener_fn))
        logger.info("DB governance hooks installed: sqlalchemy")
    except Exception as e:
        # autospec mocks or incomplete Engine objects may fail event registration
        logger.debug("Could not register SQLAlchemy governance events: %s", e)
868
+
869
+
870
+ # ═══════════════════════════════════════════════════════════════════════════════
871
+ # Cleanup
872
+ # ═══════════════════════════════════════════════════════════════════════════════
873
+
874
def uninstrument_all() -> None:
    """Remove all installed DB governance hooks.

    Calls the CursorTracer and asyncpg uninstall helpers, removes every
    SQLAlchemy listener recorded in ``_sqlalchemy_listeners``, and clears the
    module-level bookkeeping and timing/tracking containers.

    Listener removal is best-effort: a failure to remove one listener is
    logged at debug level and does not stop the remaining cleanup.
    """
    # Restore original CursorTracer methods
    _uninstall_cursor_tracer_hooks()
    _uninstall_asyncpg_hooks()

    # Remove SQLAlchemy event listeners
    if _sqlalchemy_listeners:
        try:
            from sqlalchemy import event as sa_event
        except ImportError as exc:
            sa_event = None
            logger.debug("sqlalchemy not importable; skipping listener removal: %s", exc)

        if sa_event is not None:
            for engine, event_name, listener_fn in _sqlalchemy_listeners:
                try:
                    sa_event.remove(engine, event_name, listener_fn)
                except Exception as exc:
                    # Keep going: one listener that cannot be removed must not
                    # block removal of the others or the clears below.
                    logger.debug(
                        "Could not remove SQLAlchemy governance listener %s: %s",
                        event_name, exc,
                    )
    _sqlalchemy_listeners.clear()

    # wrapt patches can't be cleanly removed — clear the list for bookkeeping
    _installed_patches.clear()

    # Clear timing/tracking dicts
    _sa_timings.clear()
    _pymongo_pending_commands.clear()
    _redis_span_meta.clear()

    logger.info("DB governance hooks removed")