PyPI - openbox-langgraph-sdk-python - Versions diffs - 0.1.0__py3-none-any.whl - Mend

openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

openbox_langgraph/__init__.py +130 -0
openbox_langgraph/client.py +358 -0
openbox_langgraph/config.py +264 -0
openbox_langgraph/db_governance_hooks.py +897 -0
openbox_langgraph/errors.py +114 -0
openbox_langgraph/file_governance_hooks.py +413 -0
openbox_langgraph/hitl.py +88 -0
openbox_langgraph/hook_governance.py +397 -0
openbox_langgraph/http_governance_hooks.py +695 -0
openbox_langgraph/langgraph_handler.py +1616 -0
openbox_langgraph/otel_setup.py +468 -0
openbox_langgraph/span_processor.py +253 -0
openbox_langgraph/tracing.py +352 -0
openbox_langgraph/types.py +485 -0
openbox_langgraph/verdict_handler.py +203 -0
openbox_langgraph_sdk_python-0.1.0.dist-info/METADATA +492 -0
openbox_langgraph_sdk_python-0.1.0.dist-info/RECORD +18 -0
openbox_langgraph_sdk_python-0.1.0.dist-info/WHEEL +4 -0

openbox_langgraph/span_processor.py ADDED Viewed

@@ -0,0 +1,253 @@
+# openbox_langgraph/span_processor.py
+"""
+OpenTelemetry SpanProcessor for workflow governance.
+WorkflowSpanProcessor manages activity context, trace mappings, and governance
+state (verdicts, abort/halt flags) for hook-level governance. Forwards spans
+to fallback exporters (Jaeger, OTLP, etc.) without buffering.
+"""
+import logging
+import threading
+from typing import TYPE_CHECKING, Optional
+from .types import Verdict, WorkflowSpanBuffer
+if TYPE_CHECKING:
+    from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+_logger = logging.getLogger(__name__)
+class WorkflowSpanProcessor:
+    """
+    SpanProcessor that manages governance state and forwards spans to exporters.
+    Responsibilities:
+    - Activity context storage (for hook-level governance payload building)
+    - Trace → workflow/activity ID resolution (for hook → activity linkage)
+    - Workflow buffer management (verdicts, approvals, abort/halt flags)
+    - Span forwarding to fallback exporter (Jaeger, OTLP, etc.)
+    Thread-safe via _lock for all shared state.
+    """
+    def __init__(
+        self,
+        fallback_processor: Optional["SpanProcessor"] = None,
+        ignored_url_prefixes: list | None = None,
+    ):
+        self.fallback = fallback_processor
+        self._ignored_url_prefixes = set(ignored_url_prefixes or [])
+        self._buffers: dict[str, WorkflowSpanBuffer] = {}  # workflow_id -> buffer
+        self._trace_to_workflow: dict[int, str] = {}  # trace_id (int) -> workflow_id
+        self._trace_to_activity: dict[int, str] = {}  # trace_id (int) -> activity_id
+        self._verdicts: dict[str, dict] = {}  # workflow_id -> {"verdict": Verdict, "reason": str}
+        self._activity_context: dict[str, dict] = {}  # "{workflow_id}:{activity_id}" -> event data
+        # "{workflow_id}:{activity_id}" -> abort reason
+        self._aborted_activities: dict[str, str] = {}
+        self._halt_requests: dict[str, str] = {}  # "{workflow_id}:{activity_id}" -> halt reason
+        self._sync_mode: bool = False  # Set by middleware when using sync invoke()
+        self._last_activity_key: str | None = None  # Most recently set activity context key
+        self._lock = threading.Lock()
+    def _should_ignore_span(self, span: "ReadableSpan") -> bool:
+        """Check if span should be ignored based on URL."""
+        if not self._ignored_url_prefixes:
+            return False
+        url = span.attributes.get("http.url") if span.attributes else None
+        if url:
+            for prefix in self._ignored_url_prefixes:
+                if url.startswith(prefix):
+                    return True
+        return False
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Workflow Buffer Management
+    # ═══════════════════════════════════════════════════════════════════════════
+    def register_workflow(self, workflow_id: str, buffer: WorkflowSpanBuffer) -> None:
+        """Register buffer for a workflow."""
+        with self._lock:
+            self._buffers[workflow_id] = buffer
+    def register_trace(
+        self, trace_id: int, workflow_id: str, activity_id: str | None = None
+    ) -> None:
+        """Register trace_id → workflow_id (and activity_id) mapping for hook lookups."""
+        with self._lock:
+            self._trace_to_workflow[trace_id] = workflow_id
+            if activity_id:
+                self._trace_to_activity[trace_id] = activity_id
+    def get_buffer(self, workflow_id: str) -> WorkflowSpanBuffer | None:
+        """Retrieve buffer without removing it."""
+        with self._lock:
+            return self._buffers.get(workflow_id)
+    def remove_buffer(self, workflow_id: str) -> WorkflowSpanBuffer | None:
+        """Remove and return buffer."""
+        with self._lock:
+            return self._buffers.pop(workflow_id, None)
+    def unregister_workflow(self, workflow_id: str) -> None:
+        """Clean all state associated with a workflow to prevent memory leaks."""
+        with self._lock:
+            self._buffers.pop(workflow_id, None)
+            self._verdicts.pop(workflow_id, None)
+            for store in (self._aborted_activities, self._halt_requests, self._activity_context):
+                stale = [k for k in store if k.startswith(f"{workflow_id}:")]
+                for k in stale:
+                    del store[k]
+            stale_traces = [t for t, w in self._trace_to_workflow.items() if w == workflow_id]
+            for t in stale_traces:
+                del self._trace_to_workflow[t]
+                self._trace_to_activity.pop(t, None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Verdict Storage (workflow interceptor → activity interceptor)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def set_verdict(
+        self,
+        workflow_id: str,
+        verdict: Verdict,
+        reason: str | None = None,
+        run_id: str | None = None,
+    ) -> None:
+        """Store governance verdict. Called when SignalReceived returns BLOCK/HALT."""
+        with self._lock:
+            self._verdicts[workflow_id] = {"verdict": verdict, "reason": reason, "run_id": run_id}
+            if workflow_id in self._buffers:
+                self._buffers[workflow_id].verdict = verdict
+                self._buffers[workflow_id].verdict_reason = reason
+    def get_verdict(self, workflow_id: str) -> dict | None:
+        """Get stored verdict for a workflow."""
+        with self._lock:
+            return self._verdicts.get(workflow_id)
+    def clear_verdict(self, workflow_id: str) -> None:
+        """Clear stored verdict for a workflow."""
+        with self._lock:
+            self._verdicts.pop(workflow_id, None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Activity Context Storage (for hook-level governance)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def set_sync_mode(self, enabled: bool) -> None:
+        """Enable/disable sync mode for fallback activity context resolution."""
+        with self._lock:
+            self._sync_mode = enabled
+    def set_activity_context(self, workflow_id: str, activity_id: str, context: dict) -> None:
+        """Store ActivityStarted event data for hook-level governance payload building."""
+        with self._lock:
+            key = f"{workflow_id}:{activity_id}"
+            self._activity_context[key] = context
+            self._last_activity_key = key
+    def get_activity_context_by_trace(self, trace_id: int) -> dict | None:
+        """Look up activity context using trace_id from a child span (hook → activity linkage).
+        LangGraph adaptation: when trace_id lookup fails (asyncio.Task spawns new
+        OTel trace contexts), falls back to the most recently set activity context.
+        Fallback strategies (in order):
+        1. Primary: trace_id → workflow_id/activity_id → context (works in async mode)
+        2. Single-activity: exactly one active context → return it (async fallback)
+        3. Sync mode: return most recently set context (sync mode only — sequential execution)
+        """
+        with self._lock:
+            workflow_id = self._trace_to_workflow.get(trace_id)
+            activity_id = self._trace_to_activity.get(trace_id)
+            if workflow_id and activity_id:
+                ctx = self._activity_context.get(f"{workflow_id}:{activity_id}")
+                if ctx:
+                    return ctx
+            # Fallback: LangGraph spawns asyncio.Tasks with new trace contexts
+            if len(self._activity_context) == 1:
+                last_key = list(self._activity_context.keys())[-1]
+                return self._activity_context[last_key]
+            # Sync mode fallback: trace_id fragments across thread pool boundary.
+            # Safe because sync execution is sequential — one activity at a time.
+            if self._sync_mode and self._last_activity_key:
+                return self._activity_context.get(self._last_activity_key)
+            return None
+    def clear_activity_context(self, workflow_id: str, activity_id: str) -> None:
+        """Clear buffered activity context after activity completes."""
+        with self._lock:
+            self._activity_context.pop(f"{workflow_id}:{activity_id}", None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Activity Abort Signal (block subsequent hooks after BLOCK/HALT/REQUIRE_APPROVAL)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def set_activity_abort(self, workflow_id: str, activity_id: str, reason: str) -> None:
+        """Set abort flag for an activity. Subsequent hooks will raise immediately."""
+        with self._lock:
+            self._aborted_activities[f"{workflow_id}:{activity_id}"] = reason
+    def get_activity_abort(self, workflow_id: str, activity_id: str) -> str | None:
+        """Check if activity is aborted. Returns reason string or None."""
+        with self._lock:
+            return self._aborted_activities.get(f"{workflow_id}:{activity_id}")
+    def clear_activity_abort(self, workflow_id: str, activity_id: str) -> None:
+        """Clear abort flag for an activity (on retry or completion)."""
+        with self._lock:
+            self._aborted_activities.pop(f"{workflow_id}:{activity_id}", None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Halt Request (hook → activity interceptor for HALT verdict)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def set_halt_requested(self, workflow_id: str, activity_id: str, reason: str) -> None:
+        """Hook sets this when HALT verdict received. Activity interceptor calls terminate()."""
+        with self._lock:
+            self._halt_requests[f"{workflow_id}:{activity_id}"] = reason
+    def get_halt_requested(self, workflow_id: str, activity_id: str) -> str | None:
+        """Check if HALT was requested by a hook. Returns reason or None."""
+        with self._lock:
+            return self._halt_requests.get(f"{workflow_id}:{activity_id}")
+    def clear_halt_requested(self, workflow_id: str, activity_id: str) -> None:
+        """Clear halt request flag."""
+        with self._lock:
+            self._halt_requests.pop(f"{workflow_id}:{activity_id}", None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # SpanProcessor Interface
+    # ═══════════════════════════════════════════════════════════════════════════
+    def on_start(self, span, parent_context=None) -> None:
+        """Called when span starts. No-op."""
+    def _on_ending(self, span) -> None:
+        """Called when span is ending (before on_end). Required by newer OTel SDK."""
+    def on_end(self, span: "ReadableSpan") -> None:
+        """Called when span ends. Forward to fallback exporter only."""
+        if self._should_ignore_span(span):
+            if self.fallback:
+                self.fallback.on_end(span)
+            return
+        if self.fallback:
+            self.fallback.on_end(span)
+    def shutdown(self) -> None:
+        """Shutdown the processor."""
+        if self.fallback:
+            self.fallback.shutdown()
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush any buffered spans."""
+        if self.fallback:
+            return self.fallback.force_flush(timeout_millis)
+        return True

openbox_langgraph/tracing.py ADDED Viewed

@@ -0,0 +1,352 @@
+# openbox_langgraph/tracing.py
+"""
+OpenBox Tracing Decorators for capturing internal function calls.
+Use the @traced decorator to capture function calls as OpenTelemetry spans.
+These spans will be automatically captured by WorkflowSpanProcessor and
+included in governance events.
+Usage:
+    from openbox_langgraph.tracing import traced
+    @traced
+    def my_function(arg1, arg2):
+        return do_something(arg1, arg2)
+    @traced(name="custom-span-name", capture_args=True, capture_result=True)
+    async def my_async_function(data):
+        return await process(data)
+"""
+import json
+import logging
+import time as _time
+from collections.abc import Callable
+from functools import wraps
+from typing import Any, TypeVar
+from opentelemetry import trace
+from . import hook_governance as _hook_gov
+logger = logging.getLogger(__name__)
+def _build_traced_span_data(
+    span, func_name: str, module: str, stage: str,
+    error: str | None = None, duration_ms: float | None = None,
+    args: Any = None, result: Any = None,
+) -> dict:
+    """Build span data dict for a @traced function call.
+    attributes: OTel-original only. All custom data at root level.
+    """
+    span_id_hex, trace_id_hex, parent_span_id = _hook_gov.extract_span_context(span)
+    raw_attrs = getattr(span, 'attributes', None)
+    attrs = dict(raw_attrs) if raw_attrs and isinstance(raw_attrs, dict) else {}
+    now_ns = _time.time_ns()
+    duration_ns = int(duration_ms * 1_000_000) if duration_ms else None
+    end_time = now_ns if stage == "completed" else None
+    start_time = (now_ns - duration_ns) if duration_ns else now_ns
+    return {
+        "span_id": span_id_hex,
+        "trace_id": trace_id_hex,
+        "parent_span_id": parent_span_id,
+        "name": getattr(span, 'name', None) or func_name,
+        "kind": "INTERNAL",
+        "stage": stage,
+        "start_time": start_time,
+        "end_time": end_time,
+        "duration_ns": duration_ns,
+        "attributes": attrs,
+        "status": {"code": "ERROR" if error else "UNSET", "description": error},
+        "events": [],
+        # Hook type identification
+        "hook_type": "function_call",
+        # Function-specific root fields
+        "function": func_name,
+        "module": module,
+        "args": args,
+        "result": result,
+        "error": error,
+    }
+# Get tracer for internal function tracing
+_tracer: trace.Tracer | None = None
+def _get_tracer() -> trace.Tracer:
+    """Lazy tracer initialization."""
+    global _tracer
+    if _tracer is None:
+        _tracer = trace.get_tracer("openbox.traced")
+    return _tracer
+def _safe_serialize(value: Any, max_length: int = 2000) -> str:
+    """Safely serialize a value to string for span attributes."""
+    try:
+        if value is None:
+            return "null"
+        if isinstance(value, (str, int, float, bool)):
+            result = str(value)
+        elif isinstance(value, (list, dict)):
+            result = json.dumps(value, default=str)
+        else:
+            result = str(value)
+        # Truncate if too long
+        if len(result) > max_length:
+            return result[:max_length] + "...[truncated]"
+        return result
+    except Exception:
+        return "<unserializable>"
+F = TypeVar("F", bound=Callable[..., Any])
+def traced(
+    _func: F | None = None,
+    *,
+    name: str | None = None,
+    capture_args: bool = True,
+    capture_result: bool = True,
+    capture_exception: bool = True,
+    max_arg_length: int = 2000,
+) -> F | Callable[[F], F]:
+    """
+    Decorator to trace function calls as OpenTelemetry spans.
+    The spans will be captured by WorkflowSpanProcessor and included
+    in ActivityCompleted governance events.
+    Args:
+        name: Custom span name. Defaults to function name.
+        capture_args: Capture function arguments as span attributes.
+        capture_result: Capture return value as span attribute.
+        capture_exception: Capture exception details on error.
+        max_arg_length: Maximum length for serialized arguments.
+    Examples:
+        # Basic usage
+        @traced
+        def process_data(input_data):
+            return transform(input_data)
+        # With options
+        @traced(name="data-processing", capture_result=False)
+        def process_sensitive_data(data):
+            return handle(data)
+        # Async functions
+        @traced
+        async def fetch_data(url):
+            return await http_get(url)
+    """
+    def decorator(func: F) -> F:
+        span_name = name or func.__name__
+        is_async = _is_async_function(func)
+        if is_async:
+            @wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                tracer = _get_tracer()
+                with tracer.start_as_current_span(span_name) as span:
+                    # Set function metadata
+                    span.set_attribute("code.function", func.__name__)
+                    span.set_attribute("code.namespace", func.__module__)
+                    # Capture arguments
+                    if capture_args:
+                        _set_args_attributes(span, args, kwargs, max_arg_length)
+                    # Governance: started stage
+                    if _hook_gov.is_configured():
+                        _args_data = (
+                            _safe_serialize({"args": args, "kwargs": kwargs}, max_arg_length)
+                            if capture_args else None
+                        )
+                        started_sd = _build_traced_span_data(
+                            span, func.__name__, func.__module__, "started", args=_args_data
+                        )
+                        await _hook_gov.evaluate_async(
+                            span, identifier=func.__name__, span_data=started_sd
+                        )
+                    _start = _time.perf_counter()
+                    try:
+                        result = await func(*args, **kwargs)
+                        _dur_ms = (_time.perf_counter() - _start) * 1000
+                        # Capture result
+                        if capture_result:
+                            span.set_attribute(
+                                "function.result", _safe_serialize(result, max_arg_length)
+                            )
+                        # Governance: completed stage
+                        if _hook_gov.is_configured():
+                            _result_data = (
+                                _safe_serialize(result, max_arg_length) if capture_result else None
+                            )
+                            completed_sd = _build_traced_span_data(
+                                span, func.__name__, func.__module__, "completed",
+                                duration_ms=_dur_ms, result=_result_data,
+                            )
+                            await _hook_gov.evaluate_async(
+                                span, identifier=func.__name__, span_data=completed_sd
+                            )
+                        return result
+                    except Exception as e:
+                        if capture_exception:
+                            span.set_attribute("error", True)
+                            span.set_attribute("error.type", type(e).__name__)
+                            span.set_attribute("error.message", str(e))
+                        # Governance: completed stage with error
+                        if _hook_gov.is_configured():
+                            error_sd = _build_traced_span_data(
+                                span, func.__name__, func.__module__, "completed", error=str(e)
+                            )
+                            await _hook_gov.evaluate_async(
+                                span, identifier=func.__name__, span_data=error_sd
+                            )
+                        raise
+            return async_wrapper  # type: ignore
+        else:
+            @wraps(func)
+            def sync_wrapper(*args, **kwargs):
+                tracer = _get_tracer()
+                with tracer.start_as_current_span(span_name) as span:
+                    # Set function metadata
+                    span.set_attribute("code.function", func.__name__)
+                    span.set_attribute("code.namespace", func.__module__)
+                    # Capture arguments
+                    if capture_args:
+                        _set_args_attributes(span, args, kwargs, max_arg_length)
+                    # Governance: started stage
+                    if _hook_gov.is_configured():
+                        _args_data = (
+                            _safe_serialize({"args": args, "kwargs": kwargs}, max_arg_length)
+                            if capture_args else None
+                        )
+                        started_sd = _build_traced_span_data(
+                            span, func.__name__, func.__module__, "started", args=_args_data
+                        )
+                        _hook_gov.evaluate_sync(
+                            span, identifier=func.__name__, span_data=started_sd
+                        )
+                    _start = _time.perf_counter()
+                    try:
+                        result = func(*args, **kwargs)
+                        _dur_ms = (_time.perf_counter() - _start) * 1000
+                        # Capture result
+                        if capture_result:
+                            span.set_attribute(
+                                "function.result", _safe_serialize(result, max_arg_length)
+                            )
+                        # Governance: completed stage
+                        if _hook_gov.is_configured():
+                            _result_data = (
+                                _safe_serialize(result, max_arg_length) if capture_result else None
+                            )
+                            completed_sd = _build_traced_span_data(
+                                span, func.__name__, func.__module__, "completed",
+                                duration_ms=_dur_ms, result=_result_data,
+                            )
+                            _hook_gov.evaluate_sync(
+                                span, identifier=func.__name__, span_data=completed_sd
+                            )
+                        return result
+                    except Exception as e:
+                        if capture_exception:
+                            span.set_attribute("error", True)
+                            span.set_attribute("error.type", type(e).__name__)
+                            span.set_attribute("error.message", str(e))
+                        # Governance: completed stage with error
+                        if _hook_gov.is_configured():
+                            error_sd = _build_traced_span_data(
+                                span, func.__name__, func.__module__, "completed", error=str(e)
+                            )
+                            _hook_gov.evaluate_sync(
+                                span, identifier=func.__name__, span_data=error_sd
+                            )
+                        raise
+            return sync_wrapper  # type: ignore
+    # Handle both @traced and @traced() syntax
+    if _func is not None:
+        return decorator(_func)
+    return decorator
+def _is_async_function(func: Callable) -> bool:
+    """Check if function is async."""
+    import asyncio
+    return asyncio.iscoroutinefunction(func)
+def _set_args_attributes(
+    span: trace.Span, args: tuple, kwargs: dict, max_length: int
+) -> None:
+    """Set function arguments as span attributes."""
+    if args:
+        for i, arg in enumerate(args):
+            span.set_attribute(f"function.arg.{i}", _safe_serialize(arg, max_length))
+    if kwargs:
+        for key, value in kwargs.items():
+            span.set_attribute(f"function.kwarg.{key}", _safe_serialize(value, max_length))
+# Convenience function to create a span context manager
+def create_span(
+    name: str,
+    attributes: dict | None = None,
+) -> trace.Span:
+    """
+    Create a span context manager for manual tracing.
+    Usage:
+        from openbox.tracing import create_span
+        with create_span("my-operation", {"input": data}) as span:
+            result = do_something()
+            span.set_attribute("output", result)
+    Args:
+        name: Span name
+        attributes: Initial attributes to set on the span
+    Returns:
+        Span context manager
+    """
+    tracer = _get_tracer()
+    span = tracer.start_span(name)
+    if attributes:
+        for key, value in attributes.items():
+            span.set_attribute(key, _safe_serialize(value))
+    return span