PyPI - openbox-temporal-sdk-python - Versions diffs - 1.0.0__py3-none-any.whl - Mend

openbox-temporal-sdk-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

openbox/__init__.py +107 -0
openbox/activities.py +163 -0
openbox/activity_interceptor.py +755 -0
openbox/config.py +274 -0
openbox/otel_setup.py +969 -0
openbox/py.typed +0 -0
openbox/span_processor.py +361 -0
openbox/tracing.py +228 -0
openbox/types.py +166 -0
openbox/worker.py +257 -0
openbox/workflow_interceptor.py +264 -0
openbox_temporal_sdk_python-1.0.0.dist-info/METADATA +1214 -0
openbox_temporal_sdk_python-1.0.0.dist-info/RECORD +15 -0
openbox_temporal_sdk_python-1.0.0.dist-info/WHEEL +4 -0
openbox_temporal_sdk_python-1.0.0.dist-info/licenses/LICENSE +21 -0

openbox/py.typed ADDED Viewed

File without changes

openbox/span_processor.py ADDED Viewed

@@ -0,0 +1,361 @@
+# openbox/span_processor.py
+"""
+OpenTelemetry SpanProcessor for workflow-boundary governance.
+WorkflowSpanProcessor buffers spans per-workflow for batch submission
+to OpenBox Core. Bodies are stored separately via store_body() and merged
+on span end - this keeps bodies OUT of OTel spans but IN the OpenBox buffer.
+"""
+from typing import TYPE_CHECKING, Dict, Optional
+import threading
+import logging
+# Logger for debugging HITL flow (outside workflow sandbox)
+_logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+from .types import WorkflowSpanBuffer, Verdict
+class WorkflowSpanProcessor:
+    """
+    SpanProcessor that buffers spans per-workflow for batch submission.
+    Bodies are stored separately via store_body() and merged on span end.
+    This keeps bodies OUT of OTel spans but IN the OpenBox buffer.
+    Thread-safe: Uses workflow_id from span attributes as key, with trace_id
+    as fallback for child spans (like HTTP spans) that don't have workflow_id.
+    Usage:
+        processor = WorkflowSpanProcessor(fallback_processor=batch_processor)
+        # Register buffer before workflow starts
+        processor.register_workflow(workflow_id, buffer)
+        # Spans with temporal.workflow_id attribute are buffered
+        # Child spans (same trace_id) are also buffered via trace_id mapping
+        # Bodies stored via store_body() are merged on span end
+        # Get buffer after workflow completes, spans are in buffer.spans
+        buffer = processor.get_buffer(workflow_id)
+        spans = buffer.spans  # List of span dicts
+    """
+    def __init__(
+        self,
+        fallback_processor: Optional["SpanProcessor"] = None,
+        ignored_url_prefixes: Optional[list] = None,
+    ):
+        """
+        Initialize the span processor.
+        Args:
+            fallback_processor: Optional processor to forward spans to (e.g., Jaeger exporter).
+                               Spans are forwarded WITHOUT body data for privacy.
+            ignored_url_prefixes: List of URL prefixes to ignore (e.g., OpenBox Core API)
+        """
+        self.fallback = fallback_processor
+        self._ignored_url_prefixes = set(ignored_url_prefixes or [])
+        self._buffers: Dict[str, WorkflowSpanBuffer] = {}  # workflow_id -> buffer
+        self._trace_to_workflow: Dict[int, str] = {}  # trace_id (int) -> workflow_id
+        self._trace_to_activity: Dict[int, str] = {}  # trace_id (int) -> activity_id
+        self._body_data: Dict[int, dict] = {}  # span_id (int) -> {request_body, response_body}
+        self._verdicts: Dict[str, dict] = {}  # workflow_id -> {"verdict": Verdict, "reason": str}
+        self._lock = threading.Lock()
+    def _should_ignore_span(self, span: "ReadableSpan") -> bool:
+        """Check if span should be ignored based on URL."""
+        if not self._ignored_url_prefixes:
+            return False
+        # Check http.url attribute
+        url = span.attributes.get("http.url") if span.attributes else None
+        if url:
+            for prefix in self._ignored_url_prefixes:
+                if url.startswith(prefix):
+                    return True
+        return False
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Workflow Buffer Management (called by GovernanceWorkflowInterceptor)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def register_workflow(self, workflow_id: str, buffer: WorkflowSpanBuffer) -> None:
+        """
+        Register buffer for a workflow.
+        Called by ActivityGovernanceInterceptor when first activity starts.
+        Args:
+            workflow_id: Temporal workflow ID
+            buffer: Buffer to collect spans for this workflow
+        """
+        with self._lock:
+            self._buffers[workflow_id] = buffer
+    def register_trace(self, trace_id: int, workflow_id: str, activity_id: str = None) -> None:
+        """
+        Register trace_id to workflow_id (and optionally activity_id) mapping.
+        Called when creating an activity span to enable child span buffering.
+        Child spans (like HTTP calls) don't have temporal.workflow_id attribute,
+        but share the same trace_id with the parent activity span.
+        Args:
+            trace_id: OTel trace ID (integer form)
+            workflow_id: Temporal workflow ID
+            activity_id: Temporal activity ID (optional, for filtering)
+        """
+        with self._lock:
+            self._trace_to_workflow[trace_id] = workflow_id
+            if activity_id:
+                self._trace_to_activity[trace_id] = activity_id
+    def get_buffer(self, workflow_id: str) -> Optional[WorkflowSpanBuffer]:
+        """
+        Retrieve buffer without removing it.
+        Args:
+            workflow_id: Temporal workflow ID
+        Returns:
+            Buffer if found, None otherwise
+        """
+        with self._lock:
+            return self._buffers.get(workflow_id)
+    def remove_buffer(self, workflow_id: str) -> Optional[WorkflowSpanBuffer]:
+        """
+        Remove and return buffer.
+        Called by GovernanceWorkflowInterceptor after submission.
+        Args:
+            workflow_id: Temporal workflow ID
+        Returns:
+            Buffer if found, None otherwise
+        """
+        with self._lock:
+            return self._buffers.pop(workflow_id, None)
+    def unregister_workflow(self, workflow_id: str) -> None:
+        """
+        Remove buffer for a workflow (alias for remove_buffer).
+        Called when clearing stale buffers from previous workflow runs.
+        Args:
+            workflow_id: Temporal workflow ID
+        """
+        with self._lock:
+            self._buffers.pop(workflow_id, None)
+            self._verdicts.pop(workflow_id, None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Verdict Storage (called by workflow interceptor for SignalReceived stop)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def set_verdict(self, workflow_id: str, verdict: Verdict, reason: str = None, run_id: str = None) -> None:
+        """Store governance verdict for a workflow. Called when SignalReceived returns BLOCK/HALT."""
+        with self._lock:
+            self._verdicts[workflow_id] = {"verdict": verdict, "reason": reason, "run_id": run_id}
+            if workflow_id in self._buffers:
+                self._buffers[workflow_id].verdict = verdict
+                self._buffers[workflow_id].verdict_reason = reason
+    def get_verdict(self, workflow_id: str) -> Optional[dict]:
+        """Get stored verdict for a workflow. Returns dict with 'verdict' (Verdict) and 'reason' keys."""
+        with self._lock:
+            return self._verdicts.get(workflow_id)
+    def clear_verdict(self, workflow_id: str) -> None:
+        """Clear stored verdict for a workflow."""
+        with self._lock:
+            self._verdicts.pop(workflow_id, None)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # Body Storage (called by HTTP hooks in otel_setup.py)
+    # ═══════════════════════════════════════════════════════════════════════════
+    def store_body(
+        self,
+        span_id: int,
+        request_body: Optional[str] = None,
+        response_body: Optional[str] = None,
+        request_headers: Optional[dict] = None,
+        response_headers: Optional[dict] = None,
+    ) -> None:
+        """
+        Store body and header data for a span (called from HTTP hooks).
+        Bodies and headers are stored here, NOT in OTel span attributes.
+        They will be merged with span data in on_end().
+        Args:
+            span_id: OTel span ID (integer form)
+            request_body: HTTP request body (if available)
+            response_body: HTTP response body (if available)
+            request_headers: HTTP request headers (if available)
+            response_headers: HTTP response headers (if available)
+        """
+        with self._lock:
+            if span_id not in self._body_data:
+                self._body_data[span_id] = {}
+            if request_body is not None:
+                self._body_data[span_id]["request_body"] = request_body
+            if response_body is not None:
+                self._body_data[span_id]["response_body"] = response_body
+            if request_headers is not None:
+                self._body_data[span_id]["request_headers"] = request_headers
+            if response_headers is not None:
+                self._body_data[span_id]["response_headers"] = response_headers
+    def get_pending_body(self, span_id: int) -> Optional[dict]:
+        """
+        Get pending body data for a span (not yet merged).
+        Used by activity interceptor to propagate body data to child spans
+        before the activity span has ended (and on_end merged the data).
+        Args:
+            span_id: OTel span ID (integer form)
+        Returns:
+            Dict with request_body and/or response_body, or None
+        """
+        with self._lock:
+            return self._body_data.get(span_id)
+    # ═══════════════════════════════════════════════════════════════════════════
+    # SpanProcessor Interface
+    # ═══════════════════════════════════════════════════════════════════════════
+    def on_start(self, span, parent_context=None) -> None:
+        """Called when span starts. No-op for this processor."""
+        pass
+    def on_end(self, span: "ReadableSpan") -> None:
+        """
+        Called when span ends. Buffer by workflow_id.
+        Spans with temporal.workflow_id attribute are buffered directly.
+        Child spans (like HTTP calls) without workflow_id are buffered via
+        trace_id -> workflow_id mapping.
+        Body data stored via store_body() is merged here.
+        """
+        # Skip spans to ignored URLs (e.g., OpenBox Core API)
+        if self._should_ignore_span(span):
+            if self.fallback:
+                self.fallback.on_end(span)
+            return
+        # Get workflow_id from span attributes (direct)
+        workflow_id = span.attributes.get("temporal.workflow_id") if span.attributes else None
+        activity_id = span.attributes.get("temporal.activity_id") if span.attributes else None
+        # Fallback: look up by trace_id (for child spans like HTTP calls)
+        if not workflow_id:
+            with self._lock:
+                workflow_id = self._trace_to_workflow.get(span.context.trace_id)
+                # Also get activity_id from trace mapping for child spans
+                if not activity_id:
+                    activity_id = self._trace_to_activity.get(span.context.trace_id)
+        if workflow_id:
+            with self._lock:
+                buffer = self._buffers.get(workflow_id)
+            if buffer:
+                span_data = self._extract_span_data(span)
+                # Set activity_id for filtering later
+                if activity_id:
+                    span_data["activity_id"] = activity_id
+                # Merge body data (stored separately, NOT in OTel span)
+                span_id = span.context.span_id
+                with self._lock:
+                    if span_id in self._body_data:
+                        body_data = self._body_data.pop(span_id)
+                        span_data.update(body_data)
+                buffer.spans.append(span_data)
+        # Always forward to fallback (OTel exporter) - WITHOUT body
+        if self.fallback:
+            self.fallback.on_end(span)
+    def _extract_span_data(self, span: "ReadableSpan") -> dict:
+        """
+        Extract span data for OpenBox API.
+        Args:
+            span: OTel ReadableSpan
+        Returns:
+            Dictionary matching SpanData structure
+        """
+        # Format span_id and trace_id as hex strings
+        span_id_hex = format(span.context.span_id, "016x")
+        trace_id_hex = format(span.context.trace_id, "032x")
+        # Format parent span ID if present
+        parent_span_id = None
+        if span.parent and span.parent.span_id:
+            parent_span_id = format(span.parent.span_id, "016x")
+        # Extract status
+        status = None
+        if span.status:
+            status = {
+                "code": span.status.status_code.name if span.status.status_code else "UNSET",
+                "description": span.status.description,
+            }
+        # Extract events
+        events = []
+        if span.events:
+            for event in span.events:
+                events.append(
+                    {
+                        "name": event.name,
+                        "timestamp": event.timestamp,
+                        "attributes": dict(event.attributes) if event.attributes else {},
+                    }
+                )
+        # Calculate duration
+        duration_ns = None
+        if span.end_time and span.start_time:
+            duration_ns = span.end_time - span.start_time
+        return {
+            "span_id": span_id_hex,
+            "trace_id": trace_id_hex,
+            "parent_span_id": parent_span_id,
+            "name": span.name,
+            "kind": span.kind.name if span.kind else None,
+            "start_time": span.start_time,
+            "end_time": span.end_time,
+            "duration_ns": duration_ns,
+            "attributes": dict(span.attributes) if span.attributes else {},
+            "status": status,
+            "events": events,
+            # request_body and response_body will be merged from _body_data
+        }
+    def shutdown(self) -> None:
+        """Shutdown the processor."""
+        if self.fallback:
+            self.fallback.shutdown()
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Force flush any buffered spans."""
+        if self.fallback:
+            return self.fallback.force_flush(timeout_millis)
+        return True

openbox/tracing.py ADDED Viewed

@@ -0,0 +1,228 @@
+# openbox/tracing.py
+"""
+OpenBox Tracing Decorators for capturing internal function calls.
+Use the @traced decorator to capture function calls as OpenTelemetry spans.
+These spans will be automatically captured by WorkflowSpanProcessor and
+included in governance events.
+Usage:
+    from openbox.tracing import traced
+    @traced
+    def my_function(arg1, arg2):
+        return do_something(arg1, arg2)
+    @traced(name="custom-span-name", capture_args=True, capture_result=True)
+    async def my_async_function(data):
+        return await process(data)
+"""
+import json
+import logging
+from functools import wraps
+from typing import Any, Callable, Optional, TypeVar, Union
+from opentelemetry import trace
+logger = logging.getLogger(__name__)
+# Get tracer for internal function tracing
+_tracer: Optional[trace.Tracer] = None
+def _get_tracer() -> trace.Tracer:
+    """Lazy tracer initialization."""
+    global _tracer
+    if _tracer is None:
+        _tracer = trace.get_tracer("openbox.traced")
+    return _tracer
+def _safe_serialize(value: Any, max_length: int = 2000) -> str:
+    """Safely serialize a value to string for span attributes."""
+    try:
+        if value is None:
+            return "null"
+        if isinstance(value, (str, int, float, bool)):
+            result = str(value)
+        elif isinstance(value, (list, dict)):
+            result = json.dumps(value, default=str)
+        else:
+            result = str(value)
+        # Truncate if too long
+        if len(result) > max_length:
+            return result[:max_length] + "...[truncated]"
+        return result
+    except Exception:
+        return "<unserializable>"
+F = TypeVar("F", bound=Callable[..., Any])
+def traced(
+    _func: Optional[F] = None,
+    *,
+    name: Optional[str] = None,
+    capture_args: bool = True,
+    capture_result: bool = True,
+    capture_exception: bool = True,
+    max_arg_length: int = 2000,
+) -> Union[F, Callable[[F], F]]:
+    """
+    Decorator to trace function calls as OpenTelemetry spans.
+    The spans will be captured by WorkflowSpanProcessor and included
+    in ActivityCompleted governance events.
+    Args:
+        name: Custom span name. Defaults to function name.
+        capture_args: Capture function arguments as span attributes.
+        capture_result: Capture return value as span attribute.
+        capture_exception: Capture exception details on error.
+        max_arg_length: Maximum length for serialized arguments.
+    Examples:
+        # Basic usage
+        @traced
+        def process_data(input_data):
+            return transform(input_data)
+        # With options
+        @traced(name="data-processing", capture_result=False)
+        def process_sensitive_data(data):
+            return handle(data)
+        # Async functions
+        @traced
+        async def fetch_data(url):
+            return await http_get(url)
+    """
+    def decorator(func: F) -> F:
+        span_name = name or func.__name__
+        is_async = _is_async_function(func)
+        if is_async:
+            @wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                tracer = _get_tracer()
+                with tracer.start_as_current_span(span_name) as span:
+                    # Set function metadata
+                    span.set_attribute("code.function", func.__name__)
+                    span.set_attribute("code.namespace", func.__module__)
+                    # Capture arguments
+                    if capture_args:
+                        _set_args_attributes(span, args, kwargs, max_arg_length)
+                    try:
+                        result = await func(*args, **kwargs)
+                        # Capture result
+                        if capture_result:
+                            span.set_attribute(
+                                "function.result", _safe_serialize(result, max_arg_length)
+                            )
+                        return result
+                    except Exception as e:
+                        if capture_exception:
+                            span.set_attribute("error", True)
+                            span.set_attribute("error.type", type(e).__name__)
+                            span.set_attribute("error.message", str(e))
+                        raise
+            return async_wrapper  # type: ignore
+        else:
+            @wraps(func)
+            def sync_wrapper(*args, **kwargs):
+                tracer = _get_tracer()
+                with tracer.start_as_current_span(span_name) as span:
+                    # Set function metadata
+                    span.set_attribute("code.function", func.__name__)
+                    span.set_attribute("code.namespace", func.__module__)
+                    # Capture arguments
+                    if capture_args:
+                        _set_args_attributes(span, args, kwargs, max_arg_length)
+                    try:
+                        result = func(*args, **kwargs)
+                        # Capture result
+                        if capture_result:
+                            span.set_attribute(
+                                "function.result", _safe_serialize(result, max_arg_length)
+                            )
+                        return result
+                    except Exception as e:
+                        if capture_exception:
+                            span.set_attribute("error", True)
+                            span.set_attribute("error.type", type(e).__name__)
+                            span.set_attribute("error.message", str(e))
+                        raise
+            return sync_wrapper  # type: ignore
+    # Handle both @traced and @traced() syntax
+    if _func is not None:
+        return decorator(_func)
+    return decorator
+def _is_async_function(func: Callable) -> bool:
+    """Check if function is async."""
+    import asyncio
+    return asyncio.iscoroutinefunction(func)
+def _set_args_attributes(
+    span: trace.Span, args: tuple, kwargs: dict, max_length: int
+) -> None:
+    """Set function arguments as span attributes."""
+    if args:
+        for i, arg in enumerate(args):
+            span.set_attribute(f"function.arg.{i}", _safe_serialize(arg, max_length))
+    if kwargs:
+        for key, value in kwargs.items():
+            span.set_attribute(f"function.kwarg.{key}", _safe_serialize(value, max_length))
+# Convenience function to create a span context manager
+def create_span(
+    name: str,
+    attributes: Optional[dict] = None,
+) -> trace.Span:
+    """
+    Create a span context manager for manual tracing.
+    Usage:
+        from openbox.tracing import create_span
+        with create_span("my-operation", {"input": data}) as span:
+            result = do_something()
+            span.set_attribute("output", result)
+    Args:
+        name: Span name
+        attributes: Initial attributes to set on the span
+    Returns:
+        Span context manager
+    """
+    tracer = _get_tracer()
+    span = tracer.start_span(name)
+    if attributes:
+        for key, value in attributes.items():
+            span.set_attribute(key, _safe_serialize(value))
+    return span