openbox-temporal-sdk-python 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openbox/py.typed ADDED
File without changes
@@ -0,0 +1,361 @@
1
+ # openbox/span_processor.py
2
+ """
3
+ OpenTelemetry SpanProcessor for workflow-boundary governance.
4
+
5
+ WorkflowSpanProcessor buffers spans per-workflow for batch submission
6
+ to OpenBox Core. Bodies are stored separately via store_body() and merged
7
+ on span end - this keeps bodies OUT of OTel spans but IN the OpenBox buffer.
8
+ """
9
+
10
+ from typing import TYPE_CHECKING, Dict, Optional
11
+ import threading
12
+ import logging
13
+
14
+ # Logger for debugging HITL flow (outside workflow sandbox)
15
+ _logger = logging.getLogger(__name__)
16
+
17
+ if TYPE_CHECKING:
18
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
19
+
20
+ from .types import WorkflowSpanBuffer, Verdict
21
+
22
+
23
class WorkflowSpanProcessor:
    """
    SpanProcessor that buffers finished spans per workflow for batch submission.

    HTTP bodies and headers are stored separately via store_body() and merged
    into the buffered span dict in on_end(). They are never written to the OTel
    span itself, so the fallback processor only ever sees body-free spans.

    Spans are matched to a workflow by their ``temporal.workflow_id``
    attribute; spans without it (for example HTTP client spans) are matched
    through the trace_id -> workflow_id mapping filled by register_trace().

    All internal dictionaries are guarded by a single lock.

    Usage:
        processor = WorkflowSpanProcessor(fallback_processor=batch_processor)

        # Register buffer before workflow starts
        processor.register_workflow(workflow_id, buffer)

        # Spans with temporal.workflow_id attribute are buffered
        # Child spans (same trace_id) are also buffered via trace_id mapping
        # Bodies stored via store_body() are merged on span end

        # Get buffer after workflow completes, spans are in buffer.spans
        buffer = processor.get_buffer(workflow_id)
        spans = buffer.spans  # List of span dicts
    """

    # Span attributes that may carry the request URL: "http.url" is the legacy
    # HTTP semantic-convention key, "url.full" is its stable replacement.
    _URL_ATTRIBUTE_KEYS = ("http.url", "url.full")

    def __init__(
        self,
        fallback_processor: Optional["SpanProcessor"] = None,
        ignored_url_prefixes: Optional[list] = None,
    ):
        """
        Initialize the span processor.

        Args:
            fallback_processor: Optional processor every finished span is
                forwarded to (e.g. an exporter pipeline). It receives the raw
                OTel span, which never contains body data.
            ignored_url_prefixes: URL prefixes whose spans are not buffered
                (e.g. the OpenBox Core API itself).
        """
        self.fallback = fallback_processor
        self._ignored_url_prefixes = set(ignored_url_prefixes or [])
        self._buffers: Dict[str, "WorkflowSpanBuffer"] = {}  # workflow_id -> buffer
        # NOTE(review): nothing in this class removes entries from the two
        # trace maps below, so they grow with every registered trace. Pruning
        # them needs knowledge of the interceptors' call order - confirm there.
        self._trace_to_workflow: Dict[int, str] = {}  # trace_id (int) -> workflow_id
        self._trace_to_activity: Dict[int, str] = {}  # trace_id (int) -> activity_id
        self._body_data: Dict[int, dict] = {}  # span_id (int) -> bodies/headers
        self._verdicts: Dict[str, dict] = {}  # workflow_id -> verdict record
        self._lock = threading.Lock()

    def _should_ignore_span(self, span: "ReadableSpan") -> bool:
        """Return True when the span's URL starts with an ignored prefix."""
        if not self._ignored_url_prefixes or not span.attributes:
            return False

        # str.startswith accepts a tuple, so one call tests every prefix.
        prefixes = tuple(self._ignored_url_prefixes)
        for key in self._URL_ATTRIBUTE_KEYS:
            url = span.attributes.get(key)
            if isinstance(url, str) and url.startswith(prefixes):
                return True
        return False

    # ═══════════════════════════════════════════════════════════════════════════
    # Workflow Buffer Management (called by the governance interceptors)
    # ═══════════════════════════════════════════════════════════════════════════

    def register_workflow(self, workflow_id: str, buffer: "WorkflowSpanBuffer") -> None:
        """
        Register the buffer that collects spans for a workflow.

        An existing buffer registered under the same workflow_id is replaced.

        Args:
            workflow_id: Temporal workflow ID
            buffer: Buffer to collect spans for this workflow
        """
        with self._lock:
            self._buffers[workflow_id] = buffer

    def register_trace(
        self, trace_id: int, workflow_id: str, activity_id: Optional[str] = None
    ) -> None:
        """
        Register trace_id to workflow_id (and optionally activity_id) mapping.

        Child spans (like HTTP calls) don't carry temporal.workflow_id, but
        share the trace_id of the activity span; this mapping lets on_end()
        attribute them to the right workflow.

        Args:
            trace_id: OTel trace ID (integer form)
            workflow_id: Temporal workflow ID
            activity_id: Temporal activity ID (optional, for filtering). A
                falsy value leaves any existing activity mapping untouched.
        """
        with self._lock:
            self._trace_to_workflow[trace_id] = workflow_id
            if activity_id:
                self._trace_to_activity[trace_id] = activity_id

    def get_buffer(self, workflow_id: str) -> Optional["WorkflowSpanBuffer"]:
        """
        Retrieve buffer without removing it.

        Args:
            workflow_id: Temporal workflow ID

        Returns:
            Buffer if found, None otherwise
        """
        with self._lock:
            return self._buffers.get(workflow_id)

    def remove_buffer(self, workflow_id: str) -> Optional["WorkflowSpanBuffer"]:
        """
        Remove and return the buffer; any stored verdict is kept.

        Args:
            workflow_id: Temporal workflow ID

        Returns:
            Buffer if found, None otherwise
        """
        with self._lock:
            return self._buffers.pop(workflow_id, None)

    def unregister_workflow(self, workflow_id: str) -> None:
        """
        Drop both the buffer and the stored verdict of a workflow.

        Unlike remove_buffer(), nothing is returned and the verdict is
        cleared as well.

        Args:
            workflow_id: Temporal workflow ID
        """
        with self._lock:
            self._buffers.pop(workflow_id, None)
            self._verdicts.pop(workflow_id, None)

    # ═══════════════════════════════════════════════════════════════════════════
    # Verdict Storage (called by workflow interceptor for SignalReceived stop)
    # ═══════════════════════════════════════════════════════════════════════════

    def set_verdict(
        self,
        workflow_id: str,
        verdict: "Verdict",
        reason: Optional[str] = None,
        run_id: Optional[str] = None,
    ) -> None:
        """
        Store the governance verdict for a workflow.

        The verdict record is kept in the processor and, when a buffer is
        registered for the workflow, mirrored onto ``buffer.verdict`` and
        ``buffer.verdict_reason``.

        Args:
            workflow_id: Temporal workflow ID
            verdict: Verdict to store
            reason: Optional explanation for the verdict
            run_id: Optional run ID stored alongside the verdict
        """
        with self._lock:
            self._verdicts[workflow_id] = {"verdict": verdict, "reason": reason, "run_id": run_id}
            buffer = self._buffers.get(workflow_id)
            if buffer is not None:
                buffer.verdict = verdict
                buffer.verdict_reason = reason

    def get_verdict(self, workflow_id: str) -> Optional[dict]:
        """Return the stored record ('verdict', 'reason', 'run_id' keys) or None."""
        with self._lock:
            return self._verdicts.get(workflow_id)

    def clear_verdict(self, workflow_id: str) -> None:
        """Clear stored verdict for a workflow."""
        with self._lock:
            self._verdicts.pop(workflow_id, None)

    # ═══════════════════════════════════════════════════════════════════════════
    # Body Storage (called by HTTP hooks in otel_setup.py)
    # ═══════════════════════════════════════════════════════════════════════════

    def store_body(
        self,
        span_id: int,
        request_body: Optional[str] = None,
        response_body: Optional[str] = None,
        request_headers: Optional[dict] = None,
        response_headers: Optional[dict] = None,
    ) -> None:
        """
        Store body and header data for a span.

        The data is kept here, NOT in OTel span attributes, and merged into
        the buffered span dict in on_end(). Repeated calls for one span_id
        accumulate; arguments left as None do not overwrite stored values.

        Args:
            span_id: OTel span ID (integer form)
            request_body: HTTP request body (if available)
            response_body: HTTP response body (if available)
            request_headers: HTTP request headers (if available)
            response_headers: HTTP response headers (if available)
        """
        supplied = {
            "request_body": request_body,
            "response_body": response_body,
            "request_headers": request_headers,
            "response_headers": response_headers,
        }
        with self._lock:
            entry = self._body_data.setdefault(span_id, {})
            entry.update({key: value for key, value in supplied.items() if value is not None})

    def get_pending_body(self, span_id: int) -> Optional[dict]:
        """
        Get body data stored for a span that has not ended yet.

        The returned dict is the stored object itself, not a copy.

        Args:
            span_id: OTel span ID (integer form)

        Returns:
            Dict with the stored body/header keys, or None
        """
        with self._lock:
            return self._body_data.get(span_id)

    # ═══════════════════════════════════════════════════════════════════════════
    # SpanProcessor Interface
    # ═══════════════════════════════════════════════════════════════════════════

    def on_start(self, span, parent_context=None) -> None:
        """Called when span starts. No-op for this processor."""
        pass

    def on_end(self, span: "ReadableSpan") -> None:
        """
        Buffer a finished span under its workflow, then forward it.

        The workflow is taken from the ``temporal.workflow_id`` attribute, or
        from the trace_id mapping when the attribute is absent. Body data
        stored for the span is merged into the buffered dict. Stored body data
        is always released here, even when the span is ignored or no buffer is
        registered, so it cannot accumulate for spans that are never buffered.
        """
        span_id = span.context.span_id

        # Skip spans to ignored URLs (e.g., OpenBox Core API)
        if self._should_ignore_span(span):
            with self._lock:
                self._body_data.pop(span_id, None)
            if self.fallback:
                self.fallback.on_end(span)
            return

        attributes = span.attributes or {}
        workflow_id = attributes.get("temporal.workflow_id")
        activity_id = attributes.get("temporal.activity_id")

        # One lock acquisition covers every shared-state lookup for this span.
        with self._lock:
            if not workflow_id:
                # Child spans (like HTTP calls) are resolved through the trace.
                trace_id = span.context.trace_id
                workflow_id = self._trace_to_workflow.get(trace_id)
                if not activity_id:
                    activity_id = self._trace_to_activity.get(trace_id)
            buffer = self._buffers.get(workflow_id) if workflow_id else None
            body_data = self._body_data.pop(span_id, None)

        if buffer is not None:
            span_data = self._extract_span_data(span)

            # Set activity_id for filtering later
            if activity_id:
                span_data["activity_id"] = activity_id

            # Merge body data (stored separately, NOT in OTel span)
            if body_data:
                span_data.update(body_data)

            buffer.spans.append(span_data)

        # Always forward to fallback (OTel exporter) - WITHOUT body
        if self.fallback:
            self.fallback.on_end(span)

    def _extract_span_data(self, span: "ReadableSpan") -> dict:
        """
        Convert a finished span into a plain dict for the OpenBox API.

        Args:
            span: OTel ReadableSpan

        Returns:
            Dict with hex span/trace/parent IDs, name, kind, start/end time,
            duration_ns, attributes, status and events. Body keys are not
            included; on_end() merges them afterwards.
        """
        parent = span.parent
        parent_span_id = format(parent.span_id, "016x") if parent and parent.span_id else None

        status = None
        if span.status:
            status_code = span.status.status_code
            status = {
                "code": status_code.name if status_code else "UNSET",
                "description": span.status.description,
            }

        events = [
            {
                "name": event.name,
                "timestamp": event.timestamp,
                "attributes": dict(event.attributes) if event.attributes else {},
            }
            for event in (span.events or ())
        ]

        # Compare against None so a zero timestamp still yields a duration.
        duration_ns = None
        if span.end_time is not None and span.start_time is not None:
            duration_ns = span.end_time - span.start_time

        return {
            "span_id": format(span.context.span_id, "016x"),
            "trace_id": format(span.context.trace_id, "032x"),
            "parent_span_id": parent_span_id,
            "name": span.name,
            "kind": span.kind.name if span.kind else None,
            "start_time": span.start_time,
            "end_time": span.end_time,
            "duration_ns": duration_ns,
            "attributes": dict(span.attributes) if span.attributes else {},
            "status": status,
            "events": events,
            # request_body and response_body will be merged from _body_data
        }

    def shutdown(self) -> None:
        """Shut down the fallback processor, if one is configured."""
        if self.fallback:
            self.fallback.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Flush the fallback processor; returns True when there is none."""
        if self.fallback:
            return self.fallback.force_flush(timeout_millis)
        return True
openbox/tracing.py ADDED
@@ -0,0 +1,228 @@
1
+ # openbox/tracing.py
2
+ """
3
+ OpenBox Tracing Decorators for capturing internal function calls.
4
+
5
+ Use the @traced decorator to capture function calls as OpenTelemetry spans.
6
+ These spans will be automatically captured by WorkflowSpanProcessor and
7
+ included in governance events.
8
+
9
+ Usage:
10
+ from openbox.tracing import traced
11
+
12
+ @traced
13
+ def my_function(arg1, arg2):
14
+ return do_something(arg1, arg2)
15
+
16
+ @traced(name="custom-span-name", capture_args=True, capture_result=True)
17
+ async def my_async_function(data):
18
+ return await process(data)
19
+ """
20
+
21
+ import json
22
+ import logging
23
+ from functools import wraps
24
+ from typing import Any, Callable, Optional, TypeVar, Union
25
+
26
+ from opentelemetry import trace
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
# Tracer shared by all traced helpers in this module; created on first use.
_tracer: Optional[trace.Tracer] = None


def _get_tracer() -> trace.Tracer:
    """Return the module tracer, creating it on the first call."""
    global _tracer
    if _tracer is not None:
        return _tracer
    _tracer = trace.get_tracer("openbox.traced")
    return _tracer
40
+
41
+
42
def _safe_serialize(value: Any, max_length: int = 2000) -> str:
    """
    Serialize a value to a bounded string suitable for a span attribute.

    None becomes "null"; lists and dicts are JSON-encoded (values that are not
    JSON-native are stringified); everything else goes through str(). When
    JSON encoding fails (non-string dict keys, circular references), the
    container falls back to str() instead of being discarded.

    Args:
        value: Any object.
        max_length: Maximum number of characters kept before the
            "...[truncated]" marker is appended.

    Returns:
        The serialized text, or "<unserializable>" if even str() fails.
    """
    try:
        if value is None:
            return "null"

        if isinstance(value, (list, dict)):
            try:
                text = json.dumps(value, default=str)
            except (TypeError, ValueError):
                # TypeError: unsupported dict key; ValueError: circular reference.
                text = str(value)
        else:
            text = str(value)

        # Truncate if too long
        if len(text) > max_length:
            return text[:max_length] + "...[truncated]"
        return text
    except Exception:
        # Best effort: a broken __str__/__repr__ must never break tracing.
        return "<unserializable>"
60
+
61
+
62
F = TypeVar("F", bound=Callable[..., Any])


def traced(
    _func: Optional[F] = None,
    *,
    name: Optional[str] = None,
    capture_args: bool = True,
    capture_result: bool = True,
    capture_exception: bool = True,
    max_arg_length: int = 2000,
) -> Union[F, Callable[[F], F]]:
    """
    Decorator to trace function calls as OpenTelemetry spans.

    Works both bare (``@traced``) and with options (``@traced(...)``), on
    regular and ``async def`` functions. Each call runs inside a span that is
    made the current span for the duration of the call.

    Args:
        name: Custom span name. Defaults to function name.
        capture_args: Capture function arguments as span attributes.
        capture_result: Capture return value as span attribute.
        capture_exception: Capture exception details on error. When False the
            span is still marked as failed, but no exception type, message or
            stack trace is recorded on it.
        max_arg_length: Maximum length for serialized arguments and result.

    Returns:
        The wrapped function (bare form) or a decorator (called form).

    Examples:
        # Basic usage
        @traced
        def process_data(input_data):
            return transform(input_data)

        # With options
        @traced(name="data-processing", capture_result=False)
        def process_sensitive_data(data):
            return handle(data)

        # Async functions
        @traced
        async def fetch_data(url):
            return await http_get(url)
    """

    def decorator(func: F) -> F:
        # Callables such as functools.partial objects have no __name__.
        func_name = getattr(func, "__name__", type(func).__name__)
        func_module = getattr(func, "__module__", None)
        span_name = name or func_name

        def open_span():
            # start_as_current_span records the exception and its message by
            # default; tie both switches to capture_exception so that turning
            # the option off really keeps exception details off the span.
            return _get_tracer().start_as_current_span(
                span_name,
                record_exception=capture_exception,
                set_status_on_exception=capture_exception,
            )

        def describe_call(span, args, kwargs):
            # Function metadata plus (optionally) the serialized arguments.
            span.set_attribute("code.function", func_name)
            if func_module:
                span.set_attribute("code.namespace", func_module)
            if capture_args:
                _set_args_attributes(span, args, kwargs, max_arg_length)

        def describe_result(span, result):
            if capture_result:
                span.set_attribute("function.result", _safe_serialize(result, max_arg_length))

        def describe_error(span, exc):
            if capture_exception:
                span.set_attribute("error", True)
                span.set_attribute("error.type", type(exc).__name__)
                span.set_attribute("error.message", str(exc))
            else:
                # Flag the failure without attaching any exception details.
                span.set_status(trace.Status(trace.StatusCode.ERROR))

        if _is_async_function(func):

            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                with open_span() as span:
                    describe_call(span, args, kwargs)
                    try:
                        result = await func(*args, **kwargs)
                        describe_result(span, result)
                        return result
                    except Exception as exc:
                        describe_error(span, exc)
                        raise

            return async_wrapper  # type: ignore

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            with open_span() as span:
                describe_call(span, args, kwargs)
                try:
                    result = func(*args, **kwargs)
                    describe_result(span, result)
                    return result
                except Exception as exc:
                    describe_error(span, exc)
                    raise

        return sync_wrapper  # type: ignore

    # Handle both @traced and @traced() syntax
    if _func is not None:
        return decorator(_func)
    return decorator
178
+
179
+
180
def _is_async_function(func: Callable) -> bool:
    """
    Return True when *func* is a coroutine function (``async def``).

    Uses inspect.iscoroutinefunction; asyncio.iscoroutinefunction is
    deprecated as of Python 3.14.
    """
    import inspect

    return inspect.iscoroutinefunction(func)
184
+
185
+
186
def _set_args_attributes(
    span: trace.Span, args: tuple, kwargs: dict, max_length: int
) -> None:
    """
    Record call arguments on the span.

    Positional arguments are stored as ``function.arg.<index>`` and keyword
    arguments as ``function.kwarg.<name>``; every value is serialized with
    _safe_serialize using *max_length*.
    """
    for position, positional in enumerate(args or ()):
        span.set_attribute(
            f"function.arg.{position}", _safe_serialize(positional, max_length)
        )

    for keyword, keyword_value in (kwargs or {}).items():
        span.set_attribute(
            f"function.kwarg.{keyword}", _safe_serialize(keyword_value, max_length)
        )
197
+
198
+
199
+ # Convenience function to create a span context manager
200
def create_span(
    name: str,
    attributes: Optional[dict] = None,
) -> trace.Span:
    """
    Start and return a span for manual tracing.

    The span is created with ``start_span``, so it is NOT installed as the
    current span: spans started while it is open are not automatically
    parented to it. The caller is responsible for ending the span, either by
    using it in a ``with`` block or by calling ``span.end()``.

    Usage:
        from openbox.tracing import create_span

        with create_span("my-operation", {"input": data}) as span:
            result = do_something()
            span.set_attribute("output", result)

    Args:
        name: Span name
        attributes: Initial attributes; each value is serialized with
            _safe_serialize before being attached.

    Returns:
        The started span
    """
    # Attributes are passed at creation rather than set afterwards, because
    # samplers can only consider information present when the span starts.
    initial_attributes = None
    if attributes:
        initial_attributes = {
            key: _safe_serialize(value) for key, value in attributes.items()
        }

    return _get_tracer().start_span(name, attributes=initial_attributes)