openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,253 @@
1
+ # openbox/span_processor.py
2
+ """
3
+ OpenTelemetry SpanProcessor for workflow governance.
4
+
5
+ WorkflowSpanProcessor manages activity context, trace mappings, and governance
6
+ state (verdicts, abort/halt flags) for hook-level governance. Forwards spans
7
+ to fallback exporters (Jaeger, OTLP, etc.) without buffering.
8
+ """
9
+
10
+ import logging
11
+ import threading
12
+ from typing import TYPE_CHECKING, Optional
13
+
14
+ from .types import Verdict, WorkflowSpanBuffer
15
+
16
+ if TYPE_CHECKING:
17
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
18
+
19
+ _logger = logging.getLogger(__name__)
20
+
21
+
22
+ class WorkflowSpanProcessor:
23
+ """
24
+ SpanProcessor that manages governance state and forwards spans to exporters.
25
+
26
+ Responsibilities:
27
+ - Activity context storage (for hook-level governance payload building)
28
+ - Trace → workflow/activity ID resolution (for hook → activity linkage)
29
+ - Workflow buffer management (verdicts, approvals, abort/halt flags)
30
+ - Span forwarding to fallback exporter (Jaeger, OTLP, etc.)
31
+
32
+ Thread-safe via _lock for all shared state.
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ fallback_processor: Optional["SpanProcessor"] = None,
38
+ ignored_url_prefixes: list | None = None,
39
+ ):
40
+ self.fallback = fallback_processor
41
+ self._ignored_url_prefixes = set(ignored_url_prefixes or [])
42
+ self._buffers: dict[str, WorkflowSpanBuffer] = {} # workflow_id -> buffer
43
+ self._trace_to_workflow: dict[int, str] = {} # trace_id (int) -> workflow_id
44
+ self._trace_to_activity: dict[int, str] = {} # trace_id (int) -> activity_id
45
+ self._verdicts: dict[str, dict] = {} # workflow_id -> {"verdict": Verdict, "reason": str}
46
+ self._activity_context: dict[str, dict] = {} # "{workflow_id}:{activity_id}" -> event data
47
+ # "{workflow_id}:{activity_id}" -> abort reason
48
+ self._aborted_activities: dict[str, str] = {}
49
+ self._halt_requests: dict[str, str] = {} # "{workflow_id}:{activity_id}" -> halt reason
50
+ self._sync_mode: bool = False # Set by middleware when using sync invoke()
51
+ self._last_activity_key: str | None = None # Most recently set activity context key
52
+ self._lock = threading.Lock()
53
+
54
+ def _should_ignore_span(self, span: "ReadableSpan") -> bool:
55
+ """Check if span should be ignored based on URL."""
56
+ if not self._ignored_url_prefixes:
57
+ return False
58
+ url = span.attributes.get("http.url") if span.attributes else None
59
+ if url:
60
+ for prefix in self._ignored_url_prefixes:
61
+ if url.startswith(prefix):
62
+ return True
63
+ return False
64
+
65
+ # ═══════════════════════════════════════════════════════════════════════════
66
+ # Workflow Buffer Management
67
+ # ═══════════════════════════════════════════════════════════════════════════
68
+
69
+ def register_workflow(self, workflow_id: str, buffer: WorkflowSpanBuffer) -> None:
70
+ """Register buffer for a workflow."""
71
+ with self._lock:
72
+ self._buffers[workflow_id] = buffer
73
+
74
+ def register_trace(
75
+ self, trace_id: int, workflow_id: str, activity_id: str | None = None
76
+ ) -> None:
77
+ """Register trace_id → workflow_id (and activity_id) mapping for hook lookups."""
78
+ with self._lock:
79
+ self._trace_to_workflow[trace_id] = workflow_id
80
+ if activity_id:
81
+ self._trace_to_activity[trace_id] = activity_id
82
+
83
+ def get_buffer(self, workflow_id: str) -> WorkflowSpanBuffer | None:
84
+ """Retrieve buffer without removing it."""
85
+ with self._lock:
86
+ return self._buffers.get(workflow_id)
87
+
88
+ def remove_buffer(self, workflow_id: str) -> WorkflowSpanBuffer | None:
89
+ """Remove and return buffer."""
90
+ with self._lock:
91
+ return self._buffers.pop(workflow_id, None)
92
+
93
+ def unregister_workflow(self, workflow_id: str) -> None:
94
+ """Clean all state associated with a workflow to prevent memory leaks."""
95
+ with self._lock:
96
+ self._buffers.pop(workflow_id, None)
97
+ self._verdicts.pop(workflow_id, None)
98
+ for store in (self._aborted_activities, self._halt_requests, self._activity_context):
99
+ stale = [k for k in store if k.startswith(f"{workflow_id}:")]
100
+ for k in stale:
101
+ del store[k]
102
+ stale_traces = [t for t, w in self._trace_to_workflow.items() if w == workflow_id]
103
+ for t in stale_traces:
104
+ del self._trace_to_workflow[t]
105
+ self._trace_to_activity.pop(t, None)
106
+
107
+ # ═══════════════════════════════════════════════════════════════════════════
108
+ # Verdict Storage (workflow interceptor → activity interceptor)
109
+ # ═══════════════════════════════════════════════════════════════════════════
110
+
111
+ def set_verdict(
112
+ self,
113
+ workflow_id: str,
114
+ verdict: Verdict,
115
+ reason: str | None = None,
116
+ run_id: str | None = None,
117
+ ) -> None:
118
+ """Store governance verdict. Called when SignalReceived returns BLOCK/HALT."""
119
+ with self._lock:
120
+ self._verdicts[workflow_id] = {"verdict": verdict, "reason": reason, "run_id": run_id}
121
+ if workflow_id in self._buffers:
122
+ self._buffers[workflow_id].verdict = verdict
123
+ self._buffers[workflow_id].verdict_reason = reason
124
+
125
+ def get_verdict(self, workflow_id: str) -> dict | None:
126
+ """Get stored verdict for a workflow."""
127
+ with self._lock:
128
+ return self._verdicts.get(workflow_id)
129
+
130
+ def clear_verdict(self, workflow_id: str) -> None:
131
+ """Clear stored verdict for a workflow."""
132
+ with self._lock:
133
+ self._verdicts.pop(workflow_id, None)
134
+
135
+ # ═══════════════════════════════════════════════════════════════════════════
136
+ # Activity Context Storage (for hook-level governance)
137
+ # ═══════════════════════════════════════════════════════════════════════════
138
+
139
+ def set_sync_mode(self, enabled: bool) -> None:
140
+ """Enable/disable sync mode for fallback activity context resolution."""
141
+ with self._lock:
142
+ self._sync_mode = enabled
143
+
144
+ def set_activity_context(self, workflow_id: str, activity_id: str, context: dict) -> None:
145
+ """Store ActivityStarted event data for hook-level governance payload building."""
146
+ with self._lock:
147
+ key = f"{workflow_id}:{activity_id}"
148
+ self._activity_context[key] = context
149
+ self._last_activity_key = key
150
+
151
+ def get_activity_context_by_trace(self, trace_id: int) -> dict | None:
152
+ """Look up activity context using trace_id from a child span (hook → activity linkage).
153
+
154
+ LangGraph adaptation: when trace_id lookup fails (asyncio.Task spawns new
155
+ OTel trace contexts), falls back to the most recently set activity context.
156
+
157
+ Fallback strategies (in order):
158
+ 1. Primary: trace_id → workflow_id/activity_id → context (works in async mode)
159
+ 2. Single-activity: exactly one active context → return it (async fallback)
160
+ 3. Sync mode: return most recently set context (sync mode only — sequential execution)
161
+ """
162
+ with self._lock:
163
+ workflow_id = self._trace_to_workflow.get(trace_id)
164
+ activity_id = self._trace_to_activity.get(trace_id)
165
+ if workflow_id and activity_id:
166
+ ctx = self._activity_context.get(f"{workflow_id}:{activity_id}")
167
+ if ctx:
168
+ return ctx
169
+
170
+ # Fallback: LangGraph spawns asyncio.Tasks with new trace contexts
171
+ if len(self._activity_context) == 1:
172
+ last_key = list(self._activity_context.keys())[-1]
173
+ return self._activity_context[last_key]
174
+
175
+ # Sync mode fallback: trace_id fragments across thread pool boundary.
176
+ # Safe because sync execution is sequential — one activity at a time.
177
+ if self._sync_mode and self._last_activity_key:
178
+ return self._activity_context.get(self._last_activity_key)
179
+ return None
180
+
181
+ def clear_activity_context(self, workflow_id: str, activity_id: str) -> None:
182
+ """Clear buffered activity context after activity completes."""
183
+ with self._lock:
184
+ self._activity_context.pop(f"{workflow_id}:{activity_id}", None)
185
+
186
+ # ═══════════════════════════════════════════════════════════════════════════
187
+ # Activity Abort Signal (block subsequent hooks after BLOCK/HALT/REQUIRE_APPROVAL)
188
+ # ═══════════════════════════════════════════════════════════════════════════
189
+
190
+ def set_activity_abort(self, workflow_id: str, activity_id: str, reason: str) -> None:
191
+ """Set abort flag for an activity. Subsequent hooks will raise immediately."""
192
+ with self._lock:
193
+ self._aborted_activities[f"{workflow_id}:{activity_id}"] = reason
194
+
195
+ def get_activity_abort(self, workflow_id: str, activity_id: str) -> str | None:
196
+ """Check if activity is aborted. Returns reason string or None."""
197
+ with self._lock:
198
+ return self._aborted_activities.get(f"{workflow_id}:{activity_id}")
199
+
200
+ def clear_activity_abort(self, workflow_id: str, activity_id: str) -> None:
201
+ """Clear abort flag for an activity (on retry or completion)."""
202
+ with self._lock:
203
+ self._aborted_activities.pop(f"{workflow_id}:{activity_id}", None)
204
+
205
+ # ═══════════════════════════════════════════════════════════════════════════
206
+ # Halt Request (hook → activity interceptor for HALT verdict)
207
+ # ═══════════════════════════════════════════════════════════════════════════
208
+
209
+ def set_halt_requested(self, workflow_id: str, activity_id: str, reason: str) -> None:
210
+ """Hook sets this when HALT verdict received. Activity interceptor calls terminate()."""
211
+ with self._lock:
212
+ self._halt_requests[f"{workflow_id}:{activity_id}"] = reason
213
+
214
+ def get_halt_requested(self, workflow_id: str, activity_id: str) -> str | None:
215
+ """Check if HALT was requested by a hook. Returns reason or None."""
216
+ with self._lock:
217
+ return self._halt_requests.get(f"{workflow_id}:{activity_id}")
218
+
219
+ def clear_halt_requested(self, workflow_id: str, activity_id: str) -> None:
220
+ """Clear halt request flag."""
221
+ with self._lock:
222
+ self._halt_requests.pop(f"{workflow_id}:{activity_id}", None)
223
+
224
+ # ═══════════════════════════════════════════════════════════════════════════
225
+ # SpanProcessor Interface
226
+ # ═══════════════════════════════════════════════════════════════════════════
227
+
228
+ def on_start(self, span, parent_context=None) -> None:
229
+ """Called when span starts. No-op."""
230
+
231
+ def _on_ending(self, span) -> None:
232
+ """Called when span is ending (before on_end). Required by newer OTel SDK."""
233
+
234
+ def on_end(self, span: "ReadableSpan") -> None:
235
+ """Called when span ends. Forward to fallback exporter only."""
236
+ if self._should_ignore_span(span):
237
+ if self.fallback:
238
+ self.fallback.on_end(span)
239
+ return
240
+
241
+ if self.fallback:
242
+ self.fallback.on_end(span)
243
+
244
+ def shutdown(self) -> None:
245
+ """Shutdown the processor."""
246
+ if self.fallback:
247
+ self.fallback.shutdown()
248
+
249
+ def force_flush(self, timeout_millis: int = 30000) -> bool:
250
+ """Force flush any buffered spans."""
251
+ if self.fallback:
252
+ return self.fallback.force_flush(timeout_millis)
253
+ return True
@@ -0,0 +1,352 @@
1
+ # openbox/tracing.py
2
+ """
3
+ OpenBox Tracing Decorators for capturing internal function calls.
4
+
5
+ Use the @traced decorator to capture function calls as OpenTelemetry spans.
6
+ These spans will be automatically captured by WorkflowSpanProcessor and
7
+ included in governance events.
8
+
9
+ Usage:
10
+ from openbox.tracing import traced
11
+
12
+ @traced
13
+ def my_function(arg1, arg2):
14
+ return do_something(arg1, arg2)
15
+
16
+ @traced(name="custom-span-name", capture_args=True, capture_result=True)
17
+ async def my_async_function(data):
18
+ return await process(data)
19
+ """
20
+
21
+ import json
22
+ import logging
23
+ import time as _time
24
+ from collections.abc import Callable
25
+ from functools import wraps
26
+ from typing import Any, TypeVar
27
+
28
+ from opentelemetry import trace
29
+
30
+ from . import hook_governance as _hook_gov
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
def _build_traced_span_data(
    span, func_name: str, module: str, stage: str,
    error: str | None = None, duration_ms: float | None = None,
    args: Any = None, result: Any = None,
) -> dict:
    """Build span data dict for a @traced function call.

    attributes: OTel-original only. All custom data at root level.

    Args:
        span: Span the call is running under; its ids come from
            ``_hook_gov.extract_span_context`` and its ``attributes`` (when it
            has a mapping of them) are copied into the payload.
        func_name: Name of the traced function.
        module: Module name of the traced function.
        stage: Lifecycle stage; ``end_time`` is only filled for ``"completed"``.
        error: Error description; also drives the ``status`` code.
        duration_ms: Measured call duration in milliseconds, if known.
        args: Already-serialized call arguments, if captured.
        result: Already-serialized return value, if captured.

    Returns:
        A plain dict describing the function-call span.
    """
    # Local import: Mapping is not among this module's top-level imports.
    from collections.abc import Mapping

    span_id_hex, trace_id_hex, parent_span_id = _hook_gov.extract_span_context(span)

    # Accept any Mapping, not just dict: span implementations may expose their
    # attributes through a read-only mapping view, which a dict-only check
    # would silently discard.
    raw_attrs = getattr(span, "attributes", None)
    attrs = dict(raw_attrs) if isinstance(raw_attrs, Mapping) else {}

    now_ns = _time.time_ns()
    # "is not None" so that a measured duration of exactly 0 ms is still reported.
    duration_ns = int(duration_ms * 1_000_000) if duration_ms is not None else None
    end_time = now_ns if stage == "completed" else None
    # With a known duration the start is back-dated from "now"; otherwise the
    # payload is stamped with the current time.
    start_time = now_ns - duration_ns if duration_ns is not None else now_ns

    return {
        "span_id": span_id_hex,
        "trace_id": trace_id_hex,
        "parent_span_id": parent_span_id,
        "name": getattr(span, "name", None) or func_name,
        "kind": "INTERNAL",
        "stage": stage,
        "start_time": start_time,
        "end_time": end_time,
        "duration_ns": duration_ns,
        "attributes": attrs,
        "status": {"code": "ERROR" if error else "UNSET", "description": error},
        "events": [],
        # Hook type identification
        "hook_type": "function_call",
        # Function-specific root fields
        "function": func_name,
        "module": module,
        "args": args,
        "result": result,
        "error": error,
    }
77
+
78
# Cached tracer used by @traced and create_span; filled in on first use.
_tracer: trace.Tracer | None = None


def _get_tracer() -> trace.Tracer:
    """Return the module-wide ``openbox.traced`` tracer, creating it on first call."""
    global _tracer
    if _tracer is not None:
        return _tracer
    _tracer = trace.get_tracer("openbox.traced")
    return _tracer
88
+
89
+
90
+ def _safe_serialize(value: Any, max_length: int = 2000) -> str:
91
+ """Safely serialize a value to string for span attributes."""
92
+ try:
93
+ if value is None:
94
+ return "null"
95
+ if isinstance(value, (str, int, float, bool)):
96
+ result = str(value)
97
+ elif isinstance(value, (list, dict)):
98
+ result = json.dumps(value, default=str)
99
+ else:
100
+ result = str(value)
101
+
102
+ # Truncate if too long
103
+ if len(result) > max_length:
104
+ return result[:max_length] + "...[truncated]"
105
+ return result
106
+ except Exception:
107
+ return "<unserializable>"
108
+
109
+
110
F = TypeVar("F", bound=Callable[..., Any])


def traced(
    _func: F | None = None,
    *,
    name: str | None = None,
    capture_args: bool = True,
    capture_result: bool = True,
    capture_exception: bool = True,
    max_arg_length: int = 2000,
) -> F | Callable[[F], F]:
    """
    Decorator to trace function calls as OpenTelemetry spans.

    Each call runs inside a span opened with ``start_as_current_span``. When
    hook governance is configured (``_hook_gov.is_configured()``), a
    "started" payload is evaluated before the call and a "completed" payload
    (carrying either the result or the error) after it.

    An exception raised by the completed-stage governance evaluation
    propagates unchanged; it is not treated as a failure of the wrapped
    function and does not trigger a second governance call.

    Args:
        name: Custom span name. Defaults to function name.
        capture_args: Capture function arguments as span attributes.
        capture_result: Capture return value as span attribute.
        capture_exception: Capture exception details on error.
        max_arg_length: Maximum length for serialized arguments.

    Examples:
        # Basic usage
        @traced
        def process_data(input_data):
            return transform(input_data)

        # With options
        @traced(name="data-processing", capture_result=False)
        def process_sensitive_data(data):
            return handle(data)

        # Async functions
        @traced
        async def fetch_data(url):
            return await http_get(url)
    """

    def decorator(func: F) -> F:
        func_name = func.__name__
        module_name = func.__module__
        span_name = name or func_name

        def _elapsed_ms(started_at: float) -> float:
            """Milliseconds elapsed since ``started_at`` (a perf_counter reading)."""
            return (_time.perf_counter() - started_at) * 1000

        def _started_payload(span, args, kwargs) -> dict | None:
            """Tag the span for this call; return the 'started' payload or None."""
            # Set function metadata
            span.set_attribute("code.function", func_name)
            span.set_attribute("code.namespace", module_name)

            # Capture arguments
            if capture_args:
                _set_args_attributes(span, args, kwargs, max_arg_length)

            if not _hook_gov.is_configured():
                return None
            args_data = (
                _safe_serialize({"args": args, "kwargs": kwargs}, max_arg_length)
                if capture_args else None
            )
            return _build_traced_span_data(
                span, func_name, module_name, "started", args=args_data
            )

        def _completed_payload(span, result, started_at: float) -> dict | None:
            """Record the result on the span; return the 'completed' payload or None."""
            duration_ms = _elapsed_ms(started_at)
            # Serialize once and reuse for both the attribute and the payload.
            result_data = _safe_serialize(result, max_arg_length) if capture_result else None
            if capture_result:
                span.set_attribute("function.result", result_data)

            if not _hook_gov.is_configured():
                return None
            return _build_traced_span_data(
                span, func_name, module_name, "completed",
                duration_ms=duration_ms, result=result_data,
            )

        def _failed_payload(span, exc: Exception, started_at: float) -> dict | None:
            """Record the error on the span; return the error 'completed' payload or None."""
            duration_ms = _elapsed_ms(started_at)
            if capture_exception:
                span.set_attribute("error", True)
                span.set_attribute("error.type", type(exc).__name__)
                span.set_attribute("error.message", str(exc))

            if not _hook_gov.is_configured():
                return None
            # Report how long the call ran before failing, like the success path.
            return _build_traced_span_data(
                span, func_name, module_name, "completed",
                error=str(exc), duration_ms=duration_ms,
            )

        if _is_async_function(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                tracer = _get_tracer()
                with tracer.start_as_current_span(span_name) as span:
                    # Governance: started stage
                    payload = _started_payload(span, args, kwargs)
                    if payload is not None:
                        await _hook_gov.evaluate_async(
                            span, identifier=func_name, span_data=payload
                        )

                    started_at = _time.perf_counter()
                    try:
                        # Only the wrapped call is guarded, so governance
                        # exceptions below are never mistaken for its errors.
                        result = await func(*args, **kwargs)
                    except Exception as exc:
                        # Governance: completed stage with error
                        payload = _failed_payload(span, exc, started_at)
                        if payload is not None:
                            await _hook_gov.evaluate_async(
                                span, identifier=func_name, span_data=payload
                            )
                        raise

                    # Governance: completed stage
                    payload = _completed_payload(span, result, started_at)
                    if payload is not None:
                        await _hook_gov.evaluate_async(
                            span, identifier=func_name, span_data=payload
                        )
                    return result

            return async_wrapper  # type: ignore

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            tracer = _get_tracer()
            with tracer.start_as_current_span(span_name) as span:
                # Governance: started stage
                payload = _started_payload(span, args, kwargs)
                if payload is not None:
                    _hook_gov.evaluate_sync(
                        span, identifier=func_name, span_data=payload
                    )

                started_at = _time.perf_counter()
                try:
                    # Only the wrapped call is guarded, so governance
                    # exceptions below are never mistaken for its errors.
                    result = func(*args, **kwargs)
                except Exception as exc:
                    # Governance: completed stage with error
                    payload = _failed_payload(span, exc, started_at)
                    if payload is not None:
                        _hook_gov.evaluate_sync(
                            span, identifier=func_name, span_data=payload
                        )
                    raise

                # Governance: completed stage
                payload = _completed_payload(span, result, started_at)
                if payload is not None:
                    _hook_gov.evaluate_sync(
                        span, identifier=func_name, span_data=payload
                    )
                return result

        return sync_wrapper  # type: ignore

    # Handle both @traced and @traced() syntax
    if _func is not None:
        return decorator(_func)
    return decorator
302
+
303
+
304
+ def _is_async_function(func: Callable) -> bool:
305
+ """Check if function is async."""
306
+ import asyncio
307
+ return asyncio.iscoroutinefunction(func)
308
+
309
+
310
def _set_args_attributes(
    span: trace.Span, args: tuple, kwargs: dict, max_length: int
) -> None:
    """Record call arguments on ``span``.

    Positional arguments are stored as ``function.arg.<index>`` and keyword
    arguments as ``function.kwarg.<name>``; every value is passed through
    ``_safe_serialize`` with ``max_length``.
    """
    for position, positional in enumerate(args or ()):
        span.set_attribute(f"function.arg.{position}", _safe_serialize(positional, max_length))

    for kw_name, kw_value in (kwargs or {}).items():
        span.set_attribute(f"function.kwarg.{kw_name}", _safe_serialize(kw_value, max_length))
321
+
322
+
323
# Convenience helper for manual (non-decorator) tracing
def create_span(
    name: str,
    attributes: dict | None = None,
) -> trace.Span:
    """
    Start a span for manual tracing and return it.

    Usage:
        from openbox.tracing import create_span

        with create_span("my-operation", {"input": data}) as span:
            result = do_something()
            span.set_attribute("output", result)

    NOTE(review): the span is started with ``tracer.start_span``; presumably
    that does not install it as the current span, so spans opened inside the
    ``with`` block would not be parented to it — confirm against the
    OpenTelemetry API.

    Args:
        name: Span name
        attributes: Initial attributes to set on the span; each value is
            stored as the string produced by ``_safe_serialize``

    Returns:
        The started span
    """
    new_span = _get_tracer().start_span(name)

    for attr_key, attr_value in (attributes or {}).items():
        new_span.set_attribute(attr_key, _safe_serialize(attr_value))

    return new_span