openbox-langgraph-sdk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1616 @@
1
+ """OpenBox LangGraph SDK — OpenBoxLangGraphHandler.
2
+
3
+ Wraps any compiled LangGraph graph and processes the v2 event stream to apply
4
+ OpenBox governance at every node, tool, and LLM invocation.
5
+
6
+ For framework-specific integrations (e.g. DeepAgents) use the dedicated
7
+ `openbox-deepagent` package which extends this handler.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import os
14
+ import sys
15
+ import time
16
+ import uuid
17
+ from collections.abc import AsyncIterator, Callable
18
+ from dataclasses import dataclass, field
19
+ from typing import Any
20
+ from uuid import UUID
21
+
22
+ from langchain_core.callbacks import AsyncCallbackHandler
23
+ from langchain_core.messages import BaseMessage
24
+ from opentelemetry import context as otel_context
25
+ from opentelemetry import trace as otel_trace
26
+
27
+ from openbox_langgraph.client import GovernanceClient
28
+ from openbox_langgraph.config import GovernanceConfig, get_global_config, merge_config
29
+ from openbox_langgraph.errors import (
30
+ ApprovalExpiredError,
31
+ ApprovalRejectedError,
32
+ ApprovalTimeoutError,
33
+ GovernanceBlockedError,
34
+ GovernanceHaltError,
35
+ GuardrailsValidationError,
36
+ )
37
+ from openbox_langgraph.hitl import HITLPollParams, poll_until_decision
38
+ from openbox_langgraph.types import (
39
+ GovernanceVerdictResponse,
40
+ LangChainGovernanceEvent,
41
+ LangGraphStreamEvent,
42
+ rfc3339_now,
43
+ safe_serialize,
44
+ )
45
+ from openbox_langgraph.verdict_handler import (
46
+ enforce_verdict,
47
+ lang_graph_event_to_context,
48
+ )
49
+
50
+ _logger = logging.getLogger(__name__)
51
+
52
+ _otel_tracer = otel_trace.get_tracer("openbox-langgraph")
53
+
54
+
55
def _extract_governance_blocked(exc: Exception) -> GovernanceBlockedError | None:
    """Search an exception's cause/context chain for a GovernanceBlockedError.

    LLM SDKs (OpenAI, Anthropic) wrap httpx errors. When an OTel hook raises
    GovernanceBlockedError inside httpx, the LLM SDK wraps it as
    APIConnectionError. Walking ``__cause__`` / ``__context__`` recovers the
    original governance error from such a wrapper.

    Args:
        exc: The outermost exception to inspect.

    Returns:
        The first GovernanceBlockedError found in the chain, or None.
    """
    visited: set[int] = set()  # guard against cyclic exception chains
    current: BaseException | None = exc
    while current is not None and id(current) not in visited:
        if isinstance(current, GovernanceBlockedError):
            return current
        visited.add(id(current))
        # Prefer the explicit cause; fall back to the implicit context.
        current = current.__cause__ or current.__context__
    return None
72
+
73
+
74
+ # ═══════════════════════════════════════════════════════════════════
75
+ # Guardrails callback handler — pre-LLM interception for PII redaction
76
+ # ═══════════════════════════════════════════════════════════════════
77
+
78
class _GuardrailsCallbackHandler(AsyncCallbackHandler):
    """LangChain callback handler that intercepts on_chat_model_start BEFORE the
    LLM call fires, sends a governance LLMStarted event, and mutates the messages
    in-place with redacted_input from Core.

    This mirrors the TypeScript SDK's handleChatModelStart with awaitHandlers=True.
    Injected into config['callbacks'] so LangGraph propagates it to every LLM node.
    """

    raise_error = True  # Surface GuardrailsValidationError / GovernanceHaltError

    def __init__(
        self,
        client: GovernanceClient,
        config: GovernanceConfig,
        workflow_id: str,
        run_id: str,
        thread_id: str,
        pre_screen_response: GovernanceVerdictResponse | None = None,
        pre_screen_activity_id: str | None = None,
        llm_activity_map: dict[str, str] | None = None,
    ) -> None:
        """Store governance context for later use in on_chat_model_start.

        Args:
            client: Governance client used to send LLMStarted events.
            config: Effective governance configuration for this handler.
            workflow_id: Logical session identifier for this turn.
            run_id: Execution attempt identifier for this turn.
            thread_id: LangGraph thread_id extracted from the RunnableConfig.
            pre_screen_response: Verdict already obtained by _pre_screen_input,
                consumed once on the first LLM call to avoid a duplicate event.
            pre_screen_activity_id: Activity row created by the pre-screen
                (run_id + "-pre"), recorded in the activity map when reused.
            llm_activity_map: Shared mapping mutated by this handler; see below.
        """
        super().__init__()
        self._client = client
        self._config = config
        self._workflow_id = workflow_id
        self._run_id = run_id
        self._thread_id = thread_id
        self._pre_screen_response = pre_screen_response
        self._pre_screen_activity_id = pre_screen_activity_id
        # Shared dict: LangChain callback UUID → activity_id to use for span hook.
        # Written here, read by _process_event when LLMCompleted fires.
        self._llm_activity_map: dict[str, str] = (
            llm_activity_map if llm_activity_map is not None else {}
        )

    async def on_chat_model_start(
        self,
        serialized: dict[str, Any],
        messages: list[list[BaseMessage]],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Send LLMStarted governance (or reuse the pre-screen verdict) and apply
        PII redaction by mutating the outgoing messages in place.

        Enforcement (block/halt/HITL) is intentionally NOT performed here —
        see the NOTE below; that happens in _pre_screen_input.
        """
        if not self._config.send_llm_start_event:
            return

        # Extract human/user turn text only — mirrors _extract_prompt_from_messages.
        # Subagent-internal LLM calls have only system/tool messages → empty prompt
        # → skip guard below prevents sending {"prompt": ""} to Core's guardrail.
        prompt_parts: list[str] = []
        for group in messages:
            for msg in group:
                # BaseMessage exposes .type; dict-shaped messages may expose .role.
                role = getattr(msg, "type", None) or getattr(msg, "role", None) or ""
                if role not in ("human", "user", "generic"):
                    continue
                content = msg.content
                if isinstance(content, str):
                    prompt_parts.append(content)
                elif isinstance(content, list):
                    # Multimodal content: keep only the text parts.
                    for part in content:
                        if isinstance(part, dict) and part.get("type") == "text":
                            prompt_parts.append(part.get("text", ""))
        prompt_text = "\n".join(prompt_parts)

        # Skip governance for LLM calls with no human-turn text (e.g. subagent
        # internal LLMs that only have system/tool messages). Sending an empty
        # prompt causes Core's guardrail to return a JSON parse error (block).
        if not prompt_text.strip():
            return

        # Best-effort model name: serialized "name", else last element of the
        # serialized "id" path, else a generic placeholder.
        model_name = (
            serialized.get("name")
            or (serialized.get("id") or [None])[-1]
            or "LLM"
        )
        event_run_id = str(run_id)

        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="LLMStarted",
            workflow_id=self._workflow_id,
            run_id=self._run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_type="llm_call",
            activity_input=[{"prompt": prompt_text}],
            llm_model=model_name,
            prompt=prompt_text,
        )

        if self._pre_screen_response is not None:
            # Reuse the pre-screen verdict for PII redaction — the pre-screen already
            # created an ActivityStarted row (activity_id=run_id+"-pre"). Record that
            # mapping so _process_event knows to attach the LLM span hook to THAT row
            # rather than creating a new one with the callback UUID.
            response = self._pre_screen_response
            self._pre_screen_response = None  # consume once; later calls re-evaluate
            if self._pre_screen_activity_id:
                self._llm_activity_map[event_run_id] = self._pre_screen_activity_id
        else:
            # No pre-screen (second+ LLM call, or pre-screen disabled) — create a
            # new row with the callback UUID so the span hook has a row to attach to.
            response = await self._client.evaluate_event(gov)
            self._llm_activity_map[event_run_id] = event_run_id
        if response is None:
            return

        # NOTE: enforce_verdict / HITL are intentionally NOT called here.
        # LangGraph's graph runner catches callback exceptions even with raise_error=True
        # and logs them as warnings instead of propagating them to the caller.
        # Block/halt/guardrail enforcement is done in _pre_screen_input() which runs
        # directly in ainvoke/astream_governed before the stream starts.
        #
        # This callback handler's only job is PII redaction (in-place message mutation).

        # Apply PII redaction: mutate messages in-place before the LLM call fires
        gr = response.guardrails_result
        if gr and gr.input_type == "activity_input" and gr.redacted_input is not None:
            redacted = gr.redacted_input
            # Core returns [{"prompt": "..."}] — extract the prompt string
            if isinstance(redacted, list) and redacted:
                first = redacted[0]
                if isinstance(first, dict):
                    redacted_text = first.get("prompt")
                elif isinstance(first, str):
                    redacted_text = first
                else:
                    redacted_text = None
            elif isinstance(redacted, str):
                redacted_text = redacted
            else:
                redacted_text = None

            if redacted_text:
                # Replace the last human message in each message group
                for group in messages:
                    for j in range(len(group) - 1, -1, -1):
                        msg = group[j]
                        if msg.type in ("human", "generic"):
                            msg.content = redacted_text  # type: ignore[assignment]
                            break
226
+
227
+
228
+ # ═══════════════════════════════════════════════════════════════════
229
+ # Run buffer (tracks in-flight runs for duration/context)
230
+ # ═══════════════════════════════════════════════════════════════════
231
+
232
@dataclass
class _RunBuffer:
    """Mutable record for one in-flight LangGraph run (chain/tool/LLM call).

    Created when a *start* stream event is seen and consulted when the
    matching *end* event arrives, to compute durations and carry context.
    """

    run_id: str  # LangChain callback run UUID (stringified)
    run_type: str  # e.g. "chain", "tool", "llm"
    name: str  # event/runnable name from the stream event
    thread_id: str  # LangGraph thread_id this run belongs to
    # Monotonic clock in ms at registration — used for duration_ms().
    start_time_ms: float = field(default_factory=lambda: time.monotonic() * 1000)
    # Wall-clock nanoseconds at registration (e.g. for span timestamps).
    start_time_ns: int = field(default_factory=time.time_ns)
    langgraph_node: str | None = None  # node name from event metadata, if any
    langgraph_step: int | None = None  # step index from event metadata, if any
    subagent_name: str | None = None  # set by resolve_subagent_name hook, if any
    llm_started: bool = False  # True only when LLMStarted was actually sent to Core
    otel_span: Any = None  # OTel span for context propagation across asyncio.Task
    otel_token: Any = None  # OTel context detach token
246
+
247
+
248
class _RunBufferManager:
    """Registry of in-flight run buffers, keyed by run_id."""

    def __init__(self) -> None:
        self._buffers: dict[str, _RunBuffer] = {}

    def register(
        self,
        run_id: str,
        run_type: str,
        name: str,
        thread_id: str,
        langgraph_node: str | None = None,
        langgraph_step: int | None = None,
        subagent_name: str | None = None,
    ) -> None:
        """Create and store a fresh buffer for run_id (timestamps taken now)."""
        entry = _RunBuffer(
            run_id=run_id,
            run_type=run_type,
            name=name,
            thread_id=thread_id,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
            subagent_name=subagent_name,
        )
        self._buffers[run_id] = entry

    def get(self, run_id: str) -> _RunBuffer | None:
        """Return the buffer tracked under run_id, or None when unknown."""
        return self._buffers.get(run_id)

    def remove(self, run_id: str) -> None:
        """Forget run_id; silently ignores IDs that were never registered."""
        self._buffers.pop(run_id, None)

    def duration_ms(self, run_id: str) -> float | None:
        """Milliseconds elapsed since run_id was registered, or None if unknown."""
        entry = self._buffers.get(run_id)
        if entry is None:
            return None
        now_ms = time.monotonic() * 1000
        return now_ms - entry.start_time_ms
283
+
284
+
285
+ # ═══════════════════════════════════════════════════════════════════
286
+ # Root run tracker (identifies the outermost graph invocation)
287
+ # ═══════════════════════════════════════════════════════════════════
288
+
289
class _RootRunTracker:
    """Remembers the run_id of the outermost (root) graph invocation."""

    def __init__(self) -> None:
        self._root: str | None = None

    def is_root(self, run_id: str) -> bool:
        """Return True and register run_id as root if no root exists yet."""
        if self._root is not None:
            return run_id == self._root
        # First run observed becomes the root.
        self._root = run_id
        return True

    @property
    def root_run_id(self) -> str | None:
        """The registered root run_id, or None before any run was seen."""
        return self._root

    def reset(self) -> None:
        """Clear the root so the next observed run becomes the new root."""
        self._root = None
306
+
307
+
308
+ # ═══════════════════════════════════════════════════════════════════
309
+ # Options
310
+ # ═══════════════════════════════════════════════════════════════════
311
+
312
@dataclass
class OpenBoxLangGraphHandlerOptions:
    """Configuration options for `OpenBoxLangGraphHandler`."""

    client: GovernanceClient | None = None  # reuse an existing client; else built from global config
    on_api_error: str = "fail_open"  # governance API failure policy
    api_timeout: int = 30_000  # NOTE(review): unit looks like ms here, while GovernanceClient timeout is seconds — confirm
    send_chain_start_event: bool = True
    send_chain_end_event: bool = True
    send_tool_start_event: bool = True
    send_tool_end_event: bool = True
    send_llm_start_event: bool = True  # also gates pre-screen + guardrails callback
    send_llm_end_event: bool = True
    skip_chain_types: set[str] = field(default_factory=set)  # chain names exempt from governance
    skip_tool_types: set[str] = field(default_factory=set)  # tool names exempt from governance
    hitl: Any = None  # HITLConfig | dict | None
    session_id: str | None = None  # forwarded on every governance event
    agent_name: str | None = None  # workflow_type label; defaults to "LangGraphRun"
    task_queue: str = "langgraph"
    use_native_interrupt: bool = False
    root_node_names: set[str] = field(default_factory=set)
    resolve_subagent_name: Callable[[LangGraphStreamEvent], str | None] | None = None
    """Optional hook for framework-specific subagent name detection.

    Called on every `on_chain_start` / `on_tool_start` event.
    Return the subagent name if this event is a subagent invocation, else None.
    DeepAgents integration sets this to detect `task` tool sub-graphs.
    """
    sqlalchemy_engine: Any = None
    """Optional SQLAlchemy Engine instance to instrument for DB governance.
    Required when the engine is created before the handler (e.g. SQLDatabase.from_uri()).
    """
    tool_type_map: dict[str, str] | None = None
    """Optional mapping of tool_name → tool_type for execution tree classification.

    Supported values: "http", "database", "builtin", "a2a", "custom".
    If a tool is not listed and subagent_name is set, defaults to "a2a".
    Otherwise defaults to "custom".

    Example::

        tool_type_map={"search_web": "http", "query_db": "database"}
    """
355
+
356
+
357
+ # ═══════════════════════════════════════════════════════════════════
358
+ # OpenBoxLangGraphHandler
359
+ # ═══════════════════════════════════════════════════════════════════
360
+
361
+ class OpenBoxLangGraphHandler:
362
+ """Wraps a compiled LangGraph graph and applies OpenBox governance to its event stream.
363
+
364
+ Usage:
365
+ governed = await create_openbox_graph_handler(
366
+ graph=my_compiled_graph,
367
+ api_url=os.environ["OPENBOX_URL"],
368
+ api_key=os.environ["OPENBOX_API_KEY"],
369
+ agent_name="MyAgent",
370
+ )
371
+ result = await governed.ainvoke(
372
+ {"messages": [{"role": "user", "content": "Hello"}]},
373
+ config={"configurable": {"thread_id": "session-abc"}},
374
+ )
375
+ """
376
+
377
    def __init__(
        self,
        graph: Any,
        options: OpenBoxLangGraphHandlerOptions | None = None,
    ) -> None:
        """Wrap *graph* with governance.

        Args:
            graph: A compiled LangGraph graph (anything exposing
                ``astream_events`` / ``ainvoke``).
            options: Handler options; a default instance is used when omitted.
        """
        opts = options or OpenBoxLangGraphHandlerOptions()
        self._graph = graph
        self._resolve_subagent_name = opts.resolve_subagent_name

        # Build GovernanceConfig from options (merged over global defaults).
        self._config = merge_config({
            "on_api_error": opts.on_api_error,
            "api_timeout": opts.api_timeout,
            "send_chain_start_event": opts.send_chain_start_event,
            "send_chain_end_event": opts.send_chain_end_event,
            "send_tool_start_event": opts.send_tool_start_event,
            "send_tool_end_event": opts.send_tool_end_event,
            "send_llm_start_event": opts.send_llm_start_event,
            "send_llm_end_event": opts.send_llm_end_event,
            "skip_chain_types": opts.skip_chain_types,
            "skip_tool_types": opts.skip_tool_types,
            "hitl": opts.hitl,
            "session_id": opts.session_id,
            "agent_name": opts.agent_name,
            "task_queue": opts.task_queue,
            "use_native_interrupt": opts.use_native_interrupt,
            "root_node_names": opts.root_node_names,
            "tool_type_map": opts.tool_type_map or {},
        })

        # Prefer an explicitly supplied client; otherwise build one from the
        # process-wide global config.
        if opts.client:
            self._client = opts.client
        else:
            gc = get_global_config()
            self._client = GovernanceClient(
                api_url=gc.api_url,
                api_key=gc.api_key,
                timeout=gc.governance_timeout,  # seconds
                on_api_error=self._config.on_api_error,
            )

        # Setup OTel HTTP governance hooks (required)
        gc = get_global_config()
        if gc and gc.api_url and gc.api_key:
            # Imported lazily so OTel instrumentation is only pulled in when
            # governance credentials are actually configured.
            from openbox_langgraph.otel_setup import setup_opentelemetry_for_governance
            from openbox_langgraph.span_processor import WorkflowSpanProcessor
            self._span_processor = WorkflowSpanProcessor()
            setup_opentelemetry_for_governance(
                span_processor=self._span_processor,
                api_url=gc.api_url,
                api_key=gc.api_key,
                # Don't govern the governance API's own HTTP traffic.
                ignored_urls=[gc.api_url],
                api_timeout=gc.governance_timeout,
                on_api_error=self._config.on_api_error,
                sqlalchemy_engine=opts.sqlalchemy_engine,
            )
            _logger.debug("[OpenBox] OTel HTTP governance hooks enabled")
        else:
            self._span_processor = None
436
+
437
+ # ─────────────────────────────────────────────────────────────
438
+ # Pre-screen: enforce guardrails before stream starts
439
+ # ─────────────────────────────────────────────────────────────
440
+
441
    async def _pre_screen_input(
        self,
        input: dict[str, Any],
        workflow_id: str,
        run_id: str,
        graph_input: dict[str, Any] | None = None,
    ) -> tuple[bool, GovernanceVerdictResponse | None]:
        """Send WorkflowStarted + LLMStarted governance events before the stream starts.

        Returns (workflow_started_sent, pre_screen_response):
          - workflow_started_sent: True if WorkflowStarted was sent (suppress duplicate
            from on_chain_start in _process_event).
          - pre_screen_response: the LLMStarted verdict response, passed to the callback
            handler so on_chat_model_start can reuse it for PII redaction without sending
            a second ActivityStarted event.

        Unlike the callback handler (which LangGraph's runner silently swallows),
        exceptions raised here propagate directly to the ainvoke/astream_governed
        caller — so GuardrailsValidationError, GovernanceHaltError, GovernanceBlockedError
        all reach the user's except block and halt the session correctly.
        """
        # ── 0. SignalReceived — fire before WorkflowStarted so the dashboard shows
        #    the user prompt as the trigger that initiated the session.
        #    Extract the last human message from the input as the signal payload.
        _sig_messages = input.get("messages") or []
        _user_prompt: str | None = None
        for _msg in reversed(_sig_messages):
            if isinstance(_msg, dict):
                if _msg.get("role") in ("user", "human"):
                    # Non-string content (e.g. multimodal list) is dropped here.
                    _user_prompt = _msg.get("content") or None
                    break
            elif hasattr(_msg, "type") and _msg.type in ("human", "generic"):
                _c = _msg.content
                _user_prompt = _c if isinstance(_c, str) else None
                break
        if _user_prompt:
            sig_event = LangChainGovernanceEvent(
                source="workflow-telemetry",
                event_type="SignalReceived",
                workflow_id=workflow_id,
                run_id=run_id,
                workflow_type=self._config.agent_name or "LangGraphRun",
                task_queue=self._config.task_queue,
                timestamp=rfc3339_now(),
                session_id=self._config.session_id,
                activity_id=f"{run_id}-sig",
                activity_type="user_prompt",
                signal_name="user_prompt",
                signal_args=[_user_prompt],
            )
            await self._client.evaluate_event(sig_event)

        # ── 1. WorkflowStarted — must precede any ActivityStarted so the dashboard
        #    creates a session to attach the guardrail event to (mirrors both SDKs).
        #    Gated on send_chain_start_event only, NOT on send_llm_start_event.
        if self._config.send_chain_start_event:
            wf_start = LangChainGovernanceEvent(
                source="workflow-telemetry",
                event_type="WorkflowStarted",
                workflow_id=workflow_id,
                run_id=run_id,
                workflow_type=self._config.agent_name or "LangGraphRun",
                task_queue=self._config.task_queue,
                timestamp=rfc3339_now(),
                session_id=self._config.session_id,
                activity_id=f"{run_id}-wf",
                activity_type=self._config.agent_name or "LangGraphRun",
                activity_input=[safe_serialize(input)],
            )
            await self._client.evaluate_event(wf_start)
            workflow_started_sent = True
        else:
            workflow_started_sent = False

        # ── 2. LLMStarted pre-screen — enforce guardrails on the user prompt
        if not self._config.send_llm_start_event:
            return workflow_started_sent, None

        # Collect every human-turn string from the input; non-string content
        # (multimodal) is ignored here, unlike the callback handler's extraction.
        messages = input.get("messages") or []
        prompt_parts: list[str] = []
        for msg in messages:
            if isinstance(msg, dict):
                role = msg.get("role", "")
                content = msg.get("content", "")
                if role in ("user", "human") and isinstance(content, str):
                    prompt_parts.append(content)
            elif hasattr(msg, "type") and msg.type in ("human", "generic"):
                c = msg.content
                if isinstance(c, str):
                    prompt_parts.append(c)
        if not prompt_parts:
            # Nothing to screen (e.g. tool-only resume turn).
            return workflow_started_sent, None
        prompt_text = "\n".join(prompt_parts)

        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="LLMStarted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=f"{run_id}-pre",
            activity_type="llm_call",
            activity_input=[{"prompt": prompt_text}],
            prompt=prompt_text,
        )

        response = await self._client.evaluate_event(gov)
        if response is None:
            return workflow_started_sent, None

        # Enforce — exceptions propagate directly to the caller here.
        # If blocked/halted, close the WorkflowStarted session first so the
        # dashboard doesn't show an orphaned open session.
        enforcement_error: Exception | None = None
        try:
            result = enforce_verdict(response, "llm_start")
        except Exception as exc:
            enforcement_error = exc
            result = None  # type: ignore[assignment]

        # NOTE(review): the re-raise below only fires when WorkflowStarted was
        # sent AND send_chain_end_event is enabled — with either disabled, an
        # enforcement error is not re-raised on this path. Confirm intended.
        if (
            enforcement_error is not None
            and workflow_started_sent
            and self._config.send_chain_end_event
        ):
            wf_end = LangChainGovernanceEvent(
                source="workflow-telemetry",
                event_type="WorkflowCompleted",
                workflow_id=workflow_id,
                run_id=run_id,
                workflow_type=self._config.agent_name or "LangGraphRun",
                task_queue=self._config.task_queue,
                timestamp=rfc3339_now(),
                session_id=self._config.session_id,
                activity_id=f"{run_id}-wf",
                activity_type=self._config.agent_name or "LangGraphRun",
                status="failed",
                error=str(enforcement_error),
            )
            await self._client.evaluate_event(wf_end)
            raise enforcement_error

        # Human-in-the-loop: block until an operator decides, translating
        # every rejection/expiry/timeout into a hard halt.
        if result and result.requires_hitl:
            try:
                await poll_until_decision(
                    self._client,
                    HITLPollParams(
                        workflow_id=workflow_id,
                        run_id=run_id,
                        activity_id=f"{run_id}-pre",
                        activity_type="llm_call",
                    ),
                    self._config.hitl,
                )
            except (ApprovalRejectedError, ApprovalExpiredError, ApprovalTimeoutError) as e:
                raise GovernanceHaltError(str(e)) from e

        return workflow_started_sent, response
602
+
603
+ # ─────────────────────────────────────────────────────────────
604
+ # Public invoke / ainvoke
605
+ # ─────────────────────────────────────────────────────────────
606
+
607
    async def ainvoke(
        self,
        input: dict[str, Any],
        *,
        config: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Invoke the governed graph and return the final state.

        Streams events via `astream_events` (governance applied inline) and
        returns the final graph output from the root `on_chain_end` event.
        Does NOT call `ainvoke` on the underlying graph a second time.

        Args:
            input: The initial graph state (e.g. `{"messages": [...]}`)
            config: LangGraph RunnableConfig — must include
                `{"configurable": {"thread_id": "..."}}` for session tracking.

        Raises:
            GovernanceBlockedError / GovernanceHaltError /
            GuardrailsValidationError: propagated from pre-screen enforcement.
        """
        thread_id = _extract_thread_id(config)
        # Generate fresh workflow_id + run_id per turn, matching Temporal SDK:
        #   workflow_id = stable logical session ID (unique per-turn)
        #   run_id      = unique execution attempt ID (distinct from workflow_id)
        # Core seals a workflow after WorkflowCompleted — reusing the same
        # workflow_id causes HALT: "fully attested and sealed".
        _turn = uuid.uuid4().hex
        workflow_id = f"{thread_id}-{_turn[:8]}"
        run_id = f"{thread_id}-run-{_turn[8:16]}"
        root_tracker = _RootRunTracker()
        buffer = _RunBufferManager()
        final_output: dict[str, Any] = {}

        # Pre-screen: enforce guardrails BEFORE stream starts so exceptions
        # propagate to the caller (LangGraph runner swallows callback exceptions).
        # Returns (workflow_started_sent, pre_screen_response) — response reused
        # by callback handler for PII redaction to avoid duplicate ActivityStarted.
        workflow_started_sent, pre_screen_response = await self._pre_screen_input(
            input, workflow_id, run_id
        )

        # Shared map: LangChain callback UUID → activity_id to use for the LLM span hook.
        # Written by _GuardrailsCallbackHandler.on_chat_model_start, read by _process_event.
        llm_activity_map: dict[str, str] = {}

        # Inject guardrails callback for PII redaction only (in-place message mutation).
        guardrails_cb = _GuardrailsCallbackHandler(
            client=self._client,
            config=self._config,
            workflow_id=workflow_id,
            run_id=run_id,
            thread_id=thread_id,
            pre_screen_response=pre_screen_response,
            pre_screen_activity_id=f"{run_id}-pre" if pre_screen_response is not None else None,
            llm_activity_map=llm_activity_map,
        )
        # Shallow-copy the caller's config so we never mutate their callbacks list.
        cfg = dict(config or {})
        cfg["callbacks"] = [*list(cfg.get("callbacks") or []), guardrails_cb]

        try:
            async for event in self._graph.astream_events(
                input, config=cfg, version="v2", **kwargs
            ):
                stream_event = LangGraphStreamEvent.from_dict(event)
                await self._process_event(
                    stream_event, thread_id, workflow_id, run_id, root_tracker, buffer,
                    workflow_started_sent=workflow_started_sent, llm_activity_map=llm_activity_map,
                )
                # Capture the root graph's final output from on_chain_end
                if (
                    stream_event.event == "on_chain_end"
                    and root_tracker.root_run_id == stream_event.run_id
                ):
                    output = stream_event.data.get("output")
                    if isinstance(output, dict):
                        final_output = output
        except GovernanceBlockedError as hook_err:
            # Raised directly by an OTel hook (not wrapped by an LLM SDK).
            if hook_err.verdict != "require_approval":
                raise
            _logger.info("[OpenBox] Hook REQUIRE_APPROVAL during ainvoke, polling")
            await poll_until_decision(
                self._client,
                HITLPollParams(workflow_id=workflow_id, run_id=run_id,
                               activity_id=f"{run_id}-hook", activity_type="hook"),
                self._config.hitl,
            )
            _logger.info("[OpenBox] Approval granted, retrying ainvoke")
            # NOTE(review): the retry bypasses event-stream governance by calling
            # the raw graph's ainvoke — confirm this single-shot retry is intended.
            final_output = await self._graph.ainvoke(input, config=cfg, **kwargs)
        except Exception as exc:
            # LLM SDKs wrap httpx errors; unwrap to recover a governance block.
            hook_err = _extract_governance_blocked(exc)
            if hook_err is None or hook_err.verdict != "require_approval":
                raise
            _logger.info("[OpenBox] Hook REQUIRE_APPROVAL (wrapped) during ainvoke, polling")
            await poll_until_decision(
                self._client,
                HITLPollParams(workflow_id=workflow_id, run_id=run_id,
                               activity_id=f"{run_id}-hook", activity_type="hook"),
                self._config.hitl,
            )
            _logger.info("[OpenBox] Approval granted, retrying ainvoke")
            final_output = await self._graph.ainvoke(input, config=cfg, **kwargs)

        return final_output
708
+
709
    async def astream_governed(
        self,
        input: dict[str, Any],
        *,
        config: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[dict[str, Any]]:
        """Stream governed graph updates, yielding each update chunk.

        Governance is applied inline as events are streamed. The caller
        receives graph state update chunks identically to `astream_events`.

        Unlike `ainvoke`, this path has no REQUIRE_APPROVAL retry handling —
        governance exceptions propagate to the consumer of the iterator.

        Args:
            input: The initial graph state.
            config: LangGraph RunnableConfig with `thread_id`.
        """
        thread_id = _extract_thread_id(config)
        # Fresh per-turn IDs — see ainvoke for why they must not be reused.
        _turn = uuid.uuid4().hex
        workflow_id = f"{thread_id}-{_turn[:8]}"
        run_id = f"{thread_id}-run-{_turn[8:16]}"
        root_tracker = _RootRunTracker()
        buffer = _RunBufferManager()

        # Enforce guardrails before the stream starts (exceptions propagate here).
        workflow_started_sent, pre_screen_response = await self._pre_screen_input(
            input, workflow_id, run_id
        )

        # Shared callback-UUID → activity_id map; see ainvoke for details.
        llm_activity_map: dict[str, str] = {}
        guardrails_cb = _GuardrailsCallbackHandler(
            client=self._client,
            config=self._config,
            workflow_id=workflow_id,
            run_id=run_id,
            thread_id=thread_id,
            pre_screen_response=pre_screen_response,
            pre_screen_activity_id=f"{run_id}-pre" if pre_screen_response is not None else None,
            llm_activity_map=llm_activity_map,
        )
        cfg = dict(config or {})
        cfg["callbacks"] = [*list(cfg.get("callbacks") or []), guardrails_cb]

        # Opt-in event tracing to stderr (skips noisy *_stream chunk events).
        _debug = os.environ.get("OPENBOX_DEBUG") == "1"
        async for event in self._graph.astream_events(
            input, config=cfg, version="v2", **kwargs
        ):
            stream_event = LangGraphStreamEvent.from_dict(event)
            if _debug and "_stream" not in stream_event.event:
                sys.stderr.write(
                    f"[OBX_EVENT] {stream_event.event:<25} name={stream_event.name!r:<35} "
                    f"node={stream_event.metadata.get('langgraph_node')!r}\n"
                )
            await self._process_event(
                stream_event, thread_id, workflow_id, run_id, root_tracker, buffer,
                workflow_started_sent=workflow_started_sent, llm_activity_map=llm_activity_map,
            )
            yield event
765
+
766
+ async def astream(
767
+ self,
768
+ input: dict[str, Any],
769
+ config: dict[str, Any] | None = None,
770
+ **kwargs: Any,
771
+ ) -> AsyncIterator[dict[str, Any]]:
772
+ """Graph-compatible astream — delegates to astream_governed.
773
+
774
+ Provided so `langgraph dev` and other LangGraph tooling that calls
775
+ ``graph.astream(...)`` can use this handler as a drop-in replacement
776
+ for a ``CompiledStateGraph``.
777
+ """
778
+ async for chunk in self.astream_governed(input, config=config, **kwargs):
779
+ yield chunk
780
+
781
    async def astream_events(
        self,
        input: dict[str, Any],
        config: dict[str, Any] | None = None,
        *,
        version: str = "v2",
        **kwargs: Any,
    ) -> AsyncIterator[dict[str, Any]]:
        """Graph-compatible astream_events — runs governance and re-yields raw events.

        Provided so tooling that calls ``graph.astream_events(...)`` works
        transparently with the governed handler.

        Args:
            input: The initial graph state.
            config: LangGraph RunnableConfig with `thread_id`.
            version: Event-stream schema version forwarded to the graph.
        """
        thread_id = _extract_thread_id(config)
        # Fresh per-turn IDs — see ainvoke for why they must not be reused.
        _turn = uuid.uuid4().hex
        workflow_id = f"{thread_id}-{_turn[:8]}"
        run_id = f"{thread_id}-run-{_turn[8:16]}"
        root_tracker = _RootRunTracker()
        buffer = _RunBufferManager()

        # Enforce guardrails before the stream starts (exceptions propagate here).
        workflow_started_sent, pre_screen_response = await self._pre_screen_input(
            input, workflow_id, run_id
        )

        # Shared callback-UUID → activity_id map; see ainvoke for details.
        llm_activity_map: dict[str, str] = {}
        guardrails_cb = _GuardrailsCallbackHandler(
            client=self._client,
            config=self._config,
            workflow_id=workflow_id,
            run_id=run_id,
            thread_id=thread_id,
            pre_screen_response=pre_screen_response,
            pre_screen_activity_id=f"{run_id}-pre" if pre_screen_response is not None else None,
            llm_activity_map=llm_activity_map,
        )
        cfg = dict(config or {})
        cfg["callbacks"] = [*list(cfg.get("callbacks") or []), guardrails_cb]

        async for event in self._graph.astream_events(
            input, config=cfg, version=version, **kwargs
        ):
            stream_event = LangGraphStreamEvent.from_dict(event)
            await self._process_event(
                stream_event, thread_id, workflow_id, run_id, root_tracker, buffer,
                workflow_started_sent=workflow_started_sent, llm_activity_map=llm_activity_map,
            )
            yield event
828
+
829
+ # ─────────────────────────────────────────────────────────────
830
+ # Event processing
831
+ # ─────────────────────────────────────────────────────────────
832
+
833
async def _process_event(
    self,
    event: LangGraphStreamEvent,
    thread_id: str,
    workflow_id: str,
    run_id: str,
    root_tracker: _RootRunTracker,
    buffer: _RunBufferManager,
    *,
    workflow_started_sent: bool = False,
    llm_activity_map: dict[str, str] | None = None,
) -> None:
    """Process a single LangGraph stream event through governance.

    Maps the raw stream event to a governance event, drops events disabled in
    config, submits the event to OpenBox Core, enforces the returned verdict,
    and blocks on HITL approval when required.

    Args:
        event: Raw LangGraph v2 stream event wrapper.
        thread_id: LangGraph thread id extracted from the run config.
        workflow_id: Governance workflow id for this turn.
        run_id: Governance run id for this turn.
        root_tracker: Tracks which LangGraph run id is the root chain.
        buffer: Per-run bookkeeping (durations, OTel spans, flags).
        workflow_started_sent: True when ``_pre_screen_input`` already emitted
            WorkflowStarted, so the root ChainStarted duplicate is suppressed.
        llm_activity_map: Maps LangGraph LLM run ids to the activity ids the
            guardrails callback created for the same call.

    Raises:
        GovernanceBlockedError, GovernanceHaltError, GuardrailsValidationError:
            Propagated from verdict enforcement.
        GovernanceHaltError: When a HITL approval is rejected, expired, or
            times out (generic path only; see NOTE in the LLM branch).
    """
    gov_event, is_root, is_start, event_type_label = self._map_event(
        event, thread_id, workflow_id, run_id, root_tracker, buffer
    )
    if gov_event is None:
        return

    # ── Skip events disabled in config
    if is_start:
        if event_type_label == "ChainStarted" and not self._config.send_chain_start_event:
            return
        # _pre_screen_input already sent WorkflowStarted — skip duplicate from on_chain_start
        if event_type_label == "ChainStarted" and is_root and workflow_started_sent:
            return
        if event_type_label == "ToolStarted" and not self._config.send_tool_start_event:
            return
        if event_type_label == "LLMStarted" and not self._config.send_llm_start_event:
            return
        # _GuardrailsCallbackHandler owns LLMStarted — it fires pre-LLM with redaction.
        # Skip re-sending here to avoid duplicate governance events.
        if event_type_label == "LLMStarted":
            return
    else:
        if event_type_label == "ChainCompleted" and not self._config.send_chain_end_event:
            return
        if event_type_label == "ToolCompleted" and not self._config.send_tool_end_event:
            return
        # Skip LLMCompleted governance event — no ActivityCompleted sent for LLM
        # calls (mirrors Temporal SDK). Fire the LLM span hook instead, routed to
        # the correct existing row so no orphan rows are created.
        if event_type_label == "LLMCompleted":
            if self._config.send_llm_start_event and gov_event.activity_id:
                # Resolve activity_id for the LLM row (pre-screen or callback-UUID)
                llm_activity_id = (
                    (llm_activity_map or {}).get(gov_event.activity_id)
                    or gov_event.activity_id
                )
                llm_activity_type = gov_event.activity_type or "llm_call"

                # Close the LLM row in Core with ActivityCompleted
                completed_activity_id = f"{llm_activity_id}-c"
                completed_event = LangChainGovernanceEvent(
                    source="workflow-telemetry",
                    event_type="LLMCompleted",
                    workflow_id=workflow_id,
                    run_id=run_id,
                    workflow_type=self._config.agent_name or "LangGraphRun",
                    task_queue=self._config.task_queue,
                    timestamp=rfc3339_now(),
                    session_id=self._config.session_id,
                    activity_id=completed_activity_id,
                    activity_type=llm_activity_type,
                    activity_output=gov_event.activity_output,
                    status="completed",
                    duration_ms=gov_event.duration_ms,
                    llm_model=gov_event.llm_model,
                    input_tokens=gov_event.input_tokens,
                    output_tokens=gov_event.output_tokens,
                    total_tokens=gov_event.total_tokens,
                    has_tool_calls=gov_event.has_tool_calls,
                    completion=gov_event.completion,
                    langgraph_node=gov_event.langgraph_node,
                    langgraph_step=gov_event.langgraph_step,
                )

                response = await self._client.evaluate_event(completed_event)
                if response is not None:
                    context = lang_graph_event_to_context(event.event, is_root=is_root)
                    result = enforce_verdict(response, context)
                    if result.requires_hitl:
                        # NOTE(review): unlike the generic HITL path below, this
                        # poll does NOT convert Approval* errors into
                        # GovernanceHaltError — confirm whether that asymmetry
                        # is intentional before unifying.
                        await poll_until_decision(
                            self._client,
                            HITLPollParams(
                                workflow_id=workflow_id,
                                run_id=run_id,
                                activity_id=completed_activity_id,
                                activity_type=llm_activity_type,
                            ),
                            self._config.hitl,
                        )
            return

    # ── Send to OpenBox Core
    response = await self._client.evaluate_event(gov_event)

    if response is None:
        return

    # ── Determine context and enforce verdict.
    # (A previous try/except here caught GovernanceBlockedError /
    # GovernanceHaltError / GuardrailsValidationError only to re-raise them
    # unchanged — a no-op, removed. They still propagate to the caller.)
    context = lang_graph_event_to_context(event.event, is_root=is_root)
    result = enforce_verdict(response, context)

    # ── HITL polling
    if result.requires_hitl:
        activity_id = gov_event.activity_id or event.run_id
        activity_type = gov_event.activity_type or event.name
        try:
            await poll_until_decision(
                self._client,
                HITLPollParams(
                    workflow_id=workflow_id,
                    run_id=run_id,
                    activity_id=activity_id,
                    activity_type=activity_type,
                ),
                self._config.hitl,
            )
        except (ApprovalRejectedError, ApprovalExpiredError, ApprovalTimeoutError) as e:
            raise GovernanceHaltError(str(e)) from e
957
+
958
+ # ─────────────────────────────────────────────────────────────
959
+ # Tool type classification
960
+ # ─────────────────────────────────────────────────────────────
961
+
962
+ def _resolve_tool_type(self, tool_name: str, subagent_name: str | None) -> str | None:
963
+ """Resolve the semantic tool_type for a given tool.
964
+
965
+ Priority:
966
+ 1. Explicit entry in tool_type_map
967
+ 2. "a2a" if subagent_name is set
968
+ 3. None for unknown tools (no classification prefix in the label)
969
+ """
970
+ if tool_name in self._config.tool_type_map:
971
+ return self._config.tool_type_map[tool_name]
972
+ if subagent_name:
973
+ return "a2a"
974
+ return None
975
+
976
+ def _enrich_activity_input(
977
+ self,
978
+ base_input: list[Any] | None,
979
+ tool_type: str | None,
980
+ subagent_name: str | None,
981
+ ) -> list[Any] | None:
982
+ """Append an ``__openbox`` metadata entry to activity_input for Rego policy use.
983
+
984
+ Core forwards ``activity_input`` as-is to ``input.activity_input`` in OPA.
985
+ By appending a sentinel object, Rego policies can classify tools without
986
+ any Core changes:
987
+
988
+ .. code-block:: rego
989
+
990
+ some item in input.activity_input
991
+ meta := item["__openbox"]
992
+ meta.subagent_name == "writer"
993
+
994
+ Only appended when tool_type or subagent_name is set (skips for unclassified tools).
995
+ """
996
+ if tool_type is None and subagent_name is None:
997
+ return base_input
998
+ meta: dict[str, Any] = {}
999
+ if tool_type is not None:
1000
+ meta["tool_type"] = tool_type
1001
+ if subagent_name is not None:
1002
+ meta["subagent_name"] = subagent_name
1003
+ result = list(base_input) if base_input else []
1004
+ result.append({"__openbox": meta})
1005
+ return result
1006
+
1007
+ # ─────────────────────────────────────────────────────────────
1008
+ # Event mapping (LangGraph event → governance event)
1009
+ # ─────────────────────────────────────────────────────────────
1010
+
1011
def _map_event(
    self,
    event: LangGraphStreamEvent,
    thread_id: str,
    workflow_id: str,
    run_id: str,
    root_tracker: _RootRunTracker,
    buffer: _RunBufferManager,
) -> tuple[LangChainGovernanceEvent | None, bool, bool, str]:
    """Map a LangGraph stream event to a governance event.

    Handles the five governed v2 event kinds (on_chain_start/end,
    on_tool_start/end, on_chat_model_start/end); everything else (streaming
    chunks etc.) falls through to the final ``return`` and is not governed.

    Args:
        event: Raw LangGraph stream event wrapper.
        thread_id: LangGraph thread id for buffer bookkeeping.
        workflow_id: Governance workflow id for this turn.
        run_id: Governance run id for this turn.
        root_tracker: Identifies the root chain run id for this stream.
        buffer: Per-run bookkeeping (durations, OTel spans, flags).

    Returns:
        A 4-tuple of (governance_event | None, is_root, is_start, event_type_label).
        ``None`` as the first element means "not governed" (skipped type,
        non-subagent sub-chain, empty prompt, unrecognized event).
    """
    ev = event.event
    event_run_id = event.run_id  # LangGraph internal run UUID for this node/tool/llm
    name = event.name
    metadata = event.metadata
    data = event.data

    # Node/step placement within the graph, attached by LangGraph to metadata.
    langgraph_node = metadata.get("langgraph_node")
    langgraph_step = metadata.get("langgraph_step")

    # Optional user-supplied hook that tags events belonging to a subagent.
    subagent_name = (
        self._resolve_subagent_name(event) if self._resolve_subagent_name else None
    )

    # NOTE(review): `base` is not referenced by any branch below — every branch
    # constructs its LangChainGovernanceEvent explicitly. Appears to be dead
    # code; confirm before removing.
    def base(
        event_type: str,
        *,
        is_start: bool,
        **extra: Any,
    ) -> tuple[LangChainGovernanceEvent, bool, bool, str]:
        # Root if this run id is the tracked root, or is about to become it.
        is_root = root_tracker.root_run_id == event_run_id or (
            ev == "on_chain_start" and root_tracker.is_root(event_run_id)
        )
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type=event_type,
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
            **extra,
        )
        return gov, is_root, is_start, event_type

    if ev == "on_chain_start":
        # First on_chain_start seen becomes the root graph run.
        is_root = root_tracker.is_root(event_run_id)
        if is_root:
            buffer.register(
                event_run_id, "graph", name, thread_id, langgraph_node, langgraph_step
            )
            gov = LangChainGovernanceEvent(
                source="workflow-telemetry",
                event_type="ChainStarted",
                workflow_id=workflow_id,
                run_id=run_id,
                workflow_type=self._config.agent_name or name or "LangGraphRun",
                task_queue=self._config.task_queue,
                timestamp=rfc3339_now(),
                session_id=self._config.session_id,
                activity_id=event_run_id,
                activity_type=name,
                activity_input=(
                    [safe_serialize(data.get("input"))]
                    if data.get("input") is not None
                    else None
                ),
                langgraph_node=langgraph_node,
                langgraph_step=langgraph_step,
            )
            return gov, True, True, "ChainStarted"
        # Non-root chain = subgraph node
        if name in self._config.skip_chain_types:
            return None, False, True, "ChainStarted"
        # Skip non-subagent chains — LangGraph fires BOTH on_chain_start
        # (BaseTool's Runnable layer) AND on_tool_start (Tool layer) for
        # the same tool invocation with different run_ids, creating
        # duplicate ActivityStarted events. on_tool_start handles tools
        # with proper span hook context; only subagent chains need this.
        if not subagent_name:
            return None, False, True, "ChainStarted"
        buffer.register(
            event_run_id,
            "subgraph" if subagent_name else "chain",
            name,
            thread_id,
            langgraph_node,
            langgraph_step,
            subagent_name,
        )
        # Use ToolStarted so to_server_event_type maps to ActivityStarted,
        # NOT WorkflowStarted — sending WorkflowCompleted for a sub-chain
        # seals the session in Core and causes all subsequent requests to HALT.
        chain_tool_type = self._resolve_tool_type(name, subagent_name)
        chain_base_input = (
            [safe_serialize(data.get("input"))] if data.get("input") is not None else None
        )
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="ToolStarted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_type=name,
            activity_input=self._enrich_activity_input(
                chain_base_input, chain_tool_type, subagent_name
            ),
            tool_name=name,
            tool_type=chain_tool_type,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
            subagent_name=subagent_name,
        )
        return gov, False, True, "ChainStarted"

    if ev == "on_chain_end":
        is_root = root_tracker.root_run_id == event_run_id
        # Duration measured from the matching on_chain_start registration.
        dur = buffer.duration_ms(event_run_id)
        buffer.remove(event_run_id)
        output = data.get("output")
        # Bare-string outputs are wrapped so the serialized form is an object.
        serialized_output = (
            safe_serialize({"result": output})
            if isinstance(output, str)
            else safe_serialize(output)
        )
        if is_root:
            gov = LangChainGovernanceEvent(
                source="workflow-telemetry",
                event_type="ChainCompleted",
                workflow_id=workflow_id,
                run_id=run_id,
                workflow_type=self._config.agent_name or name or "LangGraphRun",
                task_queue=self._config.task_queue,
                timestamp=rfc3339_now(),
                session_id=self._config.session_id,
                activity_id=event_run_id,
                activity_type=name,
                workflow_output=safe_serialize(output),
                activity_output=serialized_output,
                status="completed",
                duration_ms=dur,
                langgraph_node=langgraph_node,
                langgraph_step=langgraph_step,
            )
            return gov, True, False, "ChainCompleted"
        if name in self._config.skip_chain_types:
            return None, False, False, "ChainCompleted"
        # Skip non-subagent chains (mirrors on_chain_start skip above)
        if not subagent_name:
            return None, False, False, "ChainCompleted"
        # Use ToolCompleted → ActivityCompleted (not WorkflowCompleted)
        chain_tool_type = self._resolve_tool_type(name, subagent_name)
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="ToolCompleted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_type=name,
            activity_output=serialized_output,
            tool_name=name,
            tool_type=chain_tool_type,
            status="completed",
            duration_ms=dur,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
            subagent_name=subagent_name,
        )
        return gov, False, False, "ChainCompleted"

    if ev == "on_tool_start":
        if name in self._config.skip_tool_types:
            return None, False, True, "ToolStarted"

        # Register activity context with SpanProcessor for hook-level governance
        # All tools (including subagents) get span-level governance
        if getattr(self, '_span_processor', None) is not None:
            activity_context = {
                "source": "workflow-telemetry",
                "event_type": "ActivityStarted",
                "workflow_id": workflow_id,
                "run_id": run_id,
                "workflow_type": self._config.agent_name or "LangGraphRun",
                "task_queue": self._config.task_queue or "langgraph",
                "activity_id": event_run_id,
                "activity_type": name,
            }
            self._span_processor.set_activity_context(
                workflow_id, event_run_id, activity_context
            )

        buffer.register(event_run_id, "tool", name, thread_id, langgraph_node, langgraph_step)
        buf = buffer.get(event_run_id)
        if buf is not None:
            buf.subagent_name = subagent_name

        # Create OTel span to propagate trace context across asyncio.Task boundaries.
        # Tool execution happens in a spawned Task with a new OTel context — this span
        # bridges the gap so httpx child spans inherit the correct trace_id.
        if getattr(self, '_span_processor', None) is not None:
            parent_ctx = otel_context.get_current()
            tool_span = _otel_tracer.start_span(
                f"tool.{name}", context=parent_ctx, kind=otel_trace.SpanKind.INTERNAL,
            )
            token = otel_context.attach(otel_trace.set_span_in_context(tool_span))
            trace_id = tool_span.get_span_context().trace_id
            if trace_id:
                self._span_processor.register_trace(trace_id, workflow_id, event_run_id)
            # Stash span + token so on_tool_end can detach and close them.
            if buf is not None:
                buf.otel_span = tool_span
                buf.otel_token = token
        tool_input = _unwrap_tool_input(data.get("input"))
        tool_type = self._resolve_tool_type(name, subagent_name)
        # NOTE: No internal span here. In the Temporal SDK, @traced spans
        # fire DURING activity execution — after ActivityStarted is stored
        # in Core. Firing here would race with the ToolStarted event below
        # (the hook span may arrive at Core before the parent event exists).
        # The "completed" internal span fires at on_tool_end instead.
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="ToolStarted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_type=name,
            activity_input=self._enrich_activity_input(
                [safe_serialize(tool_input)], tool_type, subagent_name
            ),
            tool_name=name,
            tool_type=tool_type,
            tool_input=safe_serialize(data.get("input")),
            subagent_name=subagent_name,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
        )
        return gov, False, True, "ToolStarted"

    if ev == "on_tool_end":
        if name in self._config.skip_tool_types:
            return None, False, False, "ToolCompleted"
        dur = buffer.duration_ms(event_run_id)
        buf = buffer.get(event_run_id)

        # End OTel span created in on_tool_start and detach context
        if buf is not None and buf.otel_span is not None:
            if buf.otel_token is not None:
                otel_context.detach(buf.otel_token)
            buf.otel_span.end()

        # Clear SpanProcessor activity context for all tools
        if getattr(self, '_span_processor', None) is not None:
            self._span_processor.clear_activity_context(workflow_id, event_run_id)

        buffer.remove(event_run_id)
        # "-c" suffix distinguishes the completion row from the started row.
        completed_activity_id = f"{event_run_id}-c"
        tool_output = data.get("output")
        serialized_output = (
            safe_serialize({"result": tool_output})
            if isinstance(tool_output, str)
            else safe_serialize(tool_output)
        )
        tool_type = self._resolve_tool_type(name, subagent_name)
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="ToolCompleted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=completed_activity_id,
            activity_type=name,
            activity_output=serialized_output,
            tool_name=name,
            tool_type=tool_type,
            subagent_name=subagent_name,
            status="completed",
            duration_ms=dur,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
        )
        return gov, False, False, "ToolCompleted"

    if ev == "on_chat_model_start":
        buffer.register(event_run_id, "llm", name, thread_id, langgraph_node, langgraph_step)
        # Register activity context with OTel SpanProcessor for hook-level governance
        if getattr(self, '_span_processor', None) is not None:
            activity_context = {
                "source": "workflow-telemetry",
                "event_type": "ActivityStarted",
                "workflow_id": workflow_id,
                "run_id": run_id,
                "workflow_type": self._config.agent_name or "LangGraphRun",
                "task_queue": self._config.task_queue or "langgraph",
                "activity_id": event_run_id,
                "activity_type": "llm_call",
            }
            self._span_processor.set_activity_context(
                workflow_id, event_run_id, activity_context
            )

            # Create OTel span to propagate trace context across asyncio.Task boundaries
            parent_ctx = otel_context.get_current()
            llm_span = _otel_tracer.start_span(
                "llm.call", context=parent_ctx, kind=otel_trace.SpanKind.INTERNAL,
            )
            token = otel_context.attach(otel_trace.set_span_in_context(llm_span))
            trace_id = llm_span.get_span_context().trace_id
            if trace_id:
                self._span_processor.register_trace(trace_id, workflow_id, event_run_id)
            buf = buffer.get(event_run_id)
            if buf is not None:
                buf.otel_span = llm_span
                buf.otel_token = token
        messages = (data.get("input") or {}).get("messages", [])
        prompt_text = _extract_prompt_from_messages(messages)
        # Skip sending empty prompts — subagent-internal LLM calls have only
        # system/tool messages, no human turn. Core's guardrail JSON-parses the
        # prompt field and returns a parse error ("Expecting value ... char 0") → block.
        if not prompt_text.strip():
            return None, False, True, "LLMStarted"
        # Mark that LLMStarted will be sent — on_chat_model_end uses this to
        # decide whether to fire the LLM span hook. Without this guard, span
        # hooks fire for internal subagent LLM calls that have no row in Core,
        # causing Core to create orphan empty rows (duplicate with no data).
        buf = buffer.get(event_run_id)
        if buf is not None:
            buf.llm_started = True
        model_name = _extract_model_name_from_event(event) or name
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="LLMStarted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_type="llm_call",
            activity_input=[{"prompt": prompt_text}],
            llm_model=model_name,
            prompt=prompt_text,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
        )
        return gov, False, True, "LLMStarted"

    if ev == "on_chat_model_end":
        dur = buffer.duration_ms(event_run_id)
        buf = buffer.get(event_run_id)
        # Capture before removal: was LLMStarted actually sent for this run?
        llm_started = buf.llm_started if buf else False

        # End OTel span created in on_chat_model_start and detach context
        if buf is not None and buf.otel_span is not None:
            if buf.otel_token is not None:
                otel_context.detach(buf.otel_token)
            buf.otel_span.end()

        buffer.remove(event_run_id)
        # Don't clear SpanProcessor activity context here — the tool (parent)
        # is still active. Clearing would break subagent LLM span attribution.
        # Context is cleared at on_tool_end instead.
        # Skip if LLMStarted was never sent (empty/no human-turn prompt).
        # Firing a hook_trigger span for a non-existent row creates an
        # orphan empty ActivityStarted row in Core.
        if not llm_started:
            return None, False, False, "LLMCompleted"
        llm_output = data.get("output") or {}
        input_tokens, output_tokens, total_tokens = _extract_token_usage(llm_output)
        completion_text = _extract_completion_text(llm_output)
        model_name = (
            _extract_model_name_from_output(llm_output)
            or _extract_model_name_from_event(event)
            or name
        )
        has_tool_calls = bool(_extract_tool_calls(llm_output))
        # NOTE: No span hook for LLM calls. The user's hard rule:
        # "every activity started … not the LLM prompt should have a span call"
        # LLM events are explicitly excluded from the span requirement.
        # Additionally, the LLMStarted activity_id (from _pre_screen_input or
        # _GuardrailsCallbackHandler) doesn't reliably match event.run_id here,
        # so a span hook would create an orphan governance event (duplicate).
        gov = LangChainGovernanceEvent(
            source="workflow-telemetry",
            event_type="LLMCompleted",
            workflow_id=workflow_id,
            run_id=run_id,
            workflow_type=self._config.agent_name or "LangGraphRun",
            task_queue=self._config.task_queue,
            timestamp=rfc3339_now(),
            session_id=self._config.session_id,
            activity_id=event_run_id,
            activity_output=safe_serialize(llm_output),
            status="completed",
            duration_ms=dur,
            llm_model=model_name,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=total_tokens,
            has_tool_calls=has_tool_calls,
            completion=completion_text,
            langgraph_node=langgraph_node,
            langgraph_step=langgraph_step,
        )
        return gov, False, False, "LLMCompleted"

    # Streaming chunks and other events — not governed
    return None, False, False, ""
1439
+
1440
+
1441
+ # ═══════════════════════════════════════════════════════════════════
1442
+ # Factory
1443
+ # ═══════════════════════════════════════════════════════════════════
1444
+
1445
def create_openbox_graph_handler(
    graph: Any,
    *,
    api_url: str,
    api_key: str,
    governance_timeout: float = 30.0,
    validate: bool = True,
    enable_telemetry: bool = True,
    sqlalchemy_engine: Any = None,
    **handler_kwargs: Any,
) -> OpenBoxLangGraphHandler:
    """Create a fully configured `OpenBoxLangGraphHandler` wrapping a compiled LangGraph graph.

    Calls the synchronous `initialize()` to validate credentials and set up global config,
    then returns a ready-to-use `OpenBoxLangGraphHandler`.

    Args:
        graph: A compiled LangGraph graph (e.g. `StateGraph.compile()`).
        api_url: Base URL of your OpenBox Core instance.
        api_key: API key in `obx_live_*` or `obx_test_*` format.
        governance_timeout: HTTP timeout in **seconds** for governance calls (default 30.0).
        validate: If True, validates the API key against the server on startup.
        enable_telemetry: Reserved for future HTTP-span telemetry patching.
        sqlalchemy_engine: Optional SQLAlchemy Engine instance to instrument for DB
            governance. Required when the engine is created before the handler.
        **handler_kwargs: Additional keyword arguments forwarded to
            `OpenBoxLangGraphHandlerOptions`. Unknown keys are dropped with a
            warning instead of raising.

    Returns:
        A configured `OpenBoxLangGraphHandler` ready to govern the graph.

    Example:
        >>> governed = create_openbox_graph_handler(
        ...     graph=my_graph,
        ...     api_url=os.environ["OPENBOX_URL"],
        ...     api_key=os.environ["OPENBOX_API_KEY"],
        ...     agent_name="MyAgent",
        ...     hitl={"enabled": True, "poll_interval_ms": 5000, "max_wait_ms": 300000},
        ... )
    """
    from dataclasses import fields, is_dataclass

    from openbox_langgraph.config import initialize

    initialize(
        api_url=api_url,
        api_key=api_key,
        governance_timeout=governance_timeout,
        validate=validate,
    )

    # Decide which handler_kwargs the options class accepts. The previous
    # `hasattr(OpenBoxLangGraphHandlerOptions, k)` filter silently dropped
    # valid kwargs for dataclass fields without class-level defaults (hasattr
    # is False for those) and silently swallowed typos. Prefer the dataclass
    # field list when available; fall back to hasattr otherwise.
    if is_dataclass(OpenBoxLangGraphHandlerOptions):
        accepted = {f.name for f in fields(OpenBoxLangGraphHandlerOptions)}
    else:
        accepted = {k for k in handler_kwargs if hasattr(OpenBoxLangGraphHandlerOptions, k)}
    unknown = set(handler_kwargs) - accepted
    if unknown:
        # Surface typos instead of dropping them silently.
        logging.getLogger(__name__).warning(
            "create_openbox_graph_handler: ignoring unknown options %s", sorted(unknown)
        )

    options = OpenBoxLangGraphHandlerOptions(
        api_timeout=governance_timeout,
        sqlalchemy_engine=sqlalchemy_engine,
        **{k: v for k, v in handler_kwargs.items() if k in accepted},
    )
    return OpenBoxLangGraphHandler(graph, options)
1499
+
1500
+
1501
+ # ═══════════════════════════════════════════════════════════════════
1502
+ # Private helpers
1503
+ # ═══════════════════════════════════════════════════════════════════
1504
+
1505
+ def _extract_thread_id(config: dict[str, Any] | None) -> str:
1506
+ """Extract thread_id from a LangGraph RunnableConfig dict."""
1507
+ if not config:
1508
+ return "default"
1509
+ configurable = config.get("configurable") or {}
1510
+ return configurable.get("thread_id") or "default"
1511
+
1512
+
1513
+ def _unwrap_tool_input(raw: Any) -> Any:
1514
+ """Unwrap potentially double-encoded JSON tool input."""
1515
+ import json
1516
+
1517
+ if not isinstance(raw, str):
1518
+ return raw
1519
+ try:
1520
+ parsed = json.loads(raw)
1521
+ if isinstance(parsed, dict):
1522
+ if list(parsed.keys()) == ["input"] and isinstance(parsed["input"], str):
1523
+ return json.loads(parsed["input"])
1524
+ return parsed
1525
+ except (json.JSONDecodeError, ValueError):
1526
+ pass
1527
+ return raw
1528
+
1529
+
1530
+ def _extract_prompt_from_messages(messages: Any) -> str:
1531
+ """Extract the last human/user message text from a LangChain messages structure."""
1532
+ if not isinstance(messages, (list, tuple)):
1533
+ return ""
1534
+ flat: list[Any] = []
1535
+ for item in messages:
1536
+ if isinstance(item, (list, tuple)):
1537
+ flat.extend(item)
1538
+ else:
1539
+ flat.append(item)
1540
+ for msg in reversed(flat):
1541
+ if hasattr(msg, "content"):
1542
+ content = msg.content
1543
+ elif isinstance(msg, dict):
1544
+ content = msg.get("content", "")
1545
+ else:
1546
+ continue
1547
+ if isinstance(content, str):
1548
+ return content
1549
+ if isinstance(content, list):
1550
+ parts = [
1551
+ p.get("text", "") for p in content
1552
+ if isinstance(p, dict) and p.get("type") == "text"
1553
+ ]
1554
+ return " ".join(parts)
1555
+ return ""
1556
+
1557
+
1558
+ def _extract_model_name_from_event(event: LangGraphStreamEvent) -> str | None:
1559
+ """Extract model name from event metadata."""
1560
+ return (
1561
+ event.metadata.get("ls_model_name")
1562
+ or event.metadata.get("model_name")
1563
+ or None
1564
+ )
1565
+
1566
+
1567
+ def _extract_model_name_from_output(output: Any) -> str | None:
1568
+ """Extract model name from LLM output dict."""
1569
+ if not isinstance(output, dict):
1570
+ return None
1571
+ meta = output.get("response_metadata") or {}
1572
+ return meta.get("model_name") or meta.get("model") or output.get("model") or None
1573
+
1574
+
1575
+ def _extract_token_usage(output: Any) -> tuple[int | None, int | None, int | None]:
1576
+ """Extract (input_tokens, output_tokens, total_tokens) from an LLM output dict."""
1577
+ if not isinstance(output, dict):
1578
+ return None, None, None
1579
+ usage = (
1580
+ output.get("usage_metadata") or output.get("response_metadata", {}).get("usage", {}) or {}
1581
+ )
1582
+ input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens")
1583
+ output_tokens = usage.get("output_tokens") or usage.get("completion_tokens")
1584
+ total = usage.get("total_tokens") or (
1585
+ (input_tokens or 0) + (output_tokens or 0) if (input_tokens or output_tokens) else None
1586
+ )
1587
+ return input_tokens, output_tokens, total
1588
+
1589
+
1590
+ def _extract_completion_text(output: Any) -> str | None:
1591
+ """Extract the assistant completion text from an LLM output dict."""
1592
+ if not isinstance(output, dict):
1593
+ return None
1594
+ # LangChain AIMessage structure
1595
+ content = output.get("content")
1596
+ if isinstance(content, str):
1597
+ return content
1598
+ if isinstance(content, list):
1599
+ parts = [
1600
+ p.get("text", "") for p in content
1601
+ if isinstance(p, dict) and p.get("type") == "text"
1602
+ ]
1603
+ return " ".join(parts) if parts else None
1604
+ return None
1605
+
1606
+
1607
+ def _extract_tool_calls(output: Any) -> list[Any]:
1608
+ """Return tool_calls list from an LLM output dict (empty list if none)."""
1609
+ if not isinstance(output, dict):
1610
+ return []
1611
+ tool_calls = output.get("tool_calls") or []
1612
+ if tool_calls:
1613
+ return tool_calls
1614
+ # LangChain AIMessage wraps tool_calls in additional_kwargs
1615
+ additional = output.get("additional_kwargs") or {}
1616
+ return additional.get("tool_calls") or []