brooder 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brooder/__init__.py +31 -0
- brooder/analysis.py +79 -0
- brooder/cli.py +281 -0
- brooder/config.py +88 -0
- brooder/diffing.py +217 -0
- brooder/errors.py +31 -0
- brooder/integrations/__init__.py +75 -0
- brooder/integrations/anthropic.py +46 -0
- brooder/integrations/base.py +170 -0
- brooder/integrations/bedrock.py +49 -0
- brooder/integrations/claude_agent.py +164 -0
- brooder/integrations/google.py +61 -0
- brooder/integrations/langchain.py +321 -0
- brooder/integrations/openai.py +43 -0
- brooder/integrations/openai_agents.py +208 -0
- brooder/integrations/otel.py +216 -0
- brooder/judges.py +109 -0
- brooder/log.py +33 -0
- brooder/metrics.py +116 -0
- brooder/models.py +148 -0
- brooder/py.typed +1 -0
- brooder/recorder.py +342 -0
- brooder/report.py +261 -0
- brooder/storage.py +150 -0
- brooder-0.1.0.dist-info/METADATA +338 -0
- brooder-0.1.0.dist-info/RECORD +30 -0
- brooder-0.1.0.dist-info/WHEEL +4 -0
- brooder-0.1.0.dist-info/entry_points.txt +2 -0
- brooder-0.1.0.dist-info/licenses/LICENSE +201 -0
- brooder-0.1.0.dist-info/licenses/NOTICE +7 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""LangChain / LangGraph capture — trajectories from a callback handler.
|
|
2
|
+
|
|
3
|
+
Attach Brooder's callback handler to a LangChain or LangGraph run and it records the agent's
|
|
4
|
+
trajectory automatically — no manual ``tool_call``, and no OpenTelemetry instrumentation to wire:
|
|
5
|
+
|
|
6
|
+
import brooder.integrations.langchain as sg_lc
|
|
7
|
+
|
|
8
|
+
handler = sg_lc.callback_handler(agent="support-agent")
|
|
9
|
+
graph.invoke({"messages": [...]}, config={"callbacks": [handler]})
|
|
10
|
+
|
|
11
|
+
Because LangGraph runs on LangChain's callback system, one handler covers both.
|
|
12
|
+
|
|
13
|
+
**How callbacks map to steps** (verified against ``langchain_core`` 0.3.x):
|
|
14
|
+
|
|
15
|
+
- the **root** ``on_chain_start`` (``parent_run_id is None``) opens a run; its input is the case id.
|
|
16
|
+
- ``on_chat_model_start`` / ``on_llm_start`` → a ``TURN`` step.
|
|
17
|
+
- ``on_tool_start`` + ``on_tool_end`` → a ``TOOL`` step (paired by the tool's own ``run_id``).
|
|
18
|
+
- the root ``on_chain_end`` → a ``FINAL`` step and save.
|
|
19
|
+
|
|
20
|
+
Nested LLM/tool calls carry a ``parent_run_id`` chain, so the handler maps every ``run_id`` to its
|
|
21
|
+
root to attach steps to the right run. A sync handler still fires during async runs but may run on
|
|
22
|
+
worker threads, so capture goes through the thread-safe :class:`~brooder.recorder.RunHandle`.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import threading
|
|
29
|
+
from typing import Any, Optional
|
|
30
|
+
|
|
31
|
+
from .. import recorder
|
|
32
|
+
from ..errors import BrooderError
|
|
33
|
+
from ..log import get_logger
|
|
34
|
+
from .base import as_dict, parse_json
|
|
35
|
+
|
|
36
|
+
# Module-level logger; the capture methods below report their failures through it at debug level.
_log = get_logger()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _coerce(value: Any) -> Any:
|
|
40
|
+
"""Coerce a value into a JSON-safe form (unknown objects become their ``str``)."""
|
|
41
|
+
try:
|
|
42
|
+
return json.loads(json.dumps(value, default=str))
|
|
43
|
+
except (TypeError, ValueError):
|
|
44
|
+
return str(value)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _message_content(message: Any) -> Any:
|
|
48
|
+
"""Return a LangChain message's ``.content`` (or its ``str`` if it isn't a message)."""
|
|
49
|
+
content = getattr(message, "content", None)
|
|
50
|
+
return content if content is not None else str(message)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _extract_input(inputs: Any) -> Any:
    """Derive a JSON-safe representation of a run's inputs (used as the case identity).

    For a dict, a non-empty ``"messages"`` list is reduced to the contents of its messages; a
    dict with exactly one entry is reduced to that entry's value. Anything else is coerced whole.
    """
    if not isinstance(inputs, dict):
        return _coerce(inputs)

    messages = inputs.get("messages")
    if isinstance(messages, list) and messages:
        return _coerce([_message_content(item) for item in messages])

    if len(inputs) == 1:
        (only_value,) = inputs.values()
        return _coerce(only_value)

    return _coerce(inputs)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _final_output(outputs: Any) -> Any:
    """Derive the final answer from the root chain's ``outputs``, in JSON-safe form.

    For a dict, a non-empty ``"messages"`` list yields the content of its last message; a dict
    with exactly one entry yields that entry's value. Anything else is coerced whole.
    """
    if not isinstance(outputs, dict):
        return _coerce(outputs)

    messages = outputs.get("messages")
    if isinstance(messages, list) and messages:
        last_message = messages[-1]
        return _coerce(_message_content(last_message))

    if len(outputs) == 1:
        (only_value,) = outputs.values()
        return _coerce(only_value)

    return _coerce(outputs)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _tool_name(serialized: Any, extra: dict[str, Any]) -> str:
|
|
76
|
+
"""Resolve a tool's name from its ``serialized`` payload, falling back to callback kwargs."""
|
|
77
|
+
name = (serialized or {}).get("name") if isinstance(serialized, dict) else None
|
|
78
|
+
return name or extra.get("name") or "tool"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _tool_args(inputs: Any, input_str: Any) -> dict[str, Any]:
    """Build a tool's argument dict.

    Uses ``as_dict(inputs)`` whenever ``inputs`` is not ``None``; otherwise falls back to
    ``parse_json(input_str)``.
    """
    return parse_json(input_str) if inputs is None else as_dict(inputs)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _tool_output(output: Any) -> Any:
    """Return a tool result in JSON-safe form.

    When ``output`` carries a non-``None`` ``content`` attribute (e.g. a ``ToolMessage``), that
    content is used; otherwise the raw ``output`` is.
    """
    payload = getattr(output, "content", None)
    if payload is None:
        payload = output
    return _coerce(payload)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class _Capture:
|
|
95
|
+
"""The framework-agnostic capture logic behind the callback handler.
|
|
96
|
+
|
|
97
|
+
Kept separate from the ``BaseCallbackHandler`` subclass so it imports and tests without any
|
|
98
|
+
LangChain install. Keys one run per root ``run_id``; maps each nested run id to its root; pairs
|
|
99
|
+
tool start/end by the tool's own ``run_id``. Thread-safe via the recorder's ``RunHandle`` and an
|
|
100
|
+
internal lock over its bookkeeping dicts.
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
def __init__(self, agent: Optional[str]) -> None:
|
|
104
|
+
self._agent = agent
|
|
105
|
+
self._root_of: dict[str, str] = {}
|
|
106
|
+
self._pending: dict[str, tuple[str, str, dict[str, Any]]] = {}
|
|
107
|
+
self._lock = threading.Lock()
|
|
108
|
+
|
|
109
|
+
def _resolve_root(self, run_id: Any, parent_run_id: Any) -> str:
|
|
110
|
+
"""Record ``run_id``'s place in the run tree and return its root run id."""
|
|
111
|
+
rid = str(run_id)
|
|
112
|
+
pid = str(parent_run_id) if parent_run_id is not None else None
|
|
113
|
+
with self._lock:
|
|
114
|
+
if pid is None:
|
|
115
|
+
self._root_of[rid] = rid
|
|
116
|
+
return rid
|
|
117
|
+
root = self._root_of.get(pid, pid)
|
|
118
|
+
self._root_of[rid] = root
|
|
119
|
+
return root
|
|
120
|
+
|
|
121
|
+
def _cleanup(self, root: str) -> None:
|
|
122
|
+
"""Drop bookkeeping for a finished root run."""
|
|
123
|
+
with self._lock:
|
|
124
|
+
self._root_of = {k: v for k, v in self._root_of.items() if v != root}
|
|
125
|
+
self._pending = {k: v for k, v in self._pending.items() if v[0] != root}
|
|
126
|
+
|
|
127
|
+
def chain_start(self, serialized: Any, inputs: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
128
|
+
"""Open a run when the root chain starts."""
|
|
129
|
+
try:
|
|
130
|
+
root = self._resolve_root(run_id, parent_run_id)
|
|
131
|
+
if parent_run_id is None and recorder.get_run(root) is None:
|
|
132
|
+
recorder.open_run(
|
|
133
|
+
self._agent or "langchain-agent",
|
|
134
|
+
inputs=_extract_input(inputs),
|
|
135
|
+
external_id=root,
|
|
136
|
+
)
|
|
137
|
+
except Exception: # capture must never break the app
|
|
138
|
+
_log.debug("brooder: langchain chain_start capture failed", exc_info=True)
|
|
139
|
+
|
|
140
|
+
def model_start(self, payload: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
141
|
+
"""Append a TURN step for a model call (chat or completion)."""
|
|
142
|
+
try:
|
|
143
|
+
root = self._resolve_root(run_id, parent_run_id)
|
|
144
|
+
handle = recorder.get_run(root)
|
|
145
|
+
if handle is None and parent_run_id is None:
|
|
146
|
+
handle = recorder.open_run(
|
|
147
|
+
self._agent or "langchain-agent",
|
|
148
|
+
inputs=_extract_input(payload),
|
|
149
|
+
external_id=root,
|
|
150
|
+
)
|
|
151
|
+
if handle is not None:
|
|
152
|
+
handle.turn({"provider": "langchain"})
|
|
153
|
+
except Exception:
|
|
154
|
+
_log.debug("brooder: langchain model_start capture failed", exc_info=True)
|
|
155
|
+
|
|
156
|
+
def tool_start(
|
|
157
|
+
self, serialized: Any, input_str: Any, run_id: Any, parent_run_id: Any, inputs: Any
|
|
158
|
+
) -> None:
|
|
159
|
+
"""Stash a tool call, to be emitted with its result at :meth:`tool_end`."""
|
|
160
|
+
try:
|
|
161
|
+
root = self._resolve_root(run_id, parent_run_id)
|
|
162
|
+
name = _tool_name(serialized, {})
|
|
163
|
+
args = _tool_args(inputs, input_str)
|
|
164
|
+
with self._lock:
|
|
165
|
+
self._pending[str(run_id)] = (root, name, args)
|
|
166
|
+
except Exception:
|
|
167
|
+
_log.debug("brooder: langchain tool_start capture failed", exc_info=True)
|
|
168
|
+
|
|
169
|
+
def tool_end(self, output: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
170
|
+
"""Emit the TOOL step for a completed tool call."""
|
|
171
|
+
self._emit_tool(run_id, _tool_output(output))
|
|
172
|
+
|
|
173
|
+
def tool_error(self, error: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
174
|
+
"""Emit the TOOL step for a failed tool call (result is the error)."""
|
|
175
|
+
self._emit_tool(run_id, f"error: {error}")
|
|
176
|
+
|
|
177
|
+
def _emit_tool(self, run_id: Any, result: Any) -> None:
|
|
178
|
+
try:
|
|
179
|
+
with self._lock:
|
|
180
|
+
entry = self._pending.pop(str(run_id), None)
|
|
181
|
+
if entry is None:
|
|
182
|
+
return
|
|
183
|
+
root, name, args = entry
|
|
184
|
+
handle = recorder.get_run(root)
|
|
185
|
+
if handle is not None:
|
|
186
|
+
handle.tool_call(name, args, result=result)
|
|
187
|
+
except Exception:
|
|
188
|
+
_log.debug("brooder: langchain tool_end capture failed", exc_info=True)
|
|
189
|
+
|
|
190
|
+
def chain_end(self, outputs: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
191
|
+
"""Finalize the run when the root chain ends."""
|
|
192
|
+
self._finalize(run_id, parent_run_id, _final_output(outputs))
|
|
193
|
+
|
|
194
|
+
def chain_error(self, error: Any, run_id: Any, parent_run_id: Any) -> None:
|
|
195
|
+
"""Finalize the run when the root chain errors (no final answer)."""
|
|
196
|
+
self._finalize(run_id, parent_run_id, None)
|
|
197
|
+
|
|
198
|
+
def _finalize(self, run_id: Any, parent_run_id: Any, output: Any) -> None:
|
|
199
|
+
try:
|
|
200
|
+
if parent_run_id is not None:
|
|
201
|
+
return # only the root chain delimits the run
|
|
202
|
+
root = str(run_id)
|
|
203
|
+
handle = recorder.get_run(root)
|
|
204
|
+
if handle is not None:
|
|
205
|
+
handle.finish(output)
|
|
206
|
+
self._cleanup(root)
|
|
207
|
+
except Exception:
|
|
208
|
+
_log.debug("brooder: langchain chain_end capture failed", exc_info=True)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# Cache for the lazily built handler class; ``callback_handler`` fills it in on first use.
_handler_cls: Optional[type] = None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _build_handler_class() -> type:
    """Create the ``BaseCallbackHandler`` subclass on demand.

    ``langchain_core`` is imported here rather than at module load, so an ``ImportError``
    surfaces only when a handler is actually requested.
    """
    from langchain_core.callbacks import BaseCallbackHandler

    class BrooderCallbackHandler(BaseCallbackHandler):  # type: ignore[misc]
        """LangChain callback handler that relays run events to a :class:`_Capture`."""

        raise_error = False

        def __init__(self, agent: Optional[str] = None) -> None:
            """Set up a handler whose runs are recorded under ``agent``."""
            super().__init__()
            self._cap = _Capture(agent)

        def on_chain_start(
            self,
            serialized: Any,
            inputs: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a chain start."""
            self._cap.chain_start(serialized, inputs, run_id, parent_run_id)

        def on_chain_end(
            self,
            outputs: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a chain end."""
            self._cap.chain_end(outputs, run_id, parent_run_id)

        def on_chain_error(
            self,
            error: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a chain error."""
            self._cap.chain_error(error, run_id, parent_run_id)

        def on_chat_model_start(
            self,
            serialized: Any,
            messages: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a chat-model start; the messages are passed on as the turn's payload."""
            self._cap.model_start(messages, run_id, parent_run_id)

        def on_llm_start(
            self,
            serialized: Any,
            prompts: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a completion-model start; the prompts are passed on as the turn's payload."""
            self._cap.model_start(prompts, run_id, parent_run_id)

        def on_tool_start(
            self,
            serialized: Any,
            input_str: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            inputs: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a tool start, including the structured ``inputs`` when provided."""
            self._cap.tool_start(serialized, input_str, run_id, parent_run_id, inputs)

        def on_tool_end(
            self,
            output: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a tool end."""
            self._cap.tool_end(output, run_id, parent_run_id)

        def on_tool_error(
            self,
            error: Any,
            *,
            run_id: Any,
            parent_run_id: Any = None,
            **_ignored: Any,
        ) -> None:
            """Relay a tool error."""
            self._cap.tool_error(error, run_id, parent_run_id)

    return BrooderCallbackHandler
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def callback_handler(agent: Optional[str] = None) -> Any:
    """Create a Brooder callback handler to pass as ``config={"callbacks": [handler]}``.

    The handler class is built on the first call and cached in ``_handler_cls`` for later calls.
    The same handler works for LangGraph, which runs on LangChain's callbacks.

    Args:
        agent: Logical agent name used to group baselines (defaults to ``"langchain-agent"``).

    Returns:
        A ``langchain_core`` ``BaseCallbackHandler`` instance.

    Raises:
        BrooderError: If LangChain (``langchain_core``) is not installed.
    """
    global _handler_cls
    handler_cls = _handler_cls
    if handler_cls is None:
        try:
            handler_cls = _build_handler_class()
        except ImportError as exc:
            raise BrooderError(
                "langchain.callback_handler() needs LangChain — `pip install langchain-core`"
            ) from exc
        _handler_cls = handler_cls
    return handler_cls(agent)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""OpenAI and Azure OpenAI auto-capture.
|
|
2
|
+
|
|
3
|
+
Both use the ``openai`` SDK, so one normalizer covers ``OpenAI``, ``AzureOpenAI``, and
|
|
4
|
+
API-compatible clients. Wraps ``client.chat.completions.create``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
from .base import NormalizedCall, ToolRequest, get, parse_json, wrap
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _normalize(_kwargs: dict[str, Any], response: Any) -> Optional[NormalizedCall]:
    """Convert a chat-completions response into a ``NormalizedCall``.

    Only the first choice's message is read: its ``content`` and its ``tool_calls`` (each call's
    function name and JSON-parsed arguments). The request kwargs are not used.
    """
    choices = get(response, "choices") or []
    message = get(choices[0], "message") if choices else None

    requests = []
    for call in get(message, "tool_calls") or []:
        function = get(call, "function")
        requests.append(
            ToolRequest(
                name=get(function, "name") or "",
                arguments=parse_json(get(function, "arguments")),
            )
        )

    return NormalizedCall(
        provider="openai",
        model=get(response, "model"),
        content=get(message, "content"),
        tool_calls=requests,
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def instrument(client: Any, capture_content: bool = False) -> Any:
    """Patch ``client.chat.completions.create`` so its calls are captured.

    Args:
        client: An ``OpenAI`` or ``AzureOpenAI`` client instance.
        capture_content: Also record assistant text, not just tool calls.

    Returns:
        The same client, patched in place.
    """
    completions = client.chat.completions
    wrap(completions, "create", _normalize, capture_content)
    return client
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""OpenAI Agents SDK capture — trajectories from the SDK's tracing.
|
|
2
|
+
|
|
3
|
+
Its tracing is on by default. Install Brooder's trace processor and it records the agent's
|
|
4
|
+
trajectory automatically — no manual ``tool_call``:
|
|
5
|
+
|
|
6
|
+
import brooder.integrations.openai_agents as sg_agents
|
|
7
|
+
sg_agents.install(agent="support-agent") # then run your agent as usual
|
|
8
|
+
|
|
9
|
+
from agents import Agent, Runner
|
|
10
|
+
Runner.run_sync(Agent(name="support", ...), "cancel my order")
|
|
11
|
+
|
|
12
|
+
`install` appends the processor (the default OpenAI exporter keeps working) and needs no OpenAI API
|
|
13
|
+
key to capture.
|
|
14
|
+
|
|
15
|
+
**How spans map to steps** (by ``span_data.type``):
|
|
16
|
+
|
|
17
|
+
- ``generation`` / ``response`` → ``TURN``
|
|
18
|
+
- ``function`` → ``TOOL`` (name, JSON ``input`` args, and the tool ``output``)
|
|
19
|
+
- ``handoff`` → ``TOOL`` named ``"handoff"`` with ``{from, to}`` — control-flow, kept in-trajectory
|
|
20
|
+
- ``guardrail`` → ``TOOL`` named ``"guardrail"`` with ``{name, triggered}`` — so a flip is diffable
|
|
21
|
+
|
|
22
|
+
Spans are buffered per trace and, on ``on_trace_end``, sorted by ``started_at`` and mapped into one
|
|
23
|
+
run — so parallel tool calls / concurrent agents (which interleave callbacks across threads) are
|
|
24
|
+
handled. The initial input and final answer are recovered from the response spans, since the SDK
|
|
25
|
+
exposes no trace-level input/output. Incomplete traces drain on ``force_flush`` / ``shutdown``.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import threading
|
|
32
|
+
from typing import Any, Optional
|
|
33
|
+
|
|
34
|
+
from .. import recorder
|
|
35
|
+
from ..errors import BrooderError
|
|
36
|
+
from ..models import Step, StepKind
|
|
37
|
+
from .base import parse_json
|
|
38
|
+
|
|
39
|
+
# ``span_data.type`` values that Brooder treats as model turns: they map to a TURN step and are
# the spans searched for the run's initial input and final output.
_TURN_TYPES = frozenset({"generation", "response"})
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _jsonable(value: Any) -> Any:
|
|
44
|
+
"""Coerce a value into a JSON-safe form (unknown objects become their ``str``)."""
|
|
45
|
+
try:
|
|
46
|
+
return json.loads(json.dumps(value, default=str))
|
|
47
|
+
except (TypeError, ValueError):
|
|
48
|
+
return str(value)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _started_at(span: Any) -> str:
|
|
52
|
+
"""Return a span's ISO start time for ordering (ISO strings sort chronologically)."""
|
|
53
|
+
return getattr(span, "started_at", None) or ""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _to_step(span: Any) -> Optional[Step]:
    """Translate a span into a trajectory step, keyed on ``span.span_data.type``.

    Turn types become a ``TURN`` step; ``function``, ``handoff`` and ``guardrail`` become
    ``TOOL`` steps. Any other span type yields ``None``.
    """
    data = getattr(span, "span_data", None)
    span_type = getattr(data, "type", None)

    if span_type in _TURN_TYPES:
        return Step(kind=StepKind.TURN, name="turn", args={"provider": "openai"})

    if span_type == "function":
        tool_name = getattr(data, "name", None) or "tool"
        tool_args = parse_json(getattr(data, "input", None))
        tool_result = _jsonable(getattr(data, "output", None))
        return Step(kind=StepKind.TOOL, name=tool_name, args=tool_args, result=tool_result)

    if span_type == "handoff":
        handoff_args = {
            "from": getattr(data, "from_agent", None),
            "to": getattr(data, "to_agent", None),
        }
        return Step(kind=StepKind.TOOL, name="handoff", args=handoff_args)

    if span_type == "guardrail":
        guardrail_args = {
            "name": getattr(data, "name", None),
            "triggered": bool(getattr(data, "triggered", False)),
        }
        return Step(kind=StepKind.TOOL, name="guardrail", args=guardrail_args)

    return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _initial_input(spans: list[Any]) -> Any:
    """Return the JSON-safe ``input`` of the first turn span that has one, else ``None``.

    Spans are scanned in the order given, so callers wanting the earliest input pass them
    sorted by start time.
    """
    for span in spans:
        data = getattr(span, "span_data", None)
        if getattr(data, "type", None) not in _TURN_TYPES:
            continue
        candidate = getattr(data, "input", None)
        if candidate is not None:
            return _jsonable(candidate)
    return None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _final_output(spans: list[Any]) -> Any:
    """Return the final answer found by scanning ``spans`` from last to first.

    A ``response`` span contributes its ``response.output_text``; a ``generation`` span
    contributes its JSON-safe ``output``. The first non-empty value wins; ``None`` if none exists.
    """
    for span in reversed(spans):
        data = getattr(span, "span_data", None)
        kind = getattr(data, "type", None)
        if kind == "response":
            response = getattr(data, "response", None)
            text = getattr(response, "output_text", None)
            if text:
                return text
            continue
        if kind == "generation":
            output = getattr(data, "output", None)
            if output:
                return _jsonable(output)
    return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class BrooderTraceProcessor:
    """An OpenAI Agents SDK ``TracingProcessor`` that ingests traces into Brooder trajectories.

    Duck-typed against the SDK's ``TracingProcessor`` protocol, so importing this module does not
    require ``openai-agents`` to be installed. Register it with :func:`install` (or add it yourself
    via ``agents.add_trace_processor``).

    Ended spans are buffered per ``trace_id``; a buffer becomes one recorded run when its trace
    ends or when the processor is flushed / shut down. All access to the buffers is guarded by an
    internal lock.

    Args:
        agent: Logical agent name used to group baselines. Defaults to the trace's workflow name,
            then ``"openai-agent"``.
    """

    def __init__(self, agent: Optional[str] = None) -> None:
        """Create a processor recording into runs named ``agent`` (see the class docstring)."""
        self._agent = agent
        # trace id -> {"trace": the trace object or None, "spans": ended spans in arrival order}
        self._buffers: dict[str, dict[str, Any]] = {}
        self._lock = threading.Lock()

    def on_trace_start(self, trace: Any) -> None:
        """Open a per-trace buffer keyed by ``trace_id``."""
        trace_id = getattr(trace, "trace_id", None)
        # Same filter as on_span_end: spans of a "no-op" trace are never buffered, so a buffer
        # for it could only ever stay empty.
        if trace_id is None or trace_id == "no-op":
            return
        with self._lock:
            self._buffers.setdefault(trace_id, {"trace": trace, "spans": []})

    def on_trace_end(self, trace: Any) -> None:
        """Finalize the trace's buffered spans into one run."""
        trace_id = getattr(trace, "trace_id", None)
        if trace_id is not None:
            self._finalize(trace_id)

    def on_span_start(self, span: Any) -> None:
        """Part of the protocol; capture happens at :meth:`on_span_end`."""
        return None

    def on_span_end(self, span: Any) -> None:
        """Buffer an ended span under its trace id."""
        trace_id = getattr(span, "trace_id", None)
        if trace_id is None or trace_id == "no-op":
            return
        with self._lock:
            # A span may arrive without a preceding on_trace_start; buffer it with no trace.
            entry = self._buffers.setdefault(trace_id, {"trace": None, "spans": []})
            entry["spans"].append(span)

    def shutdown(self, timeout: Optional[float] = None) -> None:
        """Drain every buffered (possibly incomplete) trace into a run; ``timeout`` is unused."""
        self._drain()

    def force_flush(self) -> None:
        """Drain every buffered trace into a run."""
        self._drain()

    def _drain(self) -> None:
        """Finalize every trace that is buffered at the time of the call."""
        # Snapshot under the lock so concurrent span/trace callbacks cannot mutate the dict
        # while it is being copied; _finalize re-acquires the lock per trace.
        with self._lock:
            pending = list(self._buffers)
        for trace_id in pending:
            self._finalize(trace_id)

    def _finalize(self, trace_id: str) -> None:
        """Turn a trace's buffered spans into one recorded run (no-op when nothing is buffered)."""
        with self._lock:
            entry = self._buffers.pop(trace_id, None)
        if not entry or not entry["spans"]:
            return
        spans = entry["spans"]
        # Callbacks may interleave, so order by start time before mapping spans to steps.
        spans.sort(key=_started_at)
        agent = self._agent or getattr(entry["trace"], "name", None) or "openai-agent"
        handle = recorder.open_run(agent, inputs=_initial_input(spans), external_id=trace_id)
        for span in spans:
            step = _to_step(span)
            if step is not None:
                handle.add_step(step)
        handle.finish(_final_output(spans))
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def install(agent: Optional[str] = None) -> BrooderTraceProcessor:
    """Create a Brooder trace processor and add it to the OpenAI Agents SDK.

    The processor is added with ``agents.add_trace_processor``, i.e. appended alongside the
    SDK's existing processors. Requires ``openai-agents``.

    Args:
        agent: Logical agent name used to group baselines (see :class:`BrooderTraceProcessor`).

    Returns:
        The registered :class:`BrooderTraceProcessor` (useful for tests / manual flushing).

    Raises:
        BrooderError: If the OpenAI Agents SDK is not installed.
    """
    try:
        from agents import add_trace_processor
    except ImportError as exc:
        raise BrooderError(
            "openai_agents.install() needs the OpenAI Agents SDK — `pip install openai-agents`"
        ) from exc
    else:
        processor = BrooderTraceProcessor(agent)
        add_trace_processor(processor)
        return processor
|