agentdebugx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentdebug/__init__.py +65 -0
- agentdebug/adapters/__init__.py +10 -0
- agentdebug/adapters/base.py +22 -0
- agentdebug/adapters/langgraph.py +261 -0
- agentdebug/adapters/otel.py +151 -0
- agentdebug/adapters/raw.py +134 -0
- agentdebug/analyzers.py +152 -0
- agentdebug/attribution.py +230 -0
- agentdebug/cli.py +272 -0
- agentdebug/events.py +114 -0
- agentdebug/instrumentation.py +57 -0
- agentdebug/judges.py +258 -0
- agentdebug/llm.py +165 -0
- agentdebug/models.py +169 -0
- agentdebug/recorder.py +183 -0
- agentdebug/recovery.py +113 -0
- agentdebug/storage.py +167 -0
- agentdebug/taxonomy.py +271 -0
- agentdebug/ui/__init__.py +14 -0
- agentdebug/ui/server.py +260 -0
- agentdebugx-0.1.0.dist-info/METADATA +217 -0
- agentdebugx-0.1.0.dist-info/RECORD +25 -0
- agentdebugx-0.1.0.dist-info/WHEEL +4 -0
- agentdebugx-0.1.0.dist-info/entry_points.txt +3 -0
- agentdebugx-0.1.0.dist-info/licenses/LICENSE +21 -0
agentdebug/__init__.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""AgentDebugX public API.
|
|
2
|
+
|
|
3
|
+
The package is intentionally imported as ``agentdebug`` so existing and future
|
|
4
|
+
agent systems can add debugging with a small surface area. The PyPI
|
|
5
|
+
distribution is named ``agentdebugx``.
|
|
6
|
+
|
|
7
|
+
See ``docs/14_api_reference.md`` for the full public surface and
|
|
8
|
+
``docs/18_comparison_codex_vs_design.md`` for the merged v0.1 scope.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from agentdebug.analyzers import HeuristicAnalyzer
|
|
12
|
+
from agentdebug.attribution import (
|
|
13
|
+
AllAtOnceAttributor,
|
|
14
|
+
AttributionResult,
|
|
15
|
+
Attributor,
|
|
16
|
+
Blame,
|
|
17
|
+
HeuristicAttributor,
|
|
18
|
+
)
|
|
19
|
+
from agentdebug.events import DEFAULT_BUS, BusEvent, EventBus, EventSubscription
|
|
20
|
+
from agentdebug.models import (
|
|
21
|
+
AgentEvent,
|
|
22
|
+
AgentTrajectory,
|
|
23
|
+
Artifact,
|
|
24
|
+
DiagnosticReport,
|
|
25
|
+
EventType,
|
|
26
|
+
FailureFinding,
|
|
27
|
+
FailureMode,
|
|
28
|
+
Modality,
|
|
29
|
+
)
|
|
30
|
+
from agentdebug.recorder import AgentDebug, TraceSession
|
|
31
|
+
from agentdebug.recovery import FixProposal, Recoverer, ReflexionSuggestion
|
|
32
|
+
from agentdebug.storage import JsonlTraceStore, SQLiteTraceStore
|
|
33
|
+
from agentdebug.taxonomy import SEED_FAILURE_MODES, get_failure_mode
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
'AgentDebug',
|
|
37
|
+
'AgentEvent',
|
|
38
|
+
'AgentTrajectory',
|
|
39
|
+
'AllAtOnceAttributor',
|
|
40
|
+
'Artifact',
|
|
41
|
+
'AttributionResult',
|
|
42
|
+
'Attributor',
|
|
43
|
+
'Blame',
|
|
44
|
+
'BusEvent',
|
|
45
|
+
'DEFAULT_BUS',
|
|
46
|
+
'DiagnosticReport',
|
|
47
|
+
'EventBus',
|
|
48
|
+
'EventSubscription',
|
|
49
|
+
'EventType',
|
|
50
|
+
'FailureFinding',
|
|
51
|
+
'FailureMode',
|
|
52
|
+
'FixProposal',
|
|
53
|
+
'HeuristicAnalyzer',
|
|
54
|
+
'HeuristicAttributor',
|
|
55
|
+
'JsonlTraceStore',
|
|
56
|
+
'Modality',
|
|
57
|
+
'Recoverer',
|
|
58
|
+
'ReflexionSuggestion',
|
|
59
|
+
'SEED_FAILURE_MODES',
|
|
60
|
+
'SQLiteTraceStore',
|
|
61
|
+
'TraceSession',
|
|
62
|
+
'get_failure_mode',
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
__version__ = '0.1.0'
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Framework adapters.
|
|
2
|
+
|
|
3
|
+
The base Protocol and status dataclass are always available. Concrete
|
|
4
|
+
adapters live in sibling modules and are imported lazily by name so that
|
|
5
|
+
``import agentdebug.adapters`` never pulls in optional framework deps.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from agentdebug.adapters.base import AdapterStatus, FrameworkAdapter
|
|
9
|
+
|
|
10
|
+
__all__ = ['AdapterStatus', 'FrameworkAdapter']
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Adapter interfaces for agent frameworks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
|
|
8
|
+
from agentdebug.recorder import AgentDebug
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class AdapterStatus:
    """Immutable report returned by an adapter's ``instrument`` call.

    The adapters in this package use it to tell the caller whether the
    integration is usable and, in ``notes``, either how to wire it up or why
    it is unavailable.
    """

    # Short identifier of the framework the adapter targets.
    framework: str
    # Whether the integration is usable; the adapters in this package report
    # False when an optional dependency cannot be imported.
    implemented: bool
    # Human-readable usage hint or failure explanation.
    notes: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FrameworkAdapter(Protocol):
    """Structural interface shared by all framework adapters.

    Any object exposing a ``framework`` string and an ``instrument`` method
    with this signature satisfies the protocol; no subclassing is required.
    """

    # Short identifier of the framework the adapter targets.
    framework: str

    def instrument(self, debugger: AgentDebug) -> AdapterStatus:
        """Inspect or attach to ``debugger`` and report the adapter's status."""
        ...
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""LangChain / LangGraph adapter.
|
|
2
|
+
|
|
3
|
+
Provides ``LangChainCallbackAdapter``, a ``BaseCallbackHandler`` subclass that
|
|
4
|
+
records LangChain/LangGraph events into an AgentDebugX trajectory.
|
|
5
|
+
|
|
6
|
+
We *defer* the import of ``langchain_core`` so importing this module never fails
|
|
7
|
+
when LangChain is not installed. Users see a clear error only if they actually
|
|
8
|
+
construct the adapter without LangChain installed.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any, Dict, Optional, cast
|
|
14
|
+
|
|
15
|
+
from agentdebug.adapters.base import AdapterStatus, FrameworkAdapter
|
|
16
|
+
from agentdebug.models import AgentTrajectory, EventType
|
|
17
|
+
from agentdebug.recorder import AgentDebug
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _load_base_callback_handler() -> type:
|
|
21
|
+
try:
|
|
22
|
+
from langchain_core.callbacks import BaseCallbackHandler
|
|
23
|
+
except ImportError as exc: # pragma: no cover - exercised in docs
|
|
24
|
+
raise ImportError(
|
|
25
|
+
'LangChainCallbackAdapter requires `langchain_core`. '
|
|
26
|
+
'Install with `pip install langchain-core`.'
|
|
27
|
+
) from exc
|
|
28
|
+
return cast(type, BaseCallbackHandler)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class LangChainCallbackAdapter:
    """Factory for a LangChain/LangGraph callback handler that records into AgentDebugX.

    Constructing this class does not return a ``LangChainCallbackAdapter``
    instance. ``__new__`` creates a subclass of LangChain's
    ``BaseCallbackHandler`` whose ``on_*`` hooks are the ``_on_*`` functions
    defined below, instantiates it, and returns that handler. Each hook
    forwards one event to ``debugger.record_event`` for the given trajectory.

    Usage:

        >>> from agentdebug import AgentDebug
        >>> from agentdebug.adapters.langgraph import LangChainCallbackAdapter
        >>> debugger = AgentDebug()
        >>> trajectory = debugger.start_trace(goal='...', framework='langgraph')
        >>> handler = LangChainCallbackAdapter(debugger, trajectory)
        >>> graph.invoke(state, config={'callbacks': [handler]})
        >>> debugger.finish_trace(trajectory, success=True)
    """

    framework = 'langgraph'

    def __new__(cls, debugger: AgentDebug, trajectory: AgentTrajectory) -> Any:
        handler_base = _load_base_callback_handler()
        # The handler class is assembled at call time so this module carries
        # no import-time dependency on langchain_core.
        hook_names = (
            'on_chain_start',
            'on_chain_end',
            'on_chain_error',
            'on_tool_start',
            'on_tool_end',
            'on_tool_error',
            'on_llm_start',
            'on_llm_end',
            'on_llm_error',
            'on_agent_action',
            'on_agent_finish',
        )
        # Public hook ``on_x`` is backed by the function ``_on_x`` on this class.
        namespace: Dict[str, Any] = {
            hook: getattr(cls, '_' + hook) for hook in hook_names
        }
        namespace['_step_counter'] = 0
        handler_cls = type('_LangChainAdapterImpl', (handler_base,), namespace)
        handler = handler_cls()
        handler._debugger = debugger
        handler._trajectory = trajectory
        return handler

    # ------------- hook implementations -------------

    @staticmethod
    def _next_step(handler: Any) -> int:
        """Advance the handler's step counter by one and return the new value."""
        advanced = handler._step_counter + 1
        handler._step_counter = advanced
        return int(advanced)

    @staticmethod
    def _record(
        handler: Any,
        *,
        event_type: EventType,
        agent_name: str,
        module: Optional[str],
        input: Any = None,
        output: Any = None,
        error: Optional[str] = None,
        parent_event_id: Optional[str] = None,
        run_id: Optional[Any] = None,
        **metadata: Any,
    ) -> None:
        """Forward one event to ``handler._debugger.record_event``.

        ``run_id`` is stringified and sent as extra metadata when it is not
        None; every other keyword in ``metadata`` is passed through unchanged.
        The step index comes from the handler's own counter.
        """
        extras: Dict[str, Any] = {**metadata}
        if run_id is not None:
            extras['run_id'] = str(run_id)
        record_event = handler._debugger.record_event
        record_event(
            handler._trajectory,
            event_type=event_type,
            agent_name=agent_name,
            module=module,
            step_index=LangChainCallbackAdapter._next_step(handler),
            input=input,
            output=output,
            error=error,
            parent_event_id=parent_event_id,
            **extras,
        )

    # The hooks below mirror the BaseCallbackHandler signatures; **kwargs keeps
    # them tolerant of LangChain versions that add or rename fields.

    def _on_chain_start(  # type: ignore[no-untyped-def]
        self, serialized, inputs, *, run_id, parent_run_id=None, **kwargs
    ):
        # Chain start -> AGENT_STEP; named 'chain' when no name is serialized.
        chain_name = str(((serialized or {}).get('name')) or 'chain')
        parent = str(parent_run_id) if parent_run_id else None
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.AGENT_STEP,
            agent_name=chain_name,
            module='planning',
            input=inputs,
            run_id=run_id,
            parent_run_id=parent,
        )

    def _on_chain_end(self, outputs, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Chain end -> OBSERVATION carrying the chain outputs.
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.OBSERVATION,
            agent_name='chain',
            module='planning',
            output=outputs,
            run_id=run_id,
        )

    def _on_chain_error(self, error, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Chain failure -> ERROR with the stringified exception.
        failure = str(error)
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.ERROR,
            agent_name='chain',
            module='system',
            error=failure,
            run_id=run_id,
        )

    def _on_tool_start(  # type: ignore[no-untyped-def]
        self, serialized, input_str, *, run_id, **kwargs
    ):
        # Tool start -> TOOL_CALL; named 'tool' when no name is serialized.
        tool_name = str(((serialized or {}).get('name')) or 'tool')
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.TOOL_CALL,
            agent_name=tool_name,
            module='action',
            input=input_str,
            run_id=run_id,
        )

    def _on_tool_end(self, output, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Tool end -> TOOL_RESULT carrying the tool output.
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.TOOL_RESULT,
            agent_name='tool',
            module='action',
            output=output,
            run_id=run_id,
        )

    def _on_tool_error(self, error, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Tool failure is recorded as a TOOL_RESULT whose ``error`` is set.
        failure = str(error)
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.TOOL_RESULT,
            agent_name='tool',
            module='action',
            error=failure,
            run_id=run_id,
        )

    def _on_llm_start(  # type: ignore[no-untyped-def]
        self, serialized, prompts, *, run_id, **kwargs
    ):
        # LLM start -> LLM_CALL; named 'llm' when no name is serialized.
        model_name = str(((serialized or {}).get('name')) or 'llm')
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.LLM_CALL,
            agent_name=model_name,
            module='planning',
            input=prompts,
            run_id=run_id,
        )

    def _on_llm_end(self, response, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # LLM end -> LLM_RESPONSE; the response text is capped at 1000 chars.
        clipped = str(response)[:1000]
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.LLM_RESPONSE,
            agent_name='llm',
            module='planning',
            output=clipped,
            run_id=run_id,
        )

    def _on_llm_error(self, error, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # LLM failure -> ERROR with the stringified exception.
        failure = str(error)
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.ERROR,
            agent_name='llm',
            module='system',
            error=failure,
            run_id=run_id,
        )

    def _on_agent_action(self, action, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Agent action -> PLAN; the action text is capped at 1000 chars.
        clipped = str(action)[:1000]
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.PLAN,
            agent_name='agent',
            module='planning',
            output=clipped,
            run_id=run_id,
        )

    def _on_agent_finish(self, finish, *, run_id, **kwargs):  # type: ignore[no-untyped-def]
        # Agent finish -> OBSERVATION; the finish text is capped at 1000 chars.
        clipped = str(finish)[:1000]
        LangChainCallbackAdapter._record(
            self,
            event_type=EventType.OBSERVATION,
            agent_name='agent',
            module='planning',
            output=clipped,
            run_id=run_id,
        )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class LangGraphAdapter:
    """LangGraph adapter, structural :class:`FrameworkAdapter` (Protocol).

    ``instrument`` attaches nothing itself: it reports whether the LangChain
    callback base class can be loaded and, if so, how to install the handler.
    """

    framework = 'langgraph'

    def instrument(self, debugger: AgentDebug) -> AdapterStatus:  # noqa: D401
        """Return a status describing whether the callback handler is usable."""
        try:
            _load_base_callback_handler()
        except ImportError as exc:
            # Dependency missing: surface the import error text as the note.
            usable = False
            notes = str(exc)
        else:
            usable = True
            notes = (
                'Create a trajectory, then pass '
                '`LangChainCallbackAdapter(debugger, trajectory)` in '
                "config={'callbacks': [handler]} when invoking your graph."
            )
        return AdapterStatus(
            framework=self.framework,
            implemented=usable,
            notes=notes,
        )
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
_: FrameworkAdapter = LangGraphAdapter() # static structural check
|
|
260
|
+
|
|
261
|
+
__all__ = ['LangChainCallbackAdapter', 'LangGraphAdapter']
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Optional OpenTelemetry GenAI export shim.
|
|
2
|
+
|
|
3
|
+
Translates an :class:`AgentTrajectory` into OTel spans following the GenAI
|
|
4
|
+
semantic conventions (``gen_ai.*`` attributes). Best-effort: the entire module
|
|
5
|
+
is a no-op when ``opentelemetry-api`` is not installed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from agentdebug.adapters.base import AdapterStatus, FrameworkAdapter
|
|
13
|
+
from agentdebug.models import AgentEvent, AgentTrajectory, EventType
|
|
14
|
+
from agentdebug.recorder import AgentDebug
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _otel_available() -> bool:
|
|
18
|
+
try:
|
|
19
|
+
import opentelemetry # noqa: F401
|
|
20
|
+
return True
|
|
21
|
+
except ImportError:
|
|
22
|
+
return False
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_EVENT_TO_OP: Dict[EventType, str] = {
|
|
26
|
+
EventType.LLM_CALL: 'chat',
|
|
27
|
+
EventType.LLM_RESPONSE: 'chat',
|
|
28
|
+
EventType.TOOL_CALL: 'execute_tool',
|
|
29
|
+
EventType.TOOL_RESULT: 'execute_tool',
|
|
30
|
+
EventType.AGENT_STEP: 'invoke_agent',
|
|
31
|
+
EventType.PLAN: 'invoke_agent',
|
|
32
|
+
EventType.REFLECTION: 'invoke_agent',
|
|
33
|
+
EventType.HANDOFF: 'handoff',
|
|
34
|
+
EventType.MEMORY_READ: 'retrieve_context',
|
|
35
|
+
EventType.MEMORY_WRITE: 'retrieve_context',
|
|
36
|
+
EventType.RUN_START: 'invoke_workflow',
|
|
37
|
+
EventType.RUN_END: 'invoke_workflow',
|
|
38
|
+
EventType.OBSERVATION: 'invoke_agent',
|
|
39
|
+
EventType.ERROR: 'invoke_agent',
|
|
40
|
+
EventType.GUARDRAIL: 'invoke_agent',
|
|
41
|
+
EventType.HUMAN_FEEDBACK: 'invoke_agent',
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def export_trajectory(
    trajectory: AgentTrajectory,
    *,
    tracer_name: str = 'agentdebug',
    tracer_provider: Optional[Any] = None,
) -> int:
    """Emit OTel spans for a finished trajectory.

    One root span wraps the run and one child span is started per recorded
    event, in the order they appear in ``trajectory.events``.

    Args:
        trajectory: The trajectory whose events are exported.
        tracer_name: Name passed to ``get_tracer``.
        tracer_provider: Provider to obtain the tracer from; when falsy the
            globally registered provider is used.

    Returns:
        The number of event spans emitted (the root span is not counted), or
        0 with nothing emitted when OpenTelemetry is not installed.
    """
    if not _otel_available():
        return 0
    from opentelemetry import trace as ot_trace

    provider = tracer_provider or ot_trace.get_tracer_provider()
    tracer = provider.get_tracer(tracer_name)
    root_name = f'agentdebug.run/{trajectory.framework or "unknown"}'
    span_count = 0
    with tracer.start_as_current_span(
        root_name, attributes=_root_attributes(trajectory)
    ) as root_span:
        _set_attr(root_span, 'agentdebug.trace_id', trajectory.trace_id)
        # enumerate keeps the running count equal to the spans emitted so far.
        for span_count, event in enumerate(trajectory.events, start=1):
            _emit_event_span(tracer, event)
    return span_count
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _emit_event_span(tracer: Any, event: AgentEvent) -> None:
    """Start (and immediately close) one span describing ``event``.

    The span is named after the event type and carries the attributes built
    by ``_event_attributes``; an ``error.message`` attribute is added when the
    event has a truthy ``error``.
    """
    # Event types missing from the table fall back to 'invoke_agent'.
    operation = _EVENT_TO_OP.get(event.event_type, 'invoke_agent')
    span_name = f'agentdebug.event/{_event_type_value(event.event_type)}'
    span_attributes = _event_attributes(event, operation)
    with tracer.start_as_current_span(span_name, attributes=span_attributes) as span:
        if not event.error:
            return
        _set_attr(span, 'error.message', str(event.error))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _root_attributes(trajectory: AgentTrajectory) -> Dict[str, Any]:
|
|
83
|
+
attrs: Dict[str, Any] = {
|
|
84
|
+
'gen_ai.operation.name': 'invoke_workflow',
|
|
85
|
+
'agentdebug.trace_id': trajectory.trace_id,
|
|
86
|
+
'agentdebug.framework': trajectory.framework or '',
|
|
87
|
+
}
|
|
88
|
+
if trajectory.goal:
|
|
89
|
+
attrs['agentdebug.goal'] = trajectory.goal
|
|
90
|
+
if trajectory.task_id:
|
|
91
|
+
attrs['agentdebug.task_id'] = trajectory.task_id
|
|
92
|
+
return attrs
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _event_attributes(event: AgentEvent, op: str) -> Dict[str, Any]:
    """Build the attribute mapping for a single event span.

    Args:
        event: The recorded event being exported.
        op: Value stored under ``gen_ai.operation.name``.

    Returns:
        Identity attributes that are always present, plus ``module``,
        ``step_index`` and ``duration_ms`` when they are not None, plus a
        fixed ``error.type`` marker when the event has a truthy ``error``.
    """
    attributes: Dict[str, Any] = {
        'gen_ai.operation.name': op,
        'agentdebug.event_id': event.event_id,
        'agentdebug.event_type': _event_type_value(event.event_type),
        'agentdebug.agent.name': event.agent_name,
    }
    optional_fields = (
        ('agentdebug.module', event.module),
        ('agentdebug.step_index', event.step_index),
        ('agentdebug.duration_ms', event.duration_ms),
    )
    for key, value in optional_fields:
        # Only None is skipped, so 0 and '' are still exported.
        if value is not None:
            attributes[key] = value
    if event.error:
        attributes['error.type'] = 'AgentEventError'
    return attributes
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _set_attr(span: Any, key: str, value: Any) -> None:
|
|
114
|
+
setter = getattr(span, 'set_attribute', None)
|
|
115
|
+
if callable(setter):
|
|
116
|
+
setter(key, value)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _event_type_value(event_type: EventType) -> str:
|
|
120
|
+
value = getattr(event_type, 'value', event_type)
|
|
121
|
+
if isinstance(value, str):
|
|
122
|
+
return value
|
|
123
|
+
return str(value)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class OTelExportAdapter:
    """OTel export adapter, structural :class:`FrameworkAdapter` (not subclassed).

    ``instrument`` attaches nothing; it reports whether OpenTelemetry was
    detected and points the caller at ``export_trajectory``.
    """

    framework = 'otel'

    def instrument(self, debugger: AgentDebug) -> AdapterStatus:  # noqa: D401
        """Return a status reflecting whether OpenTelemetry is importable."""
        if not _otel_available():
            return AdapterStatus(
                framework=self.framework,
                implemented=False,
                notes='Install `opentelemetry-api` and `opentelemetry-sdk` to enable.',
            )
        usage = (
            'OpenTelemetry detected. Call '
            'agentdebug.adapters.otel.export_trajectory(trajectory) '
            'to emit gen_ai.* spans.'
        )
        return AdapterStatus(
            framework=self.framework,
            implemented=True,
            notes=usage,
        )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
_: FrameworkAdapter = OTelExportAdapter() # static structural check
|
|
150
|
+
|
|
151
|
+
__all__ = ['OTelExportAdapter', 'export_trajectory']
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Adapter for raw / vanilla ReAct loops.
|
|
2
|
+
|
|
3
|
+
For users who write a ``while not done: ...`` loop against the OpenAI or
|
|
4
|
+
Anthropic SDK directly (no framework), this adapter exposes:
|
|
5
|
+
|
|
6
|
+
* :func:`trace_loop` — decorator that wraps a function in a :class:`TraceSession`
|
|
7
|
+
and exposes the active session via :func:`current_session`.
|
|
8
|
+
* :func:`mark_step` — record a step boundary inside the wrapped function.
|
|
9
|
+
|
|
10
|
+
This is the lowest-overhead integration path: no framework dependency, no
|
|
11
|
+
monkey-patching.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import contextvars
|
|
17
|
+
import time
|
|
18
|
+
from functools import wraps
|
|
19
|
+
from typing import Any, Callable, Optional, TypeVar, cast
|
|
20
|
+
|
|
21
|
+
from agentdebug.adapters.base import AdapterStatus, FrameworkAdapter
|
|
22
|
+
from agentdebug.models import EventType
|
|
23
|
+
from agentdebug.recorder import AgentDebug, TraceSession
|
|
24
|
+
|
|
25
|
+
_current_session: contextvars.ContextVar[Optional[TraceSession]] = contextvars.ContextVar(
|
|
26
|
+
'agentdebug_current_session', default=None
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
F = TypeVar('F', bound=Callable[..., Any])
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def current_session() -> Optional[TraceSession]:
    """Look up the TraceSession bound to the current context.

    Returns:
        The active session, or None when none has been set in this context.
    """
    active = _current_session.get()
    return active
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def trace_loop(
    debugger: AgentDebug,
    *,
    goal: Optional[str] = None,
    framework: str = 'raw',
    task_id: Optional[str] = None,
    success_on_return: bool = True,
) -> Callable[[F], F]:
    """Wrap a function so its execution becomes one trajectory.

    Each call of the decorated function opens a new session via
    ``debugger.trace(...)``, runs the function inside that session's context
    manager, and exposes the session through :func:`current_session` for the
    duration of the call. Inside the wrapped function, use
    :func:`current_session` for direct ``trace.record(...)`` calls, or
    :func:`mark_step` for the common case.

    Args:
        debugger: Object whose ``trace(goal=..., framework=..., task_id=...)``
            returns the session context manager.
        goal: Goal recorded for the run; defaults to the wrapped function's
            ``__name__`` when falsy.
        framework: Framework label passed to ``debugger.trace``.
        task_id: Optional task identifier passed to ``debugger.trace``.
        success_on_return: Accepted for API compatibility. This wrapper does
            not consult it; the run's outcome is left to the session's own
            context-manager exit.

    Returns:
        A decorator that preserves the wrapped function's metadata and return
        value. Exceptions raised by the function propagate unless the
        session's ``__exit__`` suppresses them, in which case the wrapper
        returns None.

    Note:
        The wrapper is synchronous. Decorating a coroutine function would end
        the session as soon as the coroutine object is created, before it is
        awaited.
    """

    def decorator(func: F) -> F:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            session = debugger.trace(
                goal=goal or func.__name__,
                framework=framework,
                task_id=task_id,
            )
            with session:
                # Publish the session only after __enter__ has succeeded, so a
                # failing __enter__ cannot leave a stale session visible
                # through current_session().
                token = _current_session.set(session)
                try:
                    # Returning from inside the block means a suppressed
                    # exception falls through to an implicit None instead of
                    # reading an unbound result variable.
                    return func(*args, **kwargs)
                finally:
                    # Restore the previous value before the session exits, so
                    # nested or sequential traces see the right session.
                    _current_session.reset(token)

        return cast(F, wrapper)

    return decorator
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def mark_step(
    *,
    event_type: EventType = EventType.AGENT_STEP,
    agent_name: str = 'agent',
    step_index: Optional[int] = None,
    module: Optional[str] = None,
    input: Any = None,
    output: Any = None,
    error: Optional[str] = None,
    duration_ms: Optional[float] = None,
    **metadata: Any,
) -> None:
    """Record one event on the session active in the current context.

    All arguments are keyword-only and are forwarded unchanged to the
    session's ``record`` method, with any extra keywords passed through as
    metadata. When no session is active the call does nothing.
    """
    active = current_session()
    if active is not None:
        active.record(
            event_type=event_type,
            agent_name=agent_name,
            step_index=step_index,
            module=module,
            input=input,
            output=output,
            error=error,
            duration_ms=duration_ms,
            **metadata,
        )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class RawLoopAdapter:
    """Adapter for hand-written agent loops.

    Satisfies :class:`FrameworkAdapter` structurally (it is a ``Protocol``,
    so no subclassing is involved).
    """

    framework = 'raw'

    def instrument(self, debugger: AgentDebug) -> AdapterStatus:  # noqa: D401
        """Return a status explaining how to opt in; nothing is attached."""
        # There is no global hook to install: users decorate their own loop.
        usage = (
            'Apply @agentdebug.adapters.raw.trace_loop(debugger) to your '
            'agent loop and call mark_step(...) inside it.'
        )
        return AdapterStatus(
            framework=self.framework,
            implemented=True,
            notes=usage,
        )
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
_: FrameworkAdapter = RawLoopAdapter() # static structural check
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def timed_block_ms(start: float) -> float:
    """Return the milliseconds elapsed since ``start``.

    Args:
        start: A reading previously taken from ``time.perf_counter()``.
    """
    elapsed_seconds = time.perf_counter() - start
    return elapsed_seconds * 1000.0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
__all__ = [
|
|
129
|
+
'RawLoopAdapter',
|
|
130
|
+
'current_session',
|
|
131
|
+
'mark_step',
|
|
132
|
+
'timed_block_ms',
|
|
133
|
+
'trace_loop',
|
|
134
|
+
]
|