agentdebugx 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/PKG-INFO +2 -1
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/21_integrations.md +56 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/22_industry_track_paper_eval_plan.md +25 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/23_status_v0_2.md +6 -2
- agentdebugx-0.2.5/docs/benchmarks/e2e_v0_2_4.md +365 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/pyproject.toml +3 -1
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/__init__.py +1 -1
- agentdebugx-0.2.5/src/agentdebug/adapters/crewai.py +233 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/cli.py +6 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/LICENSE +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/README.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/00_overview.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/01_literature_survey.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/02_architecture.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/03_taxonomy.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/04_trace_schema.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/05_adapters.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/06_detectors.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/07_attribution.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/08_recovery.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/09_error_database.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/10_taxonomy_induction.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/11_multimodal.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/12_ui_dashboard.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/13_class_design.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/14_api_reference.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/15_roadmap.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/16_governance.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/17_claude_code_design_patterns.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/18_comparison_codex_vs_design.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/19_error_hub.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/20_deep_debug.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/ERROR_TAXONOMY.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/OPEN_SOURCE_DEVELOPMENT_PLAN.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/README.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/RESEARCH_SURVEY.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/benchmarks/e2e_v0_2_3.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/benchmarks/v0_1_smoke.json +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/docs/benchmarks/v0_1_smoke.md +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/adapters/__init__.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/adapters/base.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/adapters/langgraph.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/adapters/otel.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/adapters/raw.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/analyzers.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/attribution.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/deep.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/detectors.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/events.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/hub/__init__.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/hub/backend_base.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/hub/backends.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/hub/bundle.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/hub/scrub.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/instrumentation.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/integrations/__init__.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/integrations/claude_skill.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/integrations/openhands.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/judges.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/llm.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/models.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/recorder.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/recovery.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/storage.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/taxonomy.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/traceback.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/ui/__init__.py +0 -0
- {agentdebugx-0.2.4 → agentdebugx-0.2.5}/src/agentdebug/ui/server.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentdebugx
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: Portable error analysis, tracing, and recovery framework for agentic AI systems. Import as `agentdebug`.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -26,6 +26,7 @@ Classifier: Topic :: Software Development :: Quality Assurance
|
|
|
26
26
|
Classifier: Topic :: System :: Monitoring
|
|
27
27
|
Classifier: Typing :: Typed
|
|
28
28
|
Provides-Extra: all
|
|
29
|
+
Provides-Extra: crewai
|
|
29
30
|
Provides-Extra: hub-hf
|
|
30
31
|
Provides-Extra: langgraph
|
|
31
32
|
Provides-Extra: openhands
|
|
@@ -58,6 +58,62 @@ command:
|
|
|
58
58
|
| "I need a thorough postmortem" | `agentdebug deep <file\|trace_id>` |
|
|
59
59
|
| "share this with the team" | `agentdebug hub push <trace_id> --to git:...` |
|
|
60
60
|
|
|
61
|
+
## 1.5 CrewAI integration
|
|
62
|
+
|
|
63
|
+
CrewAI emits a typed event stream via a process-global
|
|
64
|
+
`crewai.events.crewai_event_bus`. `agentdebug.adapters.crewai` ships two
|
|
65
|
+
pieces:
|
|
66
|
+
|
|
67
|
+
- `CrewAIBridge(debugger, trajectory)` — context manager that subscribes
|
|
68
|
+
to the bus and translates events into AgentDebug records.
|
|
69
|
+
- `CrewAIAdapter().instrument(debugger)` — used by `agentdebug doctor` to
|
|
70
|
+
report whether CrewAI is importable.
|
|
71
|
+
|
|
72
|
+
### Recording a Crew run
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from agentdebug import AgentDebug, SQLiteTraceStore
|
|
76
|
+
from agentdebug.adapters.crewai import CrewAIBridge
|
|
77
|
+
|
|
78
|
+
debugger = AgentDebug(store=SQLiteTraceStore('.agentdebug/errors.sqlite'))
|
|
79
|
+
trajectory = debugger.start_trace(goal='build a marketing plan', framework='crewai')
|
|
80
|
+
|
|
81
|
+
with CrewAIBridge(debugger, trajectory):
|
|
82
|
+
    crew.kickoff(inputs={...})  # standard CrewAI call
|
|
83
|
+
|
|
84
|
+
debugger.finish_trace(trajectory, success=True)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### What gets recorded
|
|
88
|
+
|
|
89
|
+
| CrewAI event class | AgentDebug `EventType` | Module |
|
|
90
|
+
|---|---|---|
|
|
91
|
+
| `CrewKickoffStartedEvent` | `AGENT_STEP` | planning |
|
|
92
|
+
| `CrewKickoffCompletedEvent` | `OBSERVATION` | planning |
|
|
93
|
+
| `TaskStartedEvent` | `PLAN` | planning |
|
|
94
|
+
| `TaskCompletedEvent` | `OBSERVATION` | planning |
|
|
95
|
+
| `AgentExecutionStartedEvent` | `AGENT_STEP` | planning |
|
|
96
|
+
| `AgentExecutionCompletedEvent` | `OBSERVATION` | reflection |
|
|
97
|
+
| `LLMCallStartedEvent` | `LLM_CALL` | planning |
|
|
98
|
+
| `LLMCallCompletedEvent` | `LLM_RESPONSE` | planning |
|
|
99
|
+
| `ToolUsageStartedEvent` | `TOOL_CALL` | action |
|
|
100
|
+
| `ToolUsageFinishedEvent` | `TOOL_RESULT` | action |
|
|
101
|
+
| `ToolUsageErrorEvent` | `TOOL_RESULT` (with `error`) | action |
|
|
102
|
+
|
|
103
|
+
Event class names not present in the installed CrewAI version are silently
|
|
104
|
+
skipped — useful for forward/backward compatibility.
|
|
105
|
+
|
|
106
|
+
### Install
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pip install 'agentdebugx[crewai]'
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### See also
|
|
113
|
+
|
|
114
|
+
`examples/crewai_demo.py` shows a complete two-agent crew (researcher +
|
|
115
|
+
editor) instrumented with `CrewAIBridge`.
|
|
116
|
+
|
|
61
117
|
## 2. OpenHands integration
|
|
62
118
|
|
|
63
119
|
`agentdebug.integrations.openhands` ships two complementary pieces.
|
|
@@ -233,3 +233,28 @@ Storage/export:
|
|
|
233
233
|
4. Run rule, judge, All-at-Once, and DeepDebug on the same split.
|
|
234
234
|
5. Add a small human/expert review with raw trace vs paired trace view.
|
|
235
235
|
6. Trim main body to 6 pages while keeping appendix rich.
|
|
236
|
+
|
|
237
|
+
## 9. Implemented Experiment Harness
|
|
238
|
+
|
|
239
|
+
The repository now contains an `experiments/` directory for the first paper
|
|
240
|
+
pipeline:
|
|
241
|
+
|
|
242
|
+
- `prepare_who_when.py`: downloads the two public Who&When parquet files from
|
|
243
|
+
Hugging Face and normalizes them into `AgentTrajectory` JSONL plus separate
|
|
244
|
+
gold labels. On 2026-05-16 this produced 184 traces and 4,092 events.
|
|
245
|
+
- `run_e2e_smoke.py`: exercises the public AgentDebugX modules end to end:
|
|
246
|
+
analyzer, detectors, traceback, recoverers, attribution, and Error Hub local
|
|
247
|
+
push/pull. It can optionally call a live OpenAI-compatible endpoint.
|
|
248
|
+
- `run_who_when_eval.py`: runs attribution metrics against Who&When labels,
|
|
249
|
+
including responsible-agent match, exact step match, +/-1 step match, and
|
|
250
|
+
joint agent/step accuracy.
|
|
251
|
+
- `generate_paper_figures_openai.py`: generates the optional paper pipeline
|
|
252
|
+
figure with `gpt-image-2` when `OPENAI_API_KEY` is available.
|
|
253
|
+
|
|
254
|
+
The AgentDebug / AgentErrorBench dataset is currently linked from the upstream
|
|
255
|
+
paper repository as a Google Drive folder rather than a stable direct download
|
|
256
|
+
URL. `prepare_agenterrorbench.py` accepts a local extracted folder under
|
|
257
|
+
`data/agenterrorbench/raw/`, can attempt `gdown` download when the optional
|
|
258
|
+
dependency is installed, preserves environment metadata for ALFWorld, GAIA, and
|
|
259
|
+
WebShop, and outputs the same `AgentTrajectory` + labels format used by the
|
|
260
|
+
Who&When loader.
|
|
@@ -142,8 +142,12 @@ Before v0.3 ships, this doc should record green checkmarks for:
|
|
|
142
142
|
default verifier templates (JSON-schema guard, final-state check,
|
|
143
143
|
tool-result type-check, handoff contract, loop-detector guard) and
|
|
144
144
|
emits per-finding `FixProposal` with rationale + suggested code.
|
|
145
|
-
- [
|
|
146
|
-
|
|
145
|
+
- [x] **One additional framework adapter** — CrewAI adapter shipped in
|
|
146
|
+
0.2.5 (`agentdebug.adapters.crewai`). `CrewAIBridge` context manager
|
|
147
|
+
subscribes to `crewai_event_bus`, translates 11 CrewAI event types
|
|
148
|
+
into `AgentEvent`s. Conformance test mocks the bus and verifies
|
|
149
|
+
every documented event mapping plus the version-skew degradation
|
|
150
|
+
path. `examples/crewai_demo.py` shows a working two-agent crew.
|
|
147
151
|
- [ ] HuggingFace Hub round-trip live test (gated on `HF_TOKEN`).
|
|
148
152
|
- [ ] Bench harness extended with one published-benchmark loader (Who&When
|
|
149
153
|
is the obvious first target — we already cite its method).
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
# AgentDebugX v0.2.4 End-to-End Real-Usage Smoke
|
|
2
|
+
|
|
3
|
+
Scenarios: **3**. LLM model: `gemini-3-flash`. Generated by `scripts/e2e_real_usage.py`.
|
|
4
|
+
|
|
5
|
+
## Per-scenario pipeline status
|
|
6
|
+
|
|
7
|
+
| Scenario | trace_id | OK / Total stages | Failed stages |
|
|
8
|
+
|---|---|---|---|
|
|
9
|
+
| `action_format_then_hallucination` | `trace_cde22f…` | 12 / 12 | — |
|
|
10
|
+
| `multiagent_handoff_loss` | `trace_84fb3a…` | 12 / 12 | — |
|
|
11
|
+
| `planning_loop` | `trace_c59dc1…` | 12 / 12 | — |
|
|
12
|
+
|
|
13
|
+
**UI smoke:** ✅ all endpoints responded
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
GET /healthz -> 200 {"status":"ok"}
|
|
17
|
+
GET /api/v1/traces -> 5 trace(s)
|
|
18
|
+
GET /api/v1/traces/<id> -> 200 events=11 findings=4
|
|
19
|
+
GET /api/v1/taxonomy -> modes=19
|
|
20
|
+
GET / -> 200 content_length=33710 has_brand=True
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## `action_format_then_hallucination`
|
|
24
|
+
|
|
25
|
+
`trace_id=trace_cde22f0eb1ec499aa3bc5b88f472257d`
|
|
26
|
+
|
|
27
|
+
### ✅ `heuristic_analyzer` (0.00s) — 1 finding(s); root=2
|
|
28
|
+
### ✅ `cross_event_detectors` (0.00s) — 0 finding(s) from default_detectors()
|
|
29
|
+
### ✅ `traceback_offline` (0.00s) — rendered
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
33
|
+
trace_id=trace_cde22f0eb1ec499aa3bc5b88f472257d framework=e2e-react goal='Find the latest AgentDebug paper, summarize the method, then email alice@example.com'
|
|
34
|
+
|
|
35
|
+
File "root cause", in trajectory
|
|
36
|
+
Step 2 agent=search_web mode=system.tool_execution_error confidence=0.86
|
|
37
|
+
event_id=evt_1eb26b89fce447fa8d9d908b50741ac3
|
|
38
|
+
error> JSON schema validation failed: missing parameter query
|
|
39
|
+
evidence:
|
|
40
|
+
- JSON schema validation failed: missing parameter query
|
|
41
|
+
suggested: Capture tool stderr/status/latency and classify retryable versus non-retryable failures.
|
|
42
|
+
|
|
43
|
+
AgentFailure[system.tool_execution_error]: Likely root cause: Tool execution error in search_web at step 2.
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### ✅ `reflexion_suggestion` (0.00s) — 1 proposal(s)
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
Reflexion retry hint for system.tool_execution_error at step 2
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### ✅ `critic_recoverer` (0.00s) — 1 verifier proposal(s)
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Add tool_result_typecheck before system.tool_execution_error (step 2, agent search_web)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### ✅ `llm_judge` (14.31s) — 3 finding(s); root=1
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
- action.parameter_error (conf=1.00) step=1 agent=search_web
|
|
62
|
+
- verification.premature_stop (conf=1.00) step=4 agent=planner
|
|
63
|
+
- verification.missing_task_validation (conf=0.90) step=None agent=system
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### ✅ `attribute_heuristic` (0.00s) — method=heuristic agent=search_web step=1 conf=1.00
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
Earliest finding with non-trivial confidence: Parameter error
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### ✅ `attribute_all_at_once` (7.52s) — method=all_at_once agent=search_web step=1 conf=0.90
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
The root cause is the malformed tool call in step 1, where the search_web tool was invoked without the required 'query' parameter, leading to a validation error that derailed the entire process.
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### ✅ `attribute_step_by_step` (23.64s) — method=step_by_step agent=search_web step=1 conf=1.00
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
The agent invoked the search tool without providing a search query, which is a parameter error that prevents the retrieval of the required information.
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### ✅ `attribute_binary_search` (8.01s) — method=binary_search agent=search_web step=2 conf=0.80
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
Binary search located the decisive step within 2 probes over 6 events.
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### ✅ `deep_debug` (28.00s) — 3 finding(s); rounds=6
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
rounds: plan:4588ms / hypothesize:8051ms / verify:h1:2874ms / verify:h2:2392ms / verify:h3:2514ms / refine:7581ms
|
|
94
|
+
summary: The agent failed to provide the required 'query' parameter when calling the search tool, and the planner subsequently ignored this failure, incorrectly claiming the task was complete without summarizing the paper or sending the email.
|
|
95
|
+
|
|
96
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
97
|
+
trace_id=trace_cde22f0eb1ec499aa3bc5b88f472257d framework=e2e-react goal='Find the latest AgentDebug paper, summarize the method, then email alice@example.com'
|
|
98
|
+
|
|
99
|
+
File "root cause", in trajectory
|
|
100
|
+
Step 2 agent=search_web mode=action.parameter_error confidence=1.00
|
|
101
|
+
event_id=evt_1eb26b89fce447fa8d9d908b50741ac3
|
|
102
|
+
error> JSON schema validation failed: missing parameter query
|
|
103
|
+
evidence:
|
|
104
|
+
- JSON schema validation failed: missing parameter query
|
|
105
|
+
- kwargs='{}'
|
|
106
|
+
suggested: Validate parameters against tool schemas and ask for missing user/context fields.
|
|
107
|
+
↓ cascaded to
|
|
108
|
+
File "cascade depth 1", in trajectory
|
|
109
|
+
Step 4 agent=planner mode=reflection.progress_misjudge confidence=1.00
|
|
110
|
+
module=reflection
|
|
111
|
+
event_id=evt_2b76fd64970544908fdb65eee79bbd9d
|
|
112
|
+
output> Final answer: AgentDebug is a popular paper. Done.
|
|
113
|
+
evidence:
|
|
114
|
+
- Final answer: AgentDebug is a popular paper. Done.
|
|
115
|
+
- meta={'metadata': {'final_answer': True}}
|
|
116
|
+
suggested: Add an external task verifier before termination.
|
|
117
|
+
↓ cascaded to
|
|
118
|
+
File "cascade depth 2", in trajectory
|
|
119
|
+
Step ? agent=system mode=verification.missing_task_validation confidence=1.00
|
|
120
|
+
event_id=evt_15fe36c6a23247cea25efb4a34fccedf
|
|
121
|
+
evidence:
|
|
122
|
+
- meta={'success': True}
|
|
123
|
+
suggested: Add final-state validation that is independent of the acting agent.
|
|
124
|
+
|
|
125
|
+
AgentFailure[verification.missing_task_validation]: The agent failed to provide the required 'query' parameter when calling the search tool, and the planner subsequently ignored this failure, incorrectly claiming the task was complete without summarizing the paper or sending the email.
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### ✅ `hub_round_trip` (0.00s) — pushed=/home/kunlunz2/AgentDebugX/.agentdebug/e2e_hub/bundle_e79cadee55524420ba4719ecd2934536 ; bundle_id=bundle_e79cadee55524420ba4719ecd2934536 ; listed=1 ; round-trip ok
|
|
129
|
+
|
|
130
|
+
## `multiagent_handoff_loss`
|
|
131
|
+
|
|
132
|
+
`trace_id=trace_84fb3ac2083c449b8f2ca80503861dc5`
|
|
133
|
+
|
|
134
|
+
### ✅ `heuristic_analyzer` (0.00s) — 1 finding(s); root=2
|
|
135
|
+
### ✅ `cross_event_detectors` (0.00s) — 0 finding(s) from default_detectors()
|
|
136
|
+
### ✅ `traceback_offline` (0.00s) — rendered
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
140
|
+
trace_id=trace_84fb3ac2083c449b8f2ca80503861dc5 framework=e2e-multiagent goal='Find the best paper on agent debugging, prefer the most recent.'
|
|
141
|
+
|
|
142
|
+
File "root cause", in trajectory
|
|
143
|
+
Step 2 agent=researcher mode=multiagent.handoff_loss confidence=0.70
|
|
144
|
+
module=multiagent
|
|
145
|
+
event_id=evt_800ad2ca86644768b07990f0af08e1b6
|
|
146
|
+
output> Please summarize the agent debugging paper.
|
|
147
|
+
evidence:
|
|
148
|
+
- handoff/context signal in event payload
|
|
149
|
+
suggested: Make handoff payloads typed and include goal, constraints, evidence, confidence, and open questions.
|
|
150
|
+
|
|
151
|
+
AgentFailure[multiagent.handoff_loss]: Likely root cause: Handoff context loss in researcher at step 2.
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### ✅ `reflexion_suggestion` (0.00s) — 1 proposal(s)
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
Reflexion retry hint for multiagent.handoff_loss at step 2
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### ✅ `critic_recoverer` (0.00s) — 1 verifier proposal(s)
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
Add handoff_context_contract before multiagent.handoff_loss (step 2, agent researcher)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### ✅ `llm_judge` (9.36s) — 2 finding(s); root=2
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
- multiagent.handoff_loss (conf=1.00) step=2 agent=researcher
|
|
170
|
+
- verification.missing_task_validation (conf=0.90) step=None agent=system
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### ✅ `attribute_heuristic` (0.00s) — method=heuristic agent=researcher step=2 conf=1.00
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
Earliest finding with non-trivial confidence: Handoff context loss
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### ✅ `attribute_all_at_once` (3.72s) — method=all_at_once agent=researcher step=2 conf=1.00
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
The researcher correctly identified Paper A as the most recent in step 1 but failed to specify this choice or the recency constraint during the handoff in step 2, leading the summarizer to pick the wrong paper.
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### ✅ `attribute_step_by_step` (21.99s) — method=step_by_step agent=researcher step=2 conf=1.00
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
The researcher failed to specify which paper to summarize during the handoff, losing the context of the 'most recent' paper identified in the previous step.
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### ✅ `attribute_binary_search` (16.41s) — method=binary_search agent=summarizer step=4 conf=0.90
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
Binary search located the decisive step within 3 probes over 6 events.
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### ✅ `deep_debug` (45.65s) — 3 finding(s); rounds=6
|
|
198
|
+
|
|
199
|
+
```
|
|
200
|
+
rounds: plan:4990ms / hypothesize:7110ms / verify:h1:2939ms / verify:h2:2320ms / verify:h3:5377ms / refine:22907ms
|
|
201
|
+
summary: The researcher agent hallucinated finding specific papers without performing a search and subsequently failed to communicate the paper selection or user constraints to the summarizer, leading to a total failure of the summarization step.
|
|
202
|
+
|
|
203
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
204
|
+
trace_id=trace_84fb3ac2083c449b8f2ca80503861dc5 framework=e2e-multiagent goal='Find the best paper on agent debugging, prefer the most recent.'
|
|
205
|
+
|
|
206
|
+
File "root cause", in trajectory
|
|
207
|
+
Step 1 agent=researcher mode=memory.hallucination confidence=0.95
|
|
208
|
+
module=planning
|
|
209
|
+
event_id=evt_a4f22a565af84663bab9f0460b122ea8
|
|
210
|
+
output> Found two candidate papers: A (May 2025) and B (Mar 2024). A is preferred because it is more recent (per user constraint).
|
|
211
|
+
evidence:
|
|
212
|
+
- Found two candidate papers: A (May 2025) and B (Mar 2024)
|
|
213
|
+
- The trajectory shows a premature handoff... without any evidence of search or data retrieval occurring.
|
|
214
|
+
suggested: Require memory reads to cite the source event or artifact before use.
|
|
215
|
+
↓ cascaded to
|
|
216
|
+
File "cascade depth 1", in trajectory
|
|
217
|
+
Step 2 agent=researcher mode=multiagent.handoff_loss confidence=1.00
|
|
218
|
+
module=multiagent
|
|
219
|
+
event_id=evt_800ad2ca86644768b07990f0af08e1b6
|
|
220
|
+
output> Please summarize the agent debugging paper.
|
|
221
|
+
evidence:
|
|
222
|
+
- Please summarize the agent debugging paper.
|
|
223
|
+
- omitted_context: 'preference for A; recency constraint'
|
|
224
|
+
suggested: Make handoff payloads typed and include goal, constraints, evidence, confidence, and open questions.
|
|
225
|
+
↓ cascaded to
|
|
226
|
+
File "cascade depth 2", in trajectory
|
|
227
|
+
Step 3 agent=summarizer mode=memory.retrieval_failure confidence=0.90
|
|
228
|
+
module=planning
|
|
229
|
+
event_id=evt_8f20248aa2b64a6dad966bd9f336cc77
|
|
230
|
+
input> Please summarize the agent debugging paper.
|
|
231
|
+
evidence:
|
|
232
|
+
- input=Please summarize the agent debugging paper.
|
|
233
|
+
- output=None
|
|
234
|
+
- omitted_context: 'preference for A; recency constraint'
|
|
235
|
+
suggested: Persist the missing state as structured memory and attach it to the next planning step.
|
|
236
|
+
|
|
237
|
+
AgentFailure[memory.retrieval_failure]: The researcher agent hallucinated finding specific papers without performing a search and subsequently failed to communicate the paper selection or user constraints to the summarizer, leading to a total failure of the summarization step.
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### ✅ `hub_round_trip` (0.00s) — pushed=/home/kunlunz2/AgentDebugX/.agentdebug/e2e_hub/bundle_27e79ad1f5454df4b89f9f7fe75bab3e ; bundle_id=bundle_27e79ad1f5454df4b89f9f7fe75bab3e ; listed=3 ; round-trip ok
|
|
241
|
+
|
|
242
|
+
## `planning_loop`
|
|
243
|
+
|
|
244
|
+
`trace_id=trace_c59dc17f26994841ad361176ddf6b7c0`
|
|
245
|
+
|
|
246
|
+
### ✅ `heuristic_analyzer` (0.00s) — 4 finding(s); root=2
|
|
247
|
+
### ✅ `cross_event_detectors` (0.00s) — 3 finding(s) from default_detectors()
|
|
248
|
+
|
|
249
|
+
```
|
|
250
|
+
- planning.inefficient_plan (source=repeated_tool_call)
|
|
251
|
+
- planning.inefficient_plan (source=repeated_state)
|
|
252
|
+
- planning.inefficient_plan (source=repeated_state)
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### ✅ `traceback_offline` (0.00s) — rendered
|
|
256
|
+
|
|
257
|
+
```
|
|
258
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
259
|
+
trace_id=trace_c59dc17f26994841ad361176ddf6b7c0 framework=e2e-browser goal='Submit the checkout form on shop.example.com'
|
|
260
|
+
|
|
261
|
+
File "root cause", in trajectory
|
|
262
|
+
Step 2 agent=browser mode=planning.inefficient_plan confidence=0.67
|
|
263
|
+
event_id=evt_1d189d0f66de4329a6d11f567721eea4
|
|
264
|
+
output> no progress; same checkout screen
|
|
265
|
+
evidence:
|
|
266
|
+
- loop/progress signal in event payload
|
|
267
|
+
suggested: Add loop detection over tool calls and state deltas.
|
|
268
|
+
↓ cascaded to
|
|
269
|
+
File "cascade depth 1", in trajectory
|
|
270
|
+
Step 4 agent=browser mode=planning.inefficient_plan confidence=0.67
|
|
271
|
+
event_id=evt_d6e7dd127fab458896b58bae5cac1954
|
|
272
|
+
output> no progress; same checkout screen
|
|
273
|
+
evidence:
|
|
274
|
+
- loop/progress signal in event payload
|
|
275
|
+
suggested: Add loop detection over tool calls and state deltas.
|
|
276
|
+
↓ cascaded to
|
|
277
|
+
File "cascade depth 2", in trajectory
|
|
278
|
+
Step 6 agent=browser mode=planning.inefficient_plan confidence=0.67
|
|
279
|
+
event_id=evt_c92a15e9cc894bffaff61ec5f6169150
|
|
280
|
+
output> no progress; same checkout screen
|
|
281
|
+
evidence:
|
|
282
|
+
- loop/progress signal in event payload
|
|
283
|
+
suggested: Add loop detection over tool calls and state deltas.
|
|
284
|
+
↓ cascaded to
|
|
285
|
+
File "cascade depth 3", in trajectory
|
|
286
|
+
Step 8 agent=browser mode=planning.inefficient_plan confidence=0.67
|
|
287
|
+
event_id=evt_df07344c3e9e4f7fba9cb3df92a73bfd
|
|
288
|
+
output> no progress; same checkout screen
|
|
289
|
+
evidence:
|
|
290
|
+
- loop/progress signal in event payload
|
|
291
|
+
suggested: Add loop detection over tool calls and state deltas.
|
|
292
|
+
|
|
293
|
+
AgentFailure[planning.inefficient_plan]: Likely root cause: Inefficient plan in browser at step 2.
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### ✅ `reflexion_suggestion` (0.00s) — 4 proposal(s)
|
|
297
|
+
|
|
298
|
+
```
|
|
299
|
+
Reflexion retry hint for planning.inefficient_plan at step 2
|
|
300
|
+
Reflexion retry hint for planning.inefficient_plan at step 4
|
|
301
|
+
Reflexion retry hint for planning.inefficient_plan at step 6
|
|
302
|
+
Reflexion retry hint for planning.inefficient_plan at step 8
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### ✅ `critic_recoverer` (0.00s) — 4 verifier proposal(s)
|
|
306
|
+
|
|
307
|
+
```
|
|
308
|
+
Add loop_detector_guard before planning.inefficient_plan (step 2, agent browser)
|
|
309
|
+
Add loop_detector_guard before planning.inefficient_plan (step 4, agent browser)
|
|
310
|
+
Add loop_detector_guard before planning.inefficient_plan (step 6, agent browser)
|
|
311
|
+
Add loop_detector_guard before planning.inefficient_plan (step 8, agent browser)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### ✅ `llm_judge` (23.10s) — 0 finding(s); root=None
|
|
315
|
+
### ✅ `attribute_heuristic` (0.00s) — method=heuristic (no hypotheses)
|
|
316
|
+
### ✅ `attribute_all_at_once` (5.36s) — method=all_at_once agent=planner step=1 conf=0.90
|
|
317
|
+
|
|
318
|
+
```
|
|
319
|
+
The planner established a flawed strategy of repeatedly clicking the submit button without any logic to handle potential form validation errors or investigate why the submission was failing.
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### ✅ `attribute_step_by_step` (57.37s) — method=step_by_step agent=planner step=1 conf=0.90
|
|
323
|
+
|
|
324
|
+
```
|
|
325
|
+
The planner's strategy is fundamentally flawed as it attempts to click a submit button without first navigating to the website or filling out the required form fields.
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### ✅ `attribute_binary_search` (24.84s) — method=binary_search agent=browser step=4 conf=0.90
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
Binary search located the decisive step within 3 probes over 11 events.
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
### ✅ `deep_debug` (38.32s) — 2 finding(s); rounds=6
|
|
335
|
+
|
|
336
|
+
```
|
|
337
|
+
rounds: plan:3303ms / hypothesize:9554ms / verify:h1:3739ms / verify:h2:4204ms / verify:h3:8529ms / refine:8988ms
|
|
338
|
+
summary: The agent failed to submit the checkout form because it repeatedly attempted to click the submit button without ensuring all required fields (such as address or payment details) were populated, ignoring the lack of progress between attempts.
|
|
339
|
+
|
|
340
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
341
|
+
trace_id=trace_c59dc17f26994841ad361176ddf6b7c0 framework=e2e-browser goal='Submit the checkout form on shop.example.com'
|
|
342
|
+
|
|
343
|
+
File "root cause", in trajectory
|
|
344
|
+
Step 7 agent=browser mode=planning.inefficient_plan confidence=0.95
|
|
345
|
+
event_id=evt_0b22e12ef8bb48bdad049706d6b77d33
|
|
346
|
+
input> {'tool': 'click', 'args': '()', 'kwargs': "{'selector': '#submit'}"}
|
|
347
|
+
evidence:
|
|
348
|
+
- step=5 ... click ... #submit
|
|
349
|
+
- step=7 ... click ... #submit
|
|
350
|
+
- output=no progress; same checkout screen
|
|
351
|
+
suggested: Add loop detection over tool calls and state deltas.
|
|
352
|
+
↓ cascaded to
|
|
353
|
+
File "cascade depth 1", in trajectory
|
|
354
|
+
Step 8 agent=browser mode=planning.constraint_ignorance confidence=1.00
|
|
355
|
+
event_id=evt_df07344c3e9e4f7fba9cb3df92a73bfd
|
|
356
|
+
output> no progress; same checkout screen
|
|
357
|
+
evidence:
|
|
358
|
+
- no progress; same checkout screen
|
|
359
|
+
- Strategy: click #submit until success
|
|
360
|
+
suggested: Compile task and tool constraints into pre-action checks.
|
|
361
|
+
|
|
362
|
+
AgentFailure[planning.constraint_ignorance]: The agent failed to submit the checkout form because it repeatedly attempted to click the submit button without ensuring all required fields (such as address or payment details) were populated, ignoring the lack of progress between attempts.
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
### ✅ `hub_round_trip` (0.01s) — pushed=/home/kunlunz2/AgentDebugX/.agentdebug/e2e_hub/bundle_8f2dab82d8e74638ab8656f834d353ab ; bundle_id=bundle_8f2dab82d8e74638ab8656f834d353ab ; listed=4 ; round-trip ok
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "agentdebugx"
|
|
3
|
-
version = "0.2.4"
|
|
3
|
+
version = "0.2.5"
|
|
4
4
|
description = "Portable error analysis, tracing, and recovery framework for agentic AI systems. Import as `agentdebug`."
|
|
5
5
|
authors = ["ULab @ UIUC <ulab@illinois.edu>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -52,12 +52,14 @@ httpx = ">=0.24,<1.0"
|
|
|
52
52
|
# Optional integrations — install via `pip install agentdebugx[langgraph]` etc.
|
|
53
53
|
[tool.poetry.extras]
|
|
54
54
|
langgraph = ["langchain-core"]
|
|
55
|
+
crewai = ["crewai"]
|
|
55
56
|
otel = ["opentelemetry-api", "opentelemetry-sdk"]
|
|
56
57
|
ui = ["fastapi", "uvicorn"]
|
|
57
58
|
hub-hf = ["huggingface_hub"]
|
|
58
59
|
openhands = ["openhands-ai"]
|
|
59
60
|
all = [
|
|
60
61
|
"langchain-core",
|
|
62
|
+
"crewai",
|
|
61
63
|
"opentelemetry-api",
|
|
62
64
|
"opentelemetry-sdk",
|
|
63
65
|
"fastapi",
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""CrewAI adapter — subscribe to ``crewai_event_bus`` and record into AgentDebug.
|
|
2
|
+
|
|
3
|
+
CrewAI's runtime emits typed events through a process-global event bus
|
|
4
|
+
(``crewai.events.crewai_event_bus``). The bus has ~50 event types covering
|
|
5
|
+
crew lifecycle, task lifecycle, agent execution, LLM calls, tool usage,
|
|
6
|
+
flows, memory operations, A2A messages, MCP calls, knowledge ops, and
|
|
7
|
+
guardrails. This adapter subscribes to the subset that matters for
|
|
8
|
+
debugging and translates each into an :class:`agentdebug.models.AgentEvent`.
|
|
9
|
+
|
|
10
|
+
Why a class (not a free-function listener)? CrewAI's documented gotcha is
|
|
11
|
+
that listeners must be instantiated at module level OR explicitly kept
|
|
12
|
+
alive; otherwise the GC reclaims them before the bus fires. We hide that
|
|
13
|
+
by exposing a context-manager friendly :class:`CrewAIBridge` that holds
|
|
14
|
+
the listener instance for its lifetime.
|
|
15
|
+
|
|
16
|
+
Usage::
|
|
17
|
+
|
|
18
|
+
from agentdebug import AgentDebug, SQLiteTraceStore
|
|
19
|
+
from agentdebug.adapters.crewai import CrewAIBridge
|
|
20
|
+
|
|
21
|
+
debugger = AgentDebug(store=SQLiteTraceStore('.agentdebug/errors.sqlite'))
|
|
22
|
+
trajectory = debugger.start_trace(goal='build a marketing plan', framework='crewai')
|
|
23
|
+
|
|
24
|
+
with CrewAIBridge(debugger, trajectory):
|
|
25
|
+
crew.kickoff(inputs={...}) # normal CrewAI call; events flow into AgentDebug
|
|
26
|
+
|
|
27
|
+
debugger.finish_trace(trajectory, success=True)
|
|
28
|
+
|
|
29
|
+
The bridge defers the ``crewai`` import, so this module is safe to import
|
|
30
|
+
even when CrewAI is not installed.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import logging
|
|
36
|
+
from types import TracebackType
|
|
37
|
+
from typing import Any, List, Literal, Optional, Tuple, Type
|
|
38
|
+
|
|
39
|
+
from agentdebug.adapters.base import AdapterStatus, FrameworkAdapter
|
|
40
|
+
from agentdebug.models import AgentTrajectory, EventType
|
|
41
|
+
from agentdebug.recorder import AgentDebug
|
|
42
|
+
|
|
43
|
+
LOG = logging.getLogger('agentdebug.adapters.crewai')
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _import_crewai_events() -> Any:
    """Import CrewAI's events module lazily and return it.

    ``crewai.events`` is tried first. Older CrewAI releases ship the same
    event bus and event classes under ``crewai.utilities.events``, so that
    path is tried as a fallback; the ``crewai`` extra is not version-pinned,
    so either layout may be installed.

    Returns:
        The imported events module.

    Raises:
        ImportError: if neither module path can be imported. The original
            failure for ``crewai.events`` is chained as the cause.
    """
    # Function-scope import keeps this helper self-contained.
    import importlib

    first_error: Optional[ImportError] = None
    for module_path in ('crewai.events', 'crewai.utilities.events'):
        try:
            return importlib.import_module(module_path)
        except ImportError as exc:
            # Remember the failure for the preferred path; it is the most
            # useful cause to surface to the user.
            if first_error is None:
                first_error = exc
    raise ImportError(
        'CrewAIBridge requires the `crewai` package. '
        "Install with `pip install 'agentdebugx[crewai]'` or `pip install crewai`."
    ) from first_error
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Event-type name → (EventType, module, agent-field-name) mapping. We resolve
|
|
59
|
+
# the class objects at attach-time (after import) so module import stays
|
|
60
|
+
# dep-free; class names are stable across CrewAI versions.
|
|
61
|
+
_EVENT_MAP: List[Tuple[str, EventType, Optional[str], Optional[str]]] = [
|
|
62
|
+
# (CrewAI event class name, agentdebug EventType, module, agent_attr)
|
|
63
|
+
('CrewKickoffStartedEvent', EventType.AGENT_STEP, 'planning', None),
|
|
64
|
+
('CrewKickoffCompletedEvent', EventType.OBSERVATION, 'planning', None),
|
|
65
|
+
('TaskStartedEvent', EventType.PLAN, 'planning', 'agent'),
|
|
66
|
+
('TaskCompletedEvent', EventType.OBSERVATION, 'planning', 'agent'),
|
|
67
|
+
('AgentExecutionStartedEvent', EventType.AGENT_STEP, 'planning', 'agent'),
|
|
68
|
+
('AgentExecutionCompletedEvent', EventType.OBSERVATION, 'reflection', 'agent'),
|
|
69
|
+
('LLMCallStartedEvent', EventType.LLM_CALL, 'planning', 'agent'),
|
|
70
|
+
('LLMCallCompletedEvent', EventType.LLM_RESPONSE, 'planning', 'agent'),
|
|
71
|
+
('ToolUsageStartedEvent', EventType.TOOL_CALL, 'action', 'agent_role'),
|
|
72
|
+
('ToolUsageFinishedEvent', EventType.TOOL_RESULT, 'action', 'agent_role'),
|
|
73
|
+
('ToolUsageErrorEvent', EventType.TOOL_RESULT, 'action', 'agent_role'),
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _attr(obj: Any, *names: str) -> Optional[str]:
    """Return the first usable attribute on `obj` from `names`, stringified.

    Candidates are tried in order. A candidate is skipped when the attribute
    is missing, is ``None``, raises while being read (e.g. a failing
    property), or raises while being converted with ``str()``.

    Args:
        obj: Object to inspect.
        *names: Attribute names to try, in priority order.

    Returns:
        ``str(value)`` of the first usable attribute, or ``None`` when no
        candidate yields a value.
    """
    for name in names:
        try:
            # getattr's default only covers AttributeError; keeping the read
            # inside the try lets any other failure fall through to the next
            # candidate, the same way a failing str() does.
            value = getattr(obj, name, None)
            if value is not None:
                return str(value)
        except Exception:
            continue
    return None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _agent_name(event: Any, fallback: str) -> str:
    """Pick a short, human-readable agent label for `event`.

    Lookup order:

    1. ``event.agent_role`` / ``event.agent_name``.
    2. If ``event.agent`` is an object rather than a string, that object's
       ``role`` / ``name`` — so the label is the agent's role instead of
       the full ``str()`` of the agent object.
    3. ``str(event.agent)``, then ``event.role`` / ``event.name``.
    4. `fallback`.

    Empty strings are treated as "not set" at every step.

    Args:
        event: The event object delivered by the bus.
        fallback: Label to use when the event carries no agent information.

    Returns:
        The first non-empty label found, else `fallback`.
    """
    label = _attr(event, 'agent_role', 'agent_name')
    if label:
        return label
    agent = getattr(event, 'agent', None)
    if agent is not None and not isinstance(agent, str):
        # Presumably a CrewAI agent object exposing `role`; prefer that over
        # dumping the whole object into the agent_name field.
        label = _attr(agent, 'role', 'name')
        if label:
            return label
    return _attr(event, 'agent', 'role', 'name') or fallback
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _output_for(event: Any) -> Any:
    """Return the event's result-like payload as text, or ``None`` if absent."""
    # Priority order of the fields an event may use to carry its result.
    candidates = ('output', 'result', 'response', 'completion', 'tool_response')
    return _attr(event, *candidates)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _input_for(event: Any) -> Any:
    """Return the event's input-like payload as text, or ``None`` if absent."""
    # Priority order of the fields an event may use to carry its input.
    candidates = ('prompt', 'input', 'tool_input', 'tool_args', 'description')
    return _attr(event, *candidates)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _error_for(event: Any) -> Optional[str]:
    """Return the event's error text, or ``None`` when it has none.

    An empty error string is normalised to ``None``.
    """
    candidates = ('error', 'exception', 'error_message')
    return _attr(event, *candidates) or None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class CrewAIBridge:
    """Subscribe to crewai_event_bus and record events into an AgentDebug trajectory.

    Behaves as both a context manager (``with CrewAIBridge(...): ...``) and a
    plain object with ``attach()`` / ``detach()``.

    Detaching is effective even when the bus cannot unsubscribe: every
    handler checks the bridge's attached flag before recording, so events
    emitted after ``detach()`` (or after the ``with`` block) are ignored.
    When the bus offers an ``off(event_class, handler)`` method it is also
    used to remove the handlers. Handlers that could not be removed are kept
    and reused by a later ``attach()``, so re-attaching never records an
    event twice.
    """

    framework = 'crewai'

    def __init__(
        self,
        debugger: 'AgentDebug',
        trajectory: 'AgentTrajectory',
        *,
        bus: Any = None,
    ) -> None:
        """Store the recording target; nothing is subscribed until ``attach()``.

        Args:
            debugger: Recorder whose ``record_event`` receives each event.
            trajectory: Trajectory the events are recorded into.
            bus: Optional event bus to subscribe to. When ``None``, the
                ``crewai_event_bus`` of the imported CrewAI events module
                is used.
        """
        self.debugger = debugger
        self.trajectory = trajectory
        self._bus = bus
        self._attached = False
        # Handlers currently registered on the bus.
        self._handlers: List[Any] = []
        # (event class name, event class, handler) for each live registration;
        # needed to unsubscribe and to avoid double registration.
        self._registrations: List[Tuple[str, Any, Any]] = []
        # The bus the registrations above were made on.
        self._active_bus: Any = None

    def attach(self) -> 'CrewAIBridge':
        """Subscribe to every event class in ``_EVENT_MAP`` and start recording.

        Event classes missing from the installed CrewAI version are skipped
        with a debug log. Calling this while already attached is a no-op.

        Returns:
            ``self``, so the call can be chained.

        Raises:
            ImportError: if CrewAI's events module cannot be imported.
            RuntimeError: if no bus was injected and the events module has
                no ``crewai_event_bus``.
        """
        if self._attached:
            return self
        ce = _import_crewai_events()
        # `is None` rather than truthiness: an injected bus object may
        # legitimately evaluate as falsy.
        bus = self._bus if self._bus is not None else getattr(ce, 'crewai_event_bus', None)
        if bus is None:
            raise RuntimeError('crewai.events.crewai_event_bus is not available')
        # Registrations that survived an earlier detach() are reused as-is.
        already_registered = {name for name, _cls, _handler in self._registrations}
        for class_name, et, module, _agent_attr in _EVENT_MAP:
            if class_name in already_registered:
                continue
            evt_cls = getattr(ce, class_name, None)
            if evt_cls is None:
                LOG.debug('CrewAI event class %s not found; skipping', class_name)
                continue
            handler = self._make_handler(et, module, class_name)
            bus.on(evt_cls)(handler)
            self._registrations.append((class_name, evt_cls, handler))
            self._handlers.append(handler)
        self._active_bus = bus
        self._attached = True
        return self

    def detach(self) -> None:
        """Stop recording and unsubscribe from the bus where possible.

        Recording stops immediately because handlers consult the attached
        flag. If the bus has a callable ``off`` attribute, each handler is
        removed through it; a handler whose removal fails stays tracked so
        a later ``attach()`` reuses it instead of registering a duplicate.
        """
        self._attached = False
        bus = self._active_bus
        off = getattr(bus, 'off', None) if bus is not None else None
        if not callable(off):
            # No unsubscribe surface: handlers stay registered but muted.
            return
        still_registered: List[Tuple[str, Any, Any]] = []
        for class_name, evt_cls, handler in self._registrations:
            try:
                off(evt_cls, handler)
            except Exception as exc:  # pragma: no cover - defensive
                LOG.debug(
                    'CrewAIBridge could not unsubscribe %s: %s', class_name, exc
                )
                still_registered.append((class_name, evt_cls, handler))
        self._registrations = still_registered
        self._handlers = [handler for _name, _cls, handler in still_registered]

    # Context-manager sugar — equivalent to attach/detach.

    def __enter__(self) -> 'CrewAIBridge':
        """Attach on entering the ``with`` block."""
        return self.attach()

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> Literal[False]:
        """Detach on leaving the ``with`` block; never suppresses exceptions."""
        self.detach()
        return False

    # ---- internals ----

    def _make_handler(
        self,
        event_type: 'EventType',
        module: Optional[str],
        class_name: str,
    ) -> Any:
        """Build the ``(source, event)`` callback for one CrewAI event class.

        Args:
            event_type: AgentDebug event type to record the event as.
            module: Module tag passed through to ``record_event``.
            class_name: CrewAI event class name; recorded as metadata and
                used as the agent-name fallback.

        Returns:
            A callable suitable for ``bus.on(event_class)``.
        """
        debugger = self.debugger
        trajectory = self.trajectory

        def _on_event(source: Any, event: Any) -> None:
            # Muted while detached: the bus may still hold this callback.
            if not self._attached:
                return
            try:
                # NB: record_event takes metadata via **kwargs, so we pass
                # the per-event tags as flat keyword args. Passing them as
                # `metadata={...}` would wrap them under `metadata['metadata']`.
                debugger.record_event(
                    trajectory,
                    event_type=event_type,
                    agent_name=_agent_name(event, fallback=class_name),
                    module=module,
                    input=_input_for(event),
                    output=_output_for(event),
                    error=_error_for(event),
                    crewai_event_class=class_name,
                    crewai_source_type=type(source).__name__,
                )
            except Exception as exc:  # pragma: no cover - defensive
                # A recording failure must never break the CrewAI run.
                LOG.warning(
                    'CrewAIBridge handler for %s raised: %s', class_name, exc
                )

        return _on_event
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class CrewAIAdapter:
    """Structural :class:`FrameworkAdapter` — used by `agentdebug doctor`."""

    framework = 'crewai'

    def instrument(self, debugger: AgentDebug) -> AdapterStatus:  # noqa: D401
        """Report whether the CrewAI integration is usable.

        Nothing is subscribed here; the method only checks that CrewAI's
        events module can be imported. On success the status notes explain
        how to record a run with ``CrewAIBridge``; on failure they carry
        the import error text.
        """
        usage_hint = (
            'Create a trajectory, then use '
            '`with CrewAIBridge(debugger, trajectory): crew.kickoff(...)` '
            'to record every Crew/Task/Agent/LLM/Tool event into AgentDebug.'
        )
        try:
            _import_crewai_events()
        except ImportError as exc:
            available, notes = False, str(exc)
        else:
            available, notes = True, usage_hint
        return AdapterStatus(
            framework=self.framework,
            implemented=available,
            notes=notes,
        )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
_: FrameworkAdapter = CrewAIAdapter() # static structural check
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
__all__ = ['CrewAIAdapter', 'CrewAIBridge']
|
|
@@ -447,6 +447,12 @@ def _cmd_doctor() -> int:
|
|
|
447
447
|
statuses.append(LangGraphAdapter().instrument(_dummy_debugger()))
|
|
448
448
|
except Exception as exc: # pragma: no cover - defensive
|
|
449
449
|
statuses.append(_status('langgraph', False, str(exc)))
|
|
450
|
+
try:
|
|
451
|
+
from agentdebug.adapters.crewai import CrewAIAdapter
|
|
452
|
+
|
|
453
|
+
statuses.append(CrewAIAdapter().instrument(_dummy_debugger()))
|
|
454
|
+
except Exception as exc: # pragma: no cover - defensive
|
|
455
|
+
statuses.append(_status('crewai', False, str(exc)))
|
|
450
456
|
try:
|
|
451
457
|
from agentdebug.adapters.otel import OTelExportAdapter
|
|
452
458
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|