agent-failure-debugger 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ """
2
+ agent_failure_debugger — Diagnose why your LLM agent failed.
3
+
4
+ Deterministic causal analysis with fix generation.
5
+
6
+ Primary API:
7
+ from agent_failure_debugger import diagnose
8
+ result = diagnose(raw_log, adapter="langchain")
9
+
10
+ from agent_failure_debugger import watch
11
+ graph = watch(workflow.compile(), auto_diagnose=True)
12
+ """
13
+
14
+ __version__ = "0.1.0"
15
+
16
+
17
def diagnose(raw_log, adapter="langchain", **kwargs):
    """Diagnose failures from a raw agent log.

    This is the primary entry point for the tool.

    Args:
        raw_log: Raw log/response from the agent or service.
        adapter: Adapter name ("langchain", "langsmith", "crewai", "redis_help_demo").
        **kwargs: Passed to run_pipeline (e.g. use_learning, top_k).

    Returns:
        Dict with: diagnosis, fix, summary, explanation, telemetry, matcher_output.
    """
    # Imported lazily so `import agent_failure_debugger` stays cheap and does
    # not pull the full diagnosis pipeline in at package-import time.
    from agent_failure_debugger.diagnose import diagnose as _impl

    return _impl(raw_log, adapter=adapter, **kwargs)
32
+
33
+
34
def watch(compiled_graph, **kwargs):
    """Wrap a LangGraph agent for live failure detection.

    Requires langchain-core: pip install agent-failure-debugger[langchain]

    Args:
        compiled_graph: A compiled LangGraph graph.
        **kwargs: auto_diagnose (bool), auto_pipeline (bool), verbose (bool).

    Returns:
        A wrapped graph with Atlas diagnosis injected.

    Raises:
        ImportError: If langchain-core (the optional extra) is not installed.
    """
    try:
        # NOTE(review): this imports from `llm_failure_atlas` while the public
        # package is `agent_failure_debugger` — presumably the internal/legacy
        # package name; confirm it matches the installed layout.
        from llm_failure_atlas.adapters.callback_handler import watch as _watch
    except ImportError as err:
        # Chain the original failure so the real missing module is visible
        # in the traceback instead of being silently replaced.
        raise ImportError(
            "watch() requires langchain-core. "
            "Install with: pip install agent-failure-debugger[langchain]"
        ) from err
    return _watch(compiled_graph, **kwargs)
@@ -0,0 +1,380 @@
1
+ """
2
+ abstraction.py
3
+
4
+ Phase 14: Abstraction / Pruning / UX layer.
5
+
6
+ Pipeline position:
7
+ matcher → debugger → formatter → abstraction → explainer
8
+
9
+ Three capabilities:
10
+ 1. top-k causal path selection
11
+ 2. failure clustering
12
+ 3. explanation simplification (verbose / standard / brief)
13
+
14
+ This layer does NOT modify diagnostic data.
15
+ It only controls what is displayed and how.
16
+
17
+ GPT review fixes applied:
18
+ ① top-k: winner paths always prioritized before scoring
19
+ ② clustering: representative uses confidence + root_score
20
+ ③ simplification: uses selected_paths not raw alternative_paths
21
+ ④ cluster collapse: same-cluster split by non-cluster is correct (spec)
22
+ ⑤ brief mode: includes representative failure name
23
+ """
24
+
25
+ from agent_failure_debugger.labels import FAILURE_MAP
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Failure clusters
29
+ # ---------------------------------------------------------------------------
30
+
31
+ CLUSTERS = {
32
+ "reasoning_failure": {
33
+ "label": "Reasoning failure",
34
+ "members": [
35
+ "clarification_failure",
36
+ "assumption_invalidation_failure",
37
+ "premature_model_commitment",
38
+ "repair_strategy_failure",
39
+ ],
40
+ },
41
+ "retrieval_failure": {
42
+ "label": "Retrieval failure",
43
+ "members": [
44
+ "semantic_cache_intent_bleeding",
45
+ "prompt_injection_via_retrieval",
46
+ "context_truncation_loss",
47
+ "rag_retrieval_drift",
48
+ ],
49
+ },
50
+ "tool_failure": {
51
+ "label": "Tool failure",
52
+ "members": [
53
+ "agent_tool_call_loop",
54
+ "tool_result_misinterpretation",
55
+ ],
56
+ },
57
+ "output_failure": {
58
+ "label": "Output failure",
59
+ "members": [
60
+ "incorrect_output",
61
+ ],
62
+ },
63
+ "instruction_failure": {
64
+ "label": "Instruction failure",
65
+ "members": [
66
+ "instruction_priority_inversion",
67
+ ],
68
+ },
69
+ }
70
+
71
+ _FAILURE_TO_CLUSTER = {}
72
+ for cid, cdef in CLUSTERS.items():
73
+ for member in cdef["members"]:
74
+ _FAILURE_TO_CLUSTER[member] = cid
75
+
76
+
77
def get_cluster(failure_id: str) -> str | None:
    """Return the cluster id containing *failure_id*, or None if unclustered."""
    if failure_id in _FAILURE_TO_CLUSTER:
        return _FAILURE_TO_CLUSTER[failure_id]
    return None
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # 1. Top-k path selection (fix ①: winner paths always prioritized)
83
+ # ---------------------------------------------------------------------------
84
+
85
+ def _score_path_simple(path: list, debugger_output: dict) -> float:
86
+ conf_map = {f["id"]: f["confidence"] for f in debugger_output.get("failures", [])}
87
+ root_scores = {r["id"]: r["score"] for r in debugger_output.get("root_ranking", [])}
88
+
89
+ avg_conf = sum(conf_map.get(n, 0) for n in path) / len(path) if path else 0
90
+ root_score = root_scores.get(path[0], 0) if path else 0
91
+ length_bonus = len(path) / 10.0
92
+
93
+ return avg_conf + root_score * 0.3 + length_bonus
94
+
95
+
96
+ def select_top_k(debugger_output: dict, k: int = 2) -> dict:
97
+ """
98
+ Select top-k paths for display.
99
+ Winner paths (containing conflict group winners) are always prioritized.
100
+ """
101
+ all_paths = debugger_output.get("causal_paths", [])
102
+ multi_hop = [p for p in all_paths if len(p) >= 2]
103
+
104
+ if len(multi_hop) <= k:
105
+ return {
106
+ "selected_paths": multi_hop,
107
+ "suppressed_paths": [],
108
+ }
109
+
110
+ # Collect conflict winners
111
+ conflict_winners = set()
112
+ for c in debugger_output.get("conflicts", []):
113
+ conflict_winners.add(c.get("winner", ""))
114
+
115
+ # Partition: winner paths first, then others
116
+ winner_paths = []
117
+ other_paths = []
118
+ for p in multi_hop:
119
+ if any(node in conflict_winners for node in p):
120
+ winner_paths.append(p)
121
+ else:
122
+ other_paths.append(p)
123
+
124
+ # Score within each group
125
+ winner_paths.sort(
126
+ key=lambda p: _score_path_simple(p, debugger_output), reverse=True)
127
+ other_paths.sort(
128
+ key=lambda p: _score_path_simple(p, debugger_output), reverse=True)
129
+
130
+ # Fill selected: winners first, then others
131
+ selected = []
132
+ for p in winner_paths:
133
+ if len(selected) < k:
134
+ selected.append(p)
135
+ for p in other_paths:
136
+ if len(selected) < k:
137
+ selected.append(p)
138
+
139
+ # Everything else is suppressed
140
+ selected_set = [id(p) for p in selected]
141
+ suppressed = [p for p in multi_hop if id(p) not in selected_set]
142
+
143
+ return {
144
+ "selected_paths": selected,
145
+ "suppressed_paths": suppressed,
146
+ }
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # 2. Failure clustering (fix ②: representative uses conf + root_score)
151
+ # ---------------------------------------------------------------------------
152
+
153
def cluster_failures(debugger_output: dict) -> list[dict]:
    """
    Group active failures into clusters.
    Representative: highest (confidence + 0.3 * root_score).
    """
    confidences = {f["id"]: f["confidence"] for f in debugger_output.get("failures", [])}
    importance = {r["id"]: r["score"] for r in debugger_output.get("root_ranking", [])}

    def _weight(fid):
        # Blend detection confidence with root-cause importance.
        return confidences[fid] + 0.3 * importance.get(fid, 0)

    grouped = []
    for cluster_id, spec in CLUSTERS.items():
        present = [fid for fid in spec["members"] if fid in confidences]
        if not present:
            continue

        representative = max(present, key=_weight)

        grouped.append({
            "cluster": cluster_id,
            "label": spec["label"],
            "members": present,
            "representative": representative,
            "representative_description": FAILURE_MAP.get(representative, ""),
        })

    return grouped
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # 3. Explanation simplification
186
+ # ---------------------------------------------------------------------------
187
+
188
def _collapse_cluster_sequence(path: list) -> list[dict]:
    """
    Collapse consecutive same-cluster nodes into cluster entries.
    Note: same cluster split by a different cluster stays split (by design).

    Args:
        path: Ordered list of failure ids.

    Returns:
        List of entries: {"type": "cluster", "cluster", "label", "members"}
        for a run of same-cluster nodes, or {"type": "node", "id"} for each
        node that belongs to no cluster.
    """
    if not path:
        return []

    entries = []

    def _flush(cluster, members):
        # Emit the pending run: a single cluster entry when the run belongs
        # to a cluster, otherwise one bare node entry per member.
        if not members:
            return
        if cluster:
            entries.append({
                "type": "cluster",
                "cluster": cluster,
                "label": CLUSTERS[cluster]["label"],
                "members": members,
            })
        else:
            for m in members:
                entries.append({"type": "node", "id": m})

    current_cluster = None
    current_members = []

    for node in path:
        cluster = get_cluster(node)
        if cluster == current_cluster and current_cluster is not None:
            # Same cluster as the previous node — extend the current run.
            current_members.append(node)
        else:
            _flush(current_cluster, current_members)
            current_cluster = cluster
            current_members = [node]

    # Flush the trailing run.
    _flush(current_cluster, current_members)

    return entries
232
+
233
+
234
+ def _get_cluster_representative(members: list, debugger_output: dict) -> str:
235
+ """Pick the representative from collapsed cluster members."""
236
+ if not members:
237
+ return ""
238
+ active = {f["id"]: f["confidence"] for f in debugger_output.get("failures", [])}
239
+ root_scores = {r["id"]: r["score"] for r in debugger_output.get("root_ranking", [])}
240
+ scored = [(m, active.get(m, 0) + 0.3 * root_scores.get(m, 0)) for m in members]
241
+ return max(scored, key=lambda x: x[1])[0]
242
+
243
+
244
def simplify_explanation(debugger_output: dict, selected_paths: list,
                         mode: str = "standard") -> dict:
    """
    Generate simplified explanation based on mode.
    Uses selected_paths (from top-k) for alternatives, not raw alternative_paths.
    (fix ③)

    Args:
        debugger_output: Dict read for "primary_path" and "explanation"
            (missing keys treated as empty).
        selected_paths: Paths chosen by top-k selection; alternatives are
            drawn from here only.
        mode: "verbose" (full explanation as-is), "brief" (one sentence),
            or anything else -> "standard" (primary chain + alternatives).

    Returns:
        Dict with "display_mode", "summary_explanation",
        "detailed_explanation" (the latter is always the raw explanation).
    """
    primary = debugger_output.get("primary_path") or []
    explanation = debugger_output.get("explanation", "")

    # Alternatives = selected paths minus primary (fix ③)
    alternatives = [p for p in selected_paths if p != primary]

    # Verbose: pass the full explanation straight through, unmodified.
    if mode == "verbose":
        return {
            "display_mode": "verbose",
            "summary_explanation": explanation,
            "detailed_explanation": explanation,
        }

    # Both remaining modes work on the cluster-collapsed primary path.
    collapsed = _collapse_cluster_sequence(primary)

    # --- Brief mode (fix ⑤: include representative) ---
    if mode == "brief":
        if len(collapsed) >= 2:
            # Sentence: origin (first entry) -> outcome (last entry).
            first = collapsed[0]
            last = collapsed[-1]
            # Cluster entries have "label"; bare node entries only have "id".
            first_label = first.get("label", first.get("id", "unknown"))
            last_label = last.get("label", last.get("id", "unknown"))
            # Add representative for context
            first_rep = _get_cluster_representative(
                first.get("members", []), debugger_output)
            rep_desc = FAILURE_MAP.get(first_rep, first_rep) if first_rep else ""
            if rep_desc:
                summary = (
                    f"The failure originated in {first_label.lower()} "
                    f"({rep_desc}) "
                    f"and resulted in {last_label.lower()}."
                )
            else:
                summary = (
                    f"The failure originated in {first_label.lower()} "
                    f"and resulted in {last_label.lower()}."
                )
        elif len(collapsed) == 1:
            # Single entry: no origin/outcome distinction to draw.
            name = collapsed[0].get("label", collapsed[0].get("id", "unknown"))
            summary = f"The failure is categorized as {name.lower()}."
        else:
            summary = "No causal path detected."

        return {
            "display_mode": "brief",
            "summary_explanation": summary,
            "detailed_explanation": explanation,
        }

    # --- Standard mode ---
    # Render each collapsed entry as a phrase, chained with "leading to".
    parts = []
    for i, entry in enumerate(collapsed):
        if entry["type"] == "cluster":
            label = entry["label"]
            members = entry["members"]
            if len(members) == 1:
                # Single-member cluster: show the member's description.
                desc = FAILURE_MAP.get(members[0], members[0])
                part = f"{label} ({desc})"
            else:
                # Multi-member run: show the first -> last member ids.
                first = members[0]
                last = members[-1]
                part = f"{label} ({first} → {last})"
        else:
            desc = FAILURE_MAP.get(entry["id"], entry["id"])
            part = f"{entry['id']} ({desc})"

        if i == 0:
            parts.append(part)
        else:
            parts.append(f"leading to {part}")

    summary = ", ".join(parts) + "." if parts else "No causal path detected."

    # Alternatives from selected_paths only (fix ③)
    alt_summaries = []
    for alt in alternatives:
        alt_collapsed = _collapse_cluster_sequence(alt)
        alt_labels = [
            e.get("label", e.get("id", "?")) for e in alt_collapsed
        ]
        alt_summaries.append(" → ".join(alt_labels))

    # One "Alternative:" line per non-primary selected path.
    standard_explanation = f"Primary: {summary}"
    if alt_summaries:
        for alt in alt_summaries:
            standard_explanation += f"\nAlternative: {alt}"

    return {
        "display_mode": "standard",
        "summary_explanation": standard_explanation,
        "detailed_explanation": explanation,
    }
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # Orchestrator
347
+ # ---------------------------------------------------------------------------
348
+
349
def abstract(debugger_output: dict, top_k: int = 2,
             mode: str = "standard") -> dict:
    """
    Full abstraction pipeline:
    1. top-k path selection
    2. failure clustering
    3. explanation simplification (uses selected_paths)
    """
    selection = select_top_k(debugger_output, k=top_k)
    grouped = cluster_failures(debugger_output)
    simplified = simplify_explanation(
        debugger_output,
        selected_paths=selection["selected_paths"],
        mode=mode,
    )

    # Original diagnostic data is passed through untouched; keys are inserted
    # in the same order as before so serialized output is unchanged.
    result = {}
    for key in ("root_candidates", "root_ranking", "failures",
                "causal_links", "causal_paths"):
        result[key] = debugger_output.get(key, [])
    result["primary_path"] = debugger_output.get("primary_path")
    for key in ("conflicts", "evidence"):
        result[key] = debugger_output.get(key, [])

    # Abstraction layer on top of the preserved data.
    result["selected_paths"] = selection["selected_paths"]
    result["suppressed_paths"] = selection["suppressed_paths"]
    result["clusters"] = grouped
    result.update(simplified)
    return result