agentdebugx 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/PKG-INFO +5 -1
  2. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/README.md +4 -0
  3. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/20_deep_debug.md +46 -0
  4. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/pyproject.toml +1 -1
  5. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/__init__.py +5 -1
  6. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/cli.py +42 -0
  7. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/deep.py +23 -2
  8. agentdebugx-0.2.1/src/agentdebug/traceback.py +302 -0
  9. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/ui/server.py +130 -10
  10. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/LICENSE +0 -0
  11. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/00_overview.md +0 -0
  12. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/01_literature_survey.md +0 -0
  13. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/02_architecture.md +0 -0
  14. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/03_taxonomy.md +0 -0
  15. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/04_trace_schema.md +0 -0
  16. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/05_adapters.md +0 -0
  17. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/06_detectors.md +0 -0
  18. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/07_attribution.md +0 -0
  19. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/08_recovery.md +0 -0
  20. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/09_error_database.md +0 -0
  21. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/10_taxonomy_induction.md +0 -0
  22. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/11_multimodal.md +0 -0
  23. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/12_ui_dashboard.md +0 -0
  24. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/13_class_design.md +0 -0
  25. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/14_api_reference.md +0 -0
  26. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/15_roadmap.md +0 -0
  27. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/16_governance.md +0 -0
  28. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/17_claude_code_design_patterns.md +0 -0
  29. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/18_comparison_codex_vs_design.md +0 -0
  30. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/19_error_hub.md +0 -0
  31. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/21_integrations.md +0 -0
  32. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/ERROR_TAXONOMY.md +0 -0
  33. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/OPEN_SOURCE_DEVELOPMENT_PLAN.md +0 -0
  34. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/README.md +0 -0
  35. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/RESEARCH_SURVEY.md +0 -0
  36. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/benchmarks/v0_1_smoke.json +0 -0
  37. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/benchmarks/v0_1_smoke.md +0 -0
  38. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/__init__.py +0 -0
  39. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/base.py +0 -0
  40. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/langgraph.py +0 -0
  41. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/otel.py +0 -0
  42. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/raw.py +0 -0
  43. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/analyzers.py +0 -0
  44. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/attribution.py +0 -0
  45. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/events.py +0 -0
  46. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/__init__.py +0 -0
  47. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/backend_base.py +0 -0
  48. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/backends.py +0 -0
  49. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/bundle.py +0 -0
  50. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/scrub.py +0 -0
  51. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/instrumentation.py +0 -0
  52. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/__init__.py +0 -0
  53. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/claude_skill.py +0 -0
  54. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/openhands.py +0 -0
  55. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/judges.py +0 -0
  56. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/llm.py +0 -0
  57. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/models.py +0 -0
  58. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/recorder.py +0 -0
  59. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/recovery.py +0 -0
  60. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/storage.py +0 -0
  61. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/taxonomy.py +0 -0
  62. {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/ui/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentdebugx
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Portable error analysis, tracing, and recovery framework for agentic AI systems. Import as `agentdebug`.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -187,6 +187,10 @@ agentdebug serve --store-sqlite .agentdebug/errors.sqlite
187
187
  # DeepDebug — iterative multi-turn analysis (plan -> hypothesize -> verify -> refine)
188
188
  agentdebug deep <trajectory.json>
189
189
 
190
+ # Render the cascade as a Python-traceback (root cause first, manifested failure last)
191
+ agentdebug deep <trajectory.json> --traceback
192
+ agentdebug analyze <trajectory.json> --traceback # works without an LLM too
193
+
190
194
  # Error Hub: package + push a trace to a Git remote or HF dataset
191
195
  agentdebug hub push <trace_id> \
192
196
  --to git:git@github.com:your-org/agentdebug-bundles.git#bundles \
@@ -147,6 +147,10 @@ agentdebug serve --store-sqlite .agentdebug/errors.sqlite
147
147
  # DeepDebug — iterative multi-turn analysis (plan -> hypothesize -> verify -> refine)
148
148
  agentdebug deep <trajectory.json>
149
149
 
150
+ # Render the cascade as a Python-traceback (root cause first, manifested failure last)
151
+ agentdebug deep <trajectory.json> --traceback
152
+ agentdebug analyze <trajectory.json> --traceback # works without an LLM too
153
+
150
154
  # Error Hub: package + push a trace to a Git remote or HF dataset
151
155
  agentdebug hub push <trace_id> \
152
156
  --to git:git@github.com:your-org/agentdebug-bundles.git#bundles \
@@ -115,6 +115,52 @@ rounds : plan (4.6s) hypothesize (11.0s)
115
115
  The single-pass `LLMJudgeAnalyzer` on the same trace returned only the first
116
116
  finding. DeepDebug recovered the full cascade and selected the upstream cause.
117
117
 
118
+ ## 6.1 AgentTraceback — Python-traceback-style cascade view
119
+
120
+ Once DeepDebug has populated `finding.metadata['cascading_from_event_id']`,
121
+ `agentdebug.traceback.format_traceback(report, trajectory)` renders the
122
+ cascade in a layout that mirrors Python's `Traceback (most recent call last)`
123
+ — root cause first, manifested failure last, with arrows between hops:
124
+
125
+ ```text
126
+ AgentTraceback (root cause first, manifested failure last):
127
+ trace_id=trace_… framework=live-cascade-demo goal='Find latest paper, summarize, then email …'
128
+
129
+ File "root cause", in trajectory
130
+ Step 3 agent=search mode=action.parameter_error confidence=1.00
131
+ module=action
132
+ error> JSON schema validation failed: missing parameter query
133
+ evidence:
134
+ - args={}
135
+ suggested: Validate parameters against tool schemas before execution.
136
+ ↓ cascaded to
137
+ File "cascade depth 1", in trajectory
138
+ Step 4 agent=planner mode=verification.premature_stop confidence=1.00
139
+ output> Final answer: AgentDebug is a popular paper.
140
+ ↓ cascaded to
141
+ File "cascade depth 1", in trajectory
142
+ Step 4 agent=planner mode=memory.hallucination confidence=0.95
143
+ output> Final answer: AgentDebug is a popular paper.
144
+
145
+ AgentFailure[memory.hallucination]: The search agent failed to provide the
146
+ required 'query' parameter in its tool call, leading to a tool error. The
147
+ planner then hallucinated a generic fact about the paper and prematurely
148
+ terminated the task without completing the summary or email steps.
149
+ ```
150
+
151
+ CLI:
152
+
153
+ ```bash
154
+ agentdebug deep <trajectory.json> --traceback # render to stdout
155
+ agentdebug analyze <trajectory.json> --traceback # works for rule analyzer too
156
+ agentdebug judge <traj|trace_id> --attribute --traceback
157
+ ```
158
+
159
+ When DeepDebug isn't available (heuristic analyzer or single-pass judge),
160
+ the renderer falls back to **step-index ordering** — the earliest finding
161
+ becomes the root and later findings cascade from it. This means
162
+ `--traceback` works on any analyzer in the pipeline, not just DeepDebug.
163
+
118
164
  ## 7. Failure modes
119
165
 
120
166
  - **Cost blowout** — if `max_hypotheses_to_verify` is high and verify is
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "agentdebugx"
3
- version = "0.2.0"
3
+ version = "0.2.1"
4
4
  description = "Portable error analysis, tracing, and recovery framework for agentic AI systems. Import as `agentdebug`."
5
5
  authors = ["ULab @ UIUC <ulab@illinois.edu>"]
6
6
  license = "MIT"
@@ -29,6 +29,7 @@ from agentdebug.models import (
29
29
  )
30
30
  from agentdebug.recorder import AgentDebug, TraceSession
31
31
  from agentdebug.recovery import FixProposal, Recoverer, ReflexionSuggestion
32
+ from agentdebug.traceback import CascadeFrame, build_cascade, format_traceback
32
33
  from agentdebug.storage import JsonlTraceStore, SQLiteTraceStore
33
34
  from agentdebug.taxonomy import SEED_FAILURE_MODES, get_failure_mode
34
35
 
@@ -42,6 +43,9 @@ __all__ = [
42
43
  'Attributor',
43
44
  'Blame',
44
45
  'BusEvent',
46
+ 'CascadeFrame',
47
+ 'build_cascade',
48
+ 'format_traceback',
45
49
  'DEFAULT_BUS',
46
50
  'DiagnosticReport',
47
51
  'EventBus',
@@ -62,4 +66,4 @@ __all__ = [
62
66
  'get_failure_mode',
63
67
  ]
64
68
 
65
- __version__ = '0.2.0'
69
+ __version__ = '0.2.1'
@@ -36,6 +36,15 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
36
36
  action='store_true',
37
37
  help='Also emit Reflexion-style retry suggestions for each finding',
38
38
  )
39
+ p_analyze.add_argument(
40
+ '--traceback',
41
+ action='store_true',
42
+ help='Render a Python-traceback-style cascade view instead of JSON',
43
+ )
44
+ p_analyze.add_argument(
45
+ '--no-color', action='store_true',
46
+ help='Disable ANSI colors in --traceback output (default: auto)',
47
+ )
39
48
 
40
49
  p_list = sub.add_parser('list', help='List trace IDs in a store')
41
50
  _add_store_args(p_list)
@@ -131,6 +140,11 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
131
140
  p_deep.add_argument('--base-url', dest='base_url')
132
141
  p_deep.add_argument('--api-key', dest='api_key')
133
142
  p_deep.add_argument('--out', help='Optional output path for the report JSON')
143
+ p_deep.add_argument(
144
+ '--traceback', action='store_true',
145
+ help='Render a Python-traceback-style cascade view to stdout',
146
+ )
147
+ p_deep.add_argument('--no-color', action='store_true')
134
148
 
135
149
  args = parser.parse_args(argv)
136
150
  if args.command == 'analyze':
@@ -161,6 +175,14 @@ def _cmd_analyze(args: argparse.Namespace) -> int:
161
175
  trajectory_path = Path(args.trajectory)
162
176
  trajectory = trajectory_from_json(trajectory_path.read_text(encoding='utf-8'))
163
177
  report = HeuristicAnalyzer().analyze(trajectory)
178
+ if args.traceback:
179
+ from agentdebug.traceback import format_traceback
180
+
181
+ text = format_traceback(
182
+ report, trajectory, use_color=not args.no_color and sys.stdout.isatty()
183
+ )
184
+ _emit(text, args.out)
185
+ return 0
164
186
  rendered = model_to_json(report, indent=2)
165
187
  if args.suggest:
166
188
  proposals = ReflexionSuggestion().suggest(trajectory, report)
@@ -232,6 +254,17 @@ def _cmd_judge(args: argparse.Namespace) -> int:
232
254
  if args.attribute:
233
255
  blame = AllAtOnceAttributor(llm=llm).attribute(trajectory, report.findings)
234
256
  rendered = _augment_with_blame(rendered, blame)
257
+ if args.traceback:
258
+ from agentdebug.traceback import format_traceback
259
+
260
+ rendered = (
261
+ rendered
262
+ + '\n\n# === AgentTraceback ===\n'
263
+ + format_traceback(
264
+ report, trajectory,
265
+ use_color=not args.no_color and sys.stdout.isatty(),
266
+ )
267
+ )
235
268
  _emit(rendered, args.out)
236
269
  return 0
237
270
 
@@ -394,6 +427,15 @@ def _cmd_deep(args: argparse.Namespace) -> int:
394
427
  for r in result.rounds:
395
428
  print(f' {r.name:>20} {r.duration_ms:>6} ms', file=sys.stderr)
396
429
  _emit(out_text, args.out)
430
+ if args.traceback:
431
+ from agentdebug.traceback import format_traceback
432
+
433
+ text = format_traceback(
434
+ result.report, trajectory,
435
+ use_color=not args.no_color and sys.stdout.isatty(),
436
+ )
437
+ print()
438
+ print(text)
397
439
  return 0
398
440
 
399
441
 
@@ -200,6 +200,11 @@ class DeepDebugAnalyzer:
200
200
 
201
201
  def analyze(self, trajectory: AgentTrajectory) -> DeepDebugResult:
202
202
  rounds: List[DeepDebugRound] = []
203
+ # Per-event-id lookup of the most recent verified cascade predecessor;
204
+ # populated by _verify and consumed in _compose_report so the cascade
205
+ # info survives the verify -> refine handoff even when the refine LLM
206
+ # doesn't echo it back verbatim.
207
+ self._cascade_lookup: Dict[str, str] = {}
203
208
 
204
209
  plan = self._plan(trajectory, rounds)
205
210
  raw_focus = plan.get('focus_event_ids') or []
@@ -309,6 +314,10 @@ class DeepDebugAnalyzer:
309
314
  hypothesis.cascading_from_event_id = self._opt_str(
310
315
  parsed.get('cascading_from_event_id')
311
316
  )
317
+ if hypothesis.event_id and hypothesis.cascading_from_event_id:
318
+ self._cascade_lookup[hypothesis.event_id] = (
319
+ hypothesis.cascading_from_event_id
320
+ )
312
321
 
313
322
  def _refine(
314
323
  self,
@@ -427,16 +436,28 @@ class DeepDebugAnalyzer:
427
436
  mode = SEED_FAILURE_MODES.get(mid)
428
437
  if mode is None:
429
438
  continue
439
+ event_id = self._opt_str(raw.get('event_id'))
440
+ # Carry the cascade predecessor we extracted in verify so the
441
+ # AgentTraceback renderer can chain findings.
442
+ cascade_from: Optional[str] = None
443
+ if event_id is not None:
444
+ cascade_from = self._cascade_lookup.get(event_id)
445
+ cascade_from_raw = self._opt_str(raw.get('cascading_from_event_id'))
446
+ if cascade_from_raw:
447
+ cascade_from = cascade_from_raw
448
+ finding_metadata: Dict[str, Any] = {'source': 'deep_debug'}
449
+ if cascade_from:
450
+ finding_metadata['cascading_from_event_id'] = cascade_from
430
451
  findings.append(FailureFinding(
431
452
  finding_id=new_id('finding'),
432
453
  failure_mode=mode,
433
- event_id=self._opt_str(raw.get('event_id')),
454
+ event_id=event_id,
434
455
  agent_name=self._opt_str(raw.get('agent_name')),
435
456
  step_index=self._opt_int(raw.get('step_index')),
436
457
  confidence=self._opt_float(raw.get('confidence'), 0.5),
437
458
  evidence=self._str_list(raw.get('evidence')),
438
459
  suggestion=self._suggestion(mode),
439
- metadata={'source': 'deep_debug'},
460
+ metadata=finding_metadata,
440
461
  ))
441
462
  root = parsed.get('root_cause') or {}
442
463
  report = DiagnosticReport(
@@ -0,0 +1,302 @@
1
+ """Python-traceback-style rendering of cascading agent failures.
2
+
3
+ A diagnostic report contains a *set* of findings; what users actually want is
4
+ a *chain* that shows how a single root cause cascaded through later steps,
5
+ ending at the manifested failure — exactly the way a Python traceback walks
6
+ from the outermost frame to the raised exception.
7
+
8
+ Two inputs feed the chain:
9
+
10
+ * DeepDebug findings populate ``finding.metadata['cascading_from_event_id']``
11
+ with the predecessor's event ID, so the cascade is explicit and verified.
12
+ * Heuristic / single-shot LLM judges don't compute a cascade, so we fall
13
+ back to **step-index ordering** with the earliest finding as the root.
14
+
15
+ Public API::
16
+
17
+ from agentdebug.traceback import format_traceback
18
+ print(format_traceback(report, trajectory))
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from dataclasses import dataclass, field
24
+ from typing import Dict, List, Optional
25
+
26
+ from agentdebug.models import AgentEvent, AgentTrajectory, DiagnosticReport, FailureFinding
27
+
28
+
29
@dataclass
class CascadeFrame:
    """A single hop in a failure cascade, comparable to one Python traceback frame."""

    # The diagnosed failure reported by this frame.
    finding: FailureFinding
    # Trajectory event the finding points at; None when no matching event is available.
    event: Optional[AgentEvent]
    # Event ID of the upstream event this one cascaded from, when a link is known.
    cascades_from_event_id: Optional[str] = None
    # Position in the chain: 0 = root cause; deepest = manifested failure.
    depth: int = 0
    # Presumably the event IDs that cascade from this frame (inferred from the
    # name). Each instance gets its own empty list; build_cascade in this module
    # does not fill it in.
    children_event_ids: List[str] = field(default_factory=list)
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Cascade construction
42
+ # ---------------------------------------------------------------------------
43
+
44
def build_cascade(
    report: DiagnosticReport,
    trajectory: Optional[AgentTrajectory] = None,
) -> List[CascadeFrame]:
    """Build an ordered chain of frames (root → manifested) from a report.

    Findings are grouped by ``event_id``. Links between events are read from
    ``finding.metadata['cascading_from_event_id']`` when present; events that
    cannot be reached from the root through those links are appended in
    step-index order, so a report without cascade metadata degrades to plain
    step ordering.

    Args:
        report: Diagnostic report whose ``findings`` are chained.
        trajectory: Optional trajectory used to attach the matching event to
            each frame. Without it every frame's ``event`` is ``None``.

    Returns:
        One frame per finding. Findings on the same event share a ``depth``
        (the event's position in the chain) and are ordered by descending
        confidence. Findings without an ``event_id`` come last. An empty list
        is returned when the report has no findings.
    """
    if not report.findings:
        return []

    events_by_id: Dict[str, AgentEvent] = {}
    if trajectory is not None:
        events_by_id = {evt.event_id: evt for evt in trajectory.events}

    # Group findings by event so several findings on one event stay together.
    by_event: Dict[str, List[FailureFinding]] = {}
    orphans: List[FailureFinding] = []
    for finding in report.findings:
        if finding.event_id:
            by_event.setdefault(finding.event_id, []).append(finding)
        else:
            orphans.append(finding)

    # Predecessor of each event: the first non-empty string link found among
    # the findings attached to that event.
    predecessor: Dict[str, Optional[str]] = {}
    for event_id, findings in by_event.items():
        links = (f.metadata.get('cascading_from_event_id') for f in findings)
        predecessor[event_id] = next(
            (link for link in links if isinstance(link, str) and link),
            None,
        )

    def step_key(event_id: str) -> tuple[bool, int]:
        # Events without any step index sort last. The check is ``is None``
        # rather than truthiness so that a legitimate step 0 stays first
        # instead of being treated as "missing".
        step = _min_step(by_event[event_id])
        return (step is None, 0 if step is None else step)

    # Root: the report's declared root-cause event if it has findings,
    # otherwise the earliest event, ties broken by highest confidence.
    root_event_id: Optional[str] = report.root_cause_event_id
    if root_event_id not in by_event:
        root_event_id = min(
            by_event,
            key=lambda eid: (
                *step_key(eid),
                -max(f.confidence for f in by_event[eid]),
            ),
            default=None,
        )

    chain_event_ids: List[str] = []
    if root_event_id is not None:
        # Invert the predecessor map, ignoring links to events that have no
        # findings of their own.
        descendants: Dict[str, List[str]] = {}
        for child, parent in predecessor.items():
            if parent and parent in by_event:
                descendants.setdefault(parent, []).append(child)

        visited: set[str] = set()

        def dfs(node: str) -> None:
            # The visited check also protects against cyclic links.
            if node in visited:
                return
            visited.add(node)
            chain_event_ids.append(node)
            # Walk children forward in time.
            for child in sorted(descendants.get(node, []), key=step_key):
                dfs(child)

        dfs(root_event_id)

        # Events not connected to the root are appended in step order.
        chain_event_ids.extend(
            sorted((eid for eid in by_event if eid not in visited), key=step_key)
        )
    else:
        # No event-bound findings to pick a root from — fall back to step order.
        chain_event_ids = sorted(by_event, key=step_key)

    frames: List[CascadeFrame] = []
    for depth, event_id in enumerate(chain_event_ids):
        # Several findings on one event become separate frames at the same
        # depth, most confident first.
        for finding in sorted(by_event[event_id], key=lambda f: -f.confidence):
            frames.append(CascadeFrame(
                finding=finding,
                event=events_by_id.get(event_id),
                cascades_from_event_id=predecessor.get(event_id),
                depth=depth,
            ))

    # Findings without an event ID cannot be placed in the chain; they go last.
    frames.extend(
        CascadeFrame(
            finding=finding,
            event=None,
            cascades_from_event_id=None,
            depth=len(chain_event_ids),
        )
        for finding in orphans
    )
    return frames
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Formatting
163
+ # ---------------------------------------------------------------------------
164
+
165
+ def format_traceback(
166
+ report: DiagnosticReport,
167
+ trajectory: Optional[AgentTrajectory] = None,
168
+ *,
169
+ use_color: bool = False,
170
+ indent: str = ' ',
171
+ ) -> str:
172
+ """Render a cascading agent-failure traceback.
173
+
174
+ Output mirrors Python's traceback shape: a header, frames ordered
175
+ *root → manifested*, then a final summary line that names the failure.
176
+ """
177
+ frames = build_cascade(report, trajectory)
178
+ if not frames:
179
+ return _wrap_color(
180
+ 'AgentTraceback: no findings recorded.',
181
+ 'muted',
182
+ use_color,
183
+ )
184
+
185
+ lines: List[str] = []
186
+ header = 'AgentTraceback (root cause first, manifested failure last):'
187
+ lines.append(_wrap_color(header, 'header', use_color))
188
+ if trajectory is not None:
189
+ meta = []
190
+ if trajectory.trace_id:
191
+ meta.append(f'trace_id={trajectory.trace_id}')
192
+ if trajectory.framework:
193
+ meta.append(f'framework={trajectory.framework}')
194
+ if trajectory.goal:
195
+ meta.append(f'goal={trajectory.goal!r}')
196
+ if meta:
197
+ lines.append(indent + _wrap_color(' '.join(meta), 'meta', use_color))
198
+ lines.append('')
199
+
200
+ for idx, frame in enumerate(frames):
201
+ lines.extend(_format_frame(frame, indent=indent, use_color=use_color))
202
+ if idx < len(frames) - 1:
203
+ lines.append(indent + _wrap_color('↓ cascaded to', 'arrow', use_color))
204
+
205
+ # Tail summary — analogue to "TypeError: ..." in Python tracebacks.
206
+ final = frames[-1].finding
207
+ summary = report.summary or final.failure_mode.name
208
+ tail = (
209
+ f'AgentFailure[{final.failure_mode.mode_id}]: '
210
+ f'{summary}'
211
+ )
212
+ lines.append('')
213
+ lines.append(_wrap_color(tail, 'failure', use_color))
214
+ return '\n'.join(lines)
215
+
216
+
217
+ def _format_frame(
218
+ frame: CascadeFrame, *, indent: str, use_color: bool
219
+ ) -> List[str]:
220
+ f = frame.finding
221
+ event = frame.event
222
+ role = 'root cause' if frame.depth == 0 else f'cascade depth {frame.depth}'
223
+ header_parts = [
224
+ f'Step {f.step_index if f.step_index is not None else "?"}',
225
+ f'agent={f.agent_name or "?"}',
226
+ f'mode={f.failure_mode.mode_id}',
227
+ f'confidence={f.confidence:.2f}',
228
+ ]
229
+ header = f' File "{role}", in trajectory'
230
+ sub = f' {" ".join(header_parts)}'
231
+
232
+ lines: List[str] = [
233
+ indent + _wrap_color(header, 'frame', use_color),
234
+ indent + _wrap_color(sub, 'frame-meta', use_color),
235
+ ]
236
+ if event is not None:
237
+ if event.module:
238
+ lines.append(indent + f' module={event.module}')
239
+ if event.event_id:
240
+ lines.append(indent + f' event_id={event.event_id}')
241
+ if event.input is not None and str(event.input).strip():
242
+ lines.append(indent + f' input> {_truncate(event.input)}')
243
+ if event.output is not None and str(event.output).strip():
244
+ lines.append(indent + f' output> {_truncate(event.output)}')
245
+ if event.error:
246
+ lines.append(
247
+ indent
248
+ + _wrap_color(f' error> {_truncate(event.error)}', 'error', use_color)
249
+ )
250
+ if f.evidence:
251
+ lines.append(indent + ' evidence:')
252
+ for ev in f.evidence:
253
+ lines.append(indent + f' - {_truncate(ev, 220)}')
254
+ if f.suggestion:
255
+ lines.append(
256
+ indent
257
+ + _wrap_color(f' suggested: {_truncate(f.suggestion, 220)}', 'suggestion', use_color)
258
+ )
259
+ return lines
260
+
261
+
262
def _truncate(value: object, max_chars: int = 160) -> str:
    """Flatten *value* onto one line and cap its length.

    ``None`` becomes the empty string; any other value goes through ``str()``.
    Newlines are replaced by spaces. Text longer than ``max_chars`` is cut to
    its first ``max_chars`` characters followed by an ellipsis.
    """
    flattened = ('' if value is None else str(value)).replace('\n', ' ')
    if len(flattened) <= max_chars:
        return flattened
    return f'{flattened[:max_chars]}…'
268
+
269
+
270
def _min_step(findings: List[FailureFinding]) -> Optional[int]:
    """Return the smallest ``step_index`` among *findings*.

    Findings whose ``step_index`` is ``None`` are ignored; ``None`` is
    returned when no finding carries a step index.
    """
    return min(
        (f.step_index for f in findings if f.step_index is not None),
        default=None,
    )
273
+
274
+
275
+ # ---------------------------------------------------------------------------
276
+ # Tiny ANSI colorization (no dep)
277
+ # ---------------------------------------------------------------------------
278
+
279
# Style name -> ANSI SGR escape sequence that switches the style on.
_PALETTE = {
    'header': '\033[1;37m',      # bold white
    'meta': '\033[2m',           # dim
    'frame': '\033[1;36m',       # cyan, bold
    'frame-meta': '\033[36m',    # cyan
    'arrow': '\033[2;33m',       # dim yellow
    'error': '\033[31m',         # red
    'suggestion': '\033[32m',    # green
    'failure': '\033[1;31m',     # bold red
    'muted': '\033[2m',
}
# Sequence that clears every attribute set by a palette entry.
_RESET = '\033[0m'


def _wrap_color(text: str, style: str, use_color: bool) -> str:
    """Surround *text* with the ANSI codes for *style*.

    The text is returned unchanged when ``use_color`` is false or when
    *style* has no entry in ``_PALETTE``.
    """
    code = _PALETTE.get(style) if use_color else None
    return f'{code}{text}{_RESET}' if code else text
300
+
301
+
302
+ __all__ = ['CascadeFrame', 'build_cascade', 'format_traceback']
@@ -84,7 +84,19 @@ def build_app(store: TraceStore) -> Any:
84
84
 
85
85
  @app.get('/', response_class=HTMLResponse)
86
86
  def index() -> str:
87
- return _INDEX_HTML
87
+ bootstrap: Dict[str, Any] = {'traces': [], 'selected': None}
88
+ trace_ids = store.list_traces()
89
+ bootstrap['traces'] = trace_ids
90
+ if trace_ids:
91
+ trajectory = store.load_trajectory(trace_ids[0])
92
+ if trajectory is not None:
93
+ report = HeuristicAnalyzer().analyze(trajectory)
94
+ bootstrap['selected'] = {
95
+ 'trajectory': _to_dict(trajectory),
96
+ 'report': _to_dict(report),
97
+ }
98
+ payload = json.dumps(bootstrap).replace('</', '<\\/')
99
+ return _INDEX_HTML.replace('__BOOTSTRAP_JSON__', payload)
88
100
 
89
101
  return app
90
102
 
@@ -161,7 +173,7 @@ _INDEX_HTML = """<!doctype html>
161
173
  }
162
174
  .side-section-title {
163
175
  color:var(--muted2); text-transform:uppercase; font-size:11px;
164
- font-weight:760; letter-spacing:.08em; margin:8px 0 8px;
176
+ font-weight:760; letter-spacing:0; margin:8px 0 8px;
165
177
  }
166
178
  .run-list { list-style:none; padding:0; margin:0; display:flex; flex-direction:column; gap:8px; }
167
179
  .run {
@@ -213,13 +225,13 @@ _INDEX_HTML = """<!doctype html>
213
225
  }
214
226
  .hero-main { padding:18px; }
215
227
  .kicker { color:var(--cyan); font-size:11px; text-transform:uppercase;
216
- letter-spacing:.12em; font-weight:800; }
228
+ letter-spacing:0; font-weight:800; }
217
229
  h1 { margin:8px 0 8px; font-size:26px; line-height:1.15; letter-spacing:0; }
218
230
  .goal { color:var(--muted); font-size:13px; line-height:1.45; max-width:92ch; }
219
231
  .meta-line { display:flex; gap:8px; flex-wrap:wrap; margin-top:15px; }
220
232
  .stats { display:grid; grid-template-columns:repeat(2, minmax(0,1fr)); gap:10px; padding:12px; }
221
233
  .stat { background:var(--panel2); border:1px solid #303434; border-radius:8px; padding:12px; }
222
- .stat-label { color:var(--muted2); font-size:11px; text-transform:uppercase; letter-spacing:.08em; }
234
+ .stat-label { color:var(--muted2); font-size:11px; text-transform:uppercase; letter-spacing:0; }
223
235
  .stat-value { margin-top:7px; font-size:22px; line-height:1; font-weight:760; }
224
236
  .stat-value.bad { color:var(--rose); }
225
237
  .stat-value.warn { color:var(--amber); }
@@ -233,6 +245,16 @@ _INDEX_HTML = """<!doctype html>
233
245
  .panel-title { font-size:13px; font-weight:760; }
234
246
  .panel-body { padding:14px; }
235
247
  .timeline { display:flex; flex-direction:column; gap:10px; }
248
+ .trace-legend {
249
+ display:grid; grid-template-columns:minmax(0,1fr) minmax(0,1fr); gap:10px;
250
+ margin-bottom:10px;
251
+ }
252
+ .legend-cell {
253
+ border:1px solid #303434; background:#171919; border-radius:8px; padding:10px;
254
+ min-width:0;
255
+ }
256
+ .legend-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
257
+ .legend-title { margin-top:4px; font-size:13px; font-weight:760; }
236
258
  .event {
237
259
  display:grid; grid-template-columns:58px minmax(0,1fr); gap:12px;
238
260
  border:1px solid #2c302f; border-radius:8px; background:#1a1c1c; padding:12px;
@@ -247,9 +269,31 @@ _INDEX_HTML = """<!doctype html>
247
269
  .event-title { display:flex; align-items:center; justify-content:space-between; gap:10px; }
248
270
  .event-agent { font-size:14px; font-weight:760; }
249
271
  .event-type { color:var(--muted); font-size:12px; font-family:ui-monospace, monospace; }
272
+ .trace-pair {
273
+ margin-top:9px; display:grid; grid-template-columns:minmax(0,1fr) minmax(0,1fr);
274
+ gap:8px;
275
+ }
276
+ .lane {
277
+ min-width:0; border:1px solid #2b2f2e; background:#151717; border-radius:8px;
278
+ padding:10px;
279
+ }
280
+ .lane.agent-lane { border-color:#33403f; }
281
+ .lane.debug-lane { border-color:#3a3430; background:#181713; }
282
+ .event.root .lane.debug-lane { border-color:#80612d; background:#211a11; }
283
+ .lane-head { display:flex; align-items:center; justify-content:space-between; gap:8px; }
284
+ .lane-label {
285
+ color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0;
286
+ }
287
+ .lane-title { margin-top:6px; color:#f1f2ee; font-size:13px; line-height:1.3; font-weight:720; }
288
+ .lane-copy { margin-top:7px; color:#d9ddd5; font-size:12px; line-height:1.45; overflow-wrap:anywhere; }
289
+ .lane-meta { margin-top:9px; display:flex; gap:6px; flex-wrap:wrap; }
290
+ .trace-link {
291
+ margin-top:8px; color:var(--muted); font-size:11px; line-height:1.4;
292
+ font-family:ui-monospace, SFMono-Regular, Consolas, monospace;
293
+ }
250
294
  .event-grid { margin-top:8px; display:grid; grid-template-columns:1fr 1fr; gap:8px; }
251
295
  .field { min-width:0; border:1px solid #2b2f2e; background:#151717; border-radius:8px; padding:9px; }
252
- .field-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:.08em; }
296
+ .field-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
253
297
  .field-value { margin-top:5px; color:#d9ddd5; font-size:12px; line-height:1.4;
254
298
  overflow-wrap:anywhere; }
255
299
  .field.error { border-color:#66333a; background:#211619; }
@@ -265,7 +309,7 @@ _INDEX_HTML = """<!doctype html>
265
309
  .root-card { border-left:4px solid var(--amber); }
266
310
  .root-grid { display:grid; grid-template-columns:repeat(3,minmax(0,1fr)); gap:8px; margin-top:10px; }
267
311
  .mini { border:1px solid #303434; border-radius:8px; padding:9px; background:#171919; min-width:0; }
268
- .mini-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:.08em; }
312
+ .mini-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
269
313
  .mini-value { margin-top:6px; font-size:13px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
270
314
  .flow { display:grid; gap:8px; }
271
315
  .flow-item {
@@ -284,6 +328,7 @@ _INDEX_HTML = """<!doctype html>
284
328
  .sidebar { border-right:0; border-bottom:1px solid var(--line); }
285
329
  .workspace { height:auto; }
286
330
  .topbar { position:static; }
331
+ .trace-legend, .trace-pair { grid-template-columns:1fr; }
287
332
  }
288
333
  </style>
289
334
  </head>
@@ -324,6 +369,7 @@ _INDEX_HTML = """<!doctype html>
324
369
  </section>
325
370
  </div>
326
371
  <script>
372
+ const BOOTSTRAP = __BOOTSTRAP_JSON__;
327
373
  async function api(path) {
328
374
  const r = await fetch(path);
329
375
  if (!r.ok) throw new Error('HTTP ' + r.status);
@@ -367,6 +413,20 @@ async function loadTraceList() {
367
413
  document.getElementById('detail').innerHTML = '<div class="empty">No traces in store.</div>';
368
414
  }
369
415
  }
416
+ function renderTraceList(traceIds, selectedId) {
417
+ const ul = document.getElementById('trace-list');
418
+ ul.innerHTML = '';
419
+ document.getElementById('trace-count').textContent = traceIds.length + ' trace' + (traceIds.length === 1 ? '' : 's') + ' in local store';
420
+ traceIds.forEach((tid) => {
421
+ const li = document.createElement('li');
422
+ li.className = 'run' + (tid === selectedId ? ' active' : '');
423
+ li.innerHTML = '<div class="run-id">' + escapeHtml(tid) + '</div>' +
424
+ '<div class="run-meta"><span class="chip bad">failed</span><span class="chip">SQLite</span></div>';
425
+ li.dataset.tid = tid;
426
+ li.onclick = () => { selectTrace(tid, li); };
427
+ ul.appendChild(li);
428
+ });
429
+ }
370
430
  async function selectTrace(tid, li) {
371
431
  document.querySelectorAll('.run').forEach(el => el.classList.remove('active'));
372
432
  li.classList.add('active');
@@ -403,8 +463,11 @@ function renderTrace(traj, report) {
403
463
  html += '</div></div>';
404
464
 
405
465
  html += '<div class="layout">';
406
- html += '<div class="panel"><div class="panel-head"><div class="panel-title">Execution Timeline</div><span class="chip">who / when / evidence</span></div><div class="panel-body"><div class="timeline">';
407
- for (const ev of events) html += renderEvent(ev, ev.event_id === rootId);
466
+ html += '<div class="panel"><div class="panel-head"><div class="panel-title">Agent Trace + Error Trace Alignment</div><span class="chip">native span -> diagnosis</span></div><div class="panel-body">';
467
+ html += '<div class="trace-legend"><div class="legend-cell"><div class="legend-label">Agent native trace</div><div class="legend-title">What the agent logged, thought, called, or observed.</div></div>';
468
+ html += '<div class="legend-cell"><div class="legend-label">AgentDebugX error trace</div><div class="legend-title">Normalized failure signal, attribution, and repair hint for human review.</div></div></div>';
469
+ html += '<div class="timeline">';
470
+ for (const ev of events) html += renderEvent(ev, ev.event_id === rootId, findingForEvent(findings, ev.event_id));
408
471
  html += '</div></div></div>';
409
472
 
410
473
  html += '<div class="rail">';
@@ -440,13 +503,61 @@ function mini(label, value) {
440
503
  function flow(n, text) {
441
504
  return '<div class="flow-item"><div class="flow-dot">' + n + '</div><div>' + escapeHtml(text) + '</div></div>';
442
505
  }
443
- function renderEvent(ev, isRoot) {
506
+ function findingForEvent(findings, eventId) {
507
+ return (findings || []).find(f => f.event_id === eventId) || null;
508
+ }
509
+ function nativeTrace(ev) {
510
+ const meta = ev.metadata || {};
511
+ const native = meta.native_trace || {};
512
+ const fallback = truncate(ev.output || ev.input || ev.error || 'Recorded framework event.', 180);
513
+ const tags = Array.isArray(native.tags) ? native.tags : [ev.module || 'module', ev.event_type || 'event'];
514
+ return {
515
+ span: native.span_id || ev.event_id || '-',
516
+ title: native.title || ((ev.agent_name || 'agent') + ' / ' + (ev.event_type || 'event')),
517
+ body: native.message || fallback,
518
+ tags: tags,
519
+ state: native.state || native.tool || ''
520
+ };
521
+ }
522
+ function errorTrace(ev, finding) {
523
+ const meta = ev.metadata || {};
524
+ const overlay = meta.error_trace || {};
525
+ const mode = overlay.failure_mode || finding?.failure_mode?.mode_id || (eventProblem(ev) ? 'unclassified.signal' : 'context');
526
+ const title = overlay.title || finding?.failure_mode?.name || (eventProblem(ev) ? 'Failure signal detected' : 'Context event');
527
+ const body = overlay.human_readout || finding?.suggestion || (eventProblem(ev)
528
+ ? 'AgentDebugX keeps this event in the failure trace because it contains an error, lost-context signal, or invalid state transition.'
529
+ : 'No local failure signal; shown to preserve the causal path for the reviewer.');
530
+ const severity = overlay.severity || (finding ? 'high' : (eventProblem(ev) ? 'medium' : 'context'));
531
+ const repair = overlay.repair || finding?.suggestion || '';
532
+ return {mode, title, body, severity, repair};
533
+ }
534
+ function severityClass(severity) {
535
+ if (severity === 'critical' || severity === 'high') return 'bad';
536
+ if (severity === 'medium') return 'warn';
537
+ if (severity === 'context') return '';
538
+ return 'cyan';
539
+ }
540
+ function renderEvent(ev, isRoot, finding) {
541
+ const native = nativeTrace(ev);
542
+ const debug = errorTrace(ev, finding);
444
543
  let html = '<div class="event ' + (isRoot ? 'root' : '') + '">';
445
544
  html += '<div class="step-index">' + escapeHtml(ev.step_index ?? '-') + '</div>';
446
545
  html += '<div><div class="event-title"><div><div class="event-agent">' + escapeHtml(ev.agent_name || 'agent') + '</div>';
447
546
  html += '<div class="event-type">' + escapeHtml(ev.event_type || '') + ' / ' + escapeHtml(ev.module || 'module') + '</div></div>';
448
547
  html += isRoot ? '<span class="chip warn">root candidate</span>' : (eventProblem(ev) ? '<span class="chip bad">signal</span>' : '<span class="chip good">ok</span>');
449
- html += '</div><div class="event-grid">';
548
+ html += '</div><div class="trace-pair">';
549
+ html += '<div class="lane agent-lane"><div class="lane-head"><div class="lane-label">Agent native trace</div><span class="chip">' + escapeHtml(native.span) + '</span></div>';
550
+ html += '<div class="lane-title">' + escapeHtml(native.title) + '</div>';
551
+ html += '<div class="lane-copy">' + escapeHtml(native.body) + '</div>';
552
+ html += '<div class="lane-meta">' + (native.tags || []).map(t => '<span class="chip">' + escapeHtml(t) + '</span>').join('') + '</div>';
553
+ if (native.state) html += '<div class="trace-link">' + escapeHtml(native.state) + '</div>';
554
+ html += '</div>';
555
+ html += '<div class="lane debug-lane"><div class="lane-head"><div class="lane-label">AgentDebugX error trace</div><span class="chip ' + severityClass(debug.severity) + '">' + escapeHtml(debug.severity) + '</span></div>';
556
+ html += '<div class="lane-title">' + escapeHtml(debug.title) + '</div>';
557
+ html += '<div class="lane-copy">' + escapeHtml(debug.body) + '</div>';
558
+ html += '<div class="lane-meta"><span class="chip ' + (finding ? familyClass(finding.failure_mode?.family) : '') + '">' + escapeHtml(debug.mode) + '</span></div>';
559
+ if (debug.repair) html += '<div class="trace-link">repair: ' + escapeHtml(debug.repair) + '</div>';
560
+ html += '</div></div><div class="event-grid">';
450
561
  html += field('Input', truncate(ev.input, 132), false);
451
562
  html += field('Output', truncate(ev.output, 132), false);
452
563
  html += field('Error', truncate(ev.error, 132), Boolean(ev.error));
@@ -469,6 +580,15 @@ function renderFinding(f) {
469
580
  html += '</div>';
470
581
  return html;
471
582
  }
583
+ if (BOOTSTRAP && BOOTSTRAP.traces) {
584
+ const selected = BOOTSTRAP.selected ? BOOTSTRAP.selected.trajectory.trace_id : null;
585
+ renderTraceList(BOOTSTRAP.traces, selected);
586
+ if (BOOTSTRAP.selected) {
587
+ renderTrace(BOOTSTRAP.selected.trajectory, BOOTSTRAP.selected.report);
588
+ } else {
589
+ document.getElementById('detail').innerHTML = '<div class="empty">No traces in store.</div>';
590
+ }
591
+ }
472
592
  loadTraceList();
473
593
  </script>
474
594
  </body>
File without changes
File without changes