agentdebugx 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/PKG-INFO +5 -1
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/README.md +4 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/20_deep_debug.md +46 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/pyproject.toml +1 -1
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/__init__.py +5 -1
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/cli.py +42 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/deep.py +23 -2
- agentdebugx-0.2.1/src/agentdebug/traceback.py +302 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/ui/server.py +130 -10
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/LICENSE +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/00_overview.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/01_literature_survey.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/02_architecture.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/03_taxonomy.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/04_trace_schema.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/05_adapters.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/06_detectors.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/07_attribution.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/08_recovery.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/09_error_database.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/10_taxonomy_induction.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/11_multimodal.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/12_ui_dashboard.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/13_class_design.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/14_api_reference.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/15_roadmap.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/16_governance.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/17_claude_code_design_patterns.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/18_comparison_codex_vs_design.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/19_error_hub.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/21_integrations.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/ERROR_TAXONOMY.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/OPEN_SOURCE_DEVELOPMENT_PLAN.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/README.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/RESEARCH_SURVEY.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/benchmarks/v0_1_smoke.json +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/docs/benchmarks/v0_1_smoke.md +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/__init__.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/base.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/langgraph.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/otel.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/adapters/raw.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/analyzers.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/attribution.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/events.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/__init__.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/backend_base.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/backends.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/bundle.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/hub/scrub.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/instrumentation.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/__init__.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/claude_skill.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/integrations/openhands.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/judges.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/llm.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/models.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/recorder.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/recovery.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/storage.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/taxonomy.py +0 -0
- {agentdebugx-0.2.0 → agentdebugx-0.2.1}/src/agentdebug/ui/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentdebugx
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Portable error analysis, tracing, and recovery framework for agentic AI systems. Import as `agentdebug`.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -187,6 +187,10 @@ agentdebug serve --store-sqlite .agentdebug/errors.sqlite
|
|
|
187
187
|
# DeepDebug — iterative multi-turn analysis (plan -> hypothesize -> verify -> refine)
|
|
188
188
|
agentdebug deep <trajectory.json>
|
|
189
189
|
|
|
190
|
+
# Render the cascade as a Python-traceback (root cause first, manifested failure last)
|
|
191
|
+
agentdebug deep <trajectory.json> --traceback
|
|
192
|
+
agentdebug analyze <trajectory.json> --traceback # works without an LLM too
|
|
193
|
+
|
|
190
194
|
# Error Hub: package + push a trace to a Git remote or HF dataset
|
|
191
195
|
agentdebug hub push <trace_id> \
|
|
192
196
|
--to git:git@github.com:your-org/agentdebug-bundles.git#bundles \
|
|
@@ -147,6 +147,10 @@ agentdebug serve --store-sqlite .agentdebug/errors.sqlite
|
|
|
147
147
|
# DeepDebug — iterative multi-turn analysis (plan -> hypothesize -> verify -> refine)
|
|
148
148
|
agentdebug deep <trajectory.json>
|
|
149
149
|
|
|
150
|
+
# Render the cascade as a Python-traceback (root cause first, manifested failure last)
|
|
151
|
+
agentdebug deep <trajectory.json> --traceback
|
|
152
|
+
agentdebug analyze <trajectory.json> --traceback # works without an LLM too
|
|
153
|
+
|
|
150
154
|
# Error Hub: package + push a trace to a Git remote or HF dataset
|
|
151
155
|
agentdebug hub push <trace_id> \
|
|
152
156
|
--to git:git@github.com:your-org/agentdebug-bundles.git#bundles \
|
|
@@ -115,6 +115,52 @@ rounds : plan (4.6s) hypothesize (11.0s)
|
|
|
115
115
|
The single-pass `LLMJudgeAnalyzer` on the same trace returned only the first
|
|
116
116
|
finding. DeepDebug recovered the full cascade and selected the upstream cause.
|
|
117
117
|
|
|
118
|
+
## 6.1 AgentTraceback — Python-traceback-style cascade view
|
|
119
|
+
|
|
120
|
+
Once DeepDebug has populated `finding.metadata['cascading_from_event_id']`,
|
|
121
|
+
`agentdebug.traceback.format_traceback(report, trajectory)` renders the
|
|
122
|
+
cascade in a layout that mirrors Python's `Traceback (most recent call last)`
|
|
123
|
+
— root cause first, manifested failure last, with arrows between hops:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
AgentTraceback (root cause first, manifested failure last):
|
|
127
|
+
trace_id=trace_… framework=live-cascade-demo goal='Find latest paper, summarize, then email …'
|
|
128
|
+
|
|
129
|
+
File "root cause", in trajectory
|
|
130
|
+
Step 3 agent=search mode=action.parameter_error confidence=1.00
|
|
131
|
+
module=action
|
|
132
|
+
error> JSON schema validation failed: missing parameter query
|
|
133
|
+
evidence:
|
|
134
|
+
- args={}
|
|
135
|
+
suggested: Validate parameters against tool schemas before execution.
|
|
136
|
+
↓ cascaded to
|
|
137
|
+
File "cascade depth 1", in trajectory
|
|
138
|
+
Step 4 agent=planner mode=verification.premature_stop confidence=1.00
|
|
139
|
+
output> Final answer: AgentDebug is a popular paper.
|
|
140
|
+
↓ cascaded to
|
|
141
|
+
File "cascade depth 1", in trajectory
|
|
142
|
+
Step 4 agent=planner mode=memory.hallucination confidence=0.95
|
|
143
|
+
output> Final answer: AgentDebug is a popular paper.
|
|
144
|
+
|
|
145
|
+
AgentFailure[memory.hallucination]: The search agent failed to provide the
|
|
146
|
+
required 'query' parameter in its tool call, leading to a tool error. The
|
|
147
|
+
planner then hallucinated a generic fact about the paper and prematurely
|
|
148
|
+
terminated the task without completing the summary or email steps.
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
CLI:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
agentdebug deep <trajectory.json> --traceback # render to stdout
|
|
155
|
+
agentdebug analyze <trajectory.json> --traceback # works for rule analyzer too
|
|
156
|
+
agentdebug judge <traj|trace_id> --attribute --traceback
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
When DeepDebug isn't available (heuristic analyzer or single-pass judge),
|
|
160
|
+
the renderer falls back to **step-index ordering** — the earliest finding
|
|
161
|
+
becomes the root and later findings cascade from it. This means
|
|
162
|
+
`--traceback` works on any analyzer in the pipeline, not just DeepDebug.
|
|
163
|
+
|
|
118
164
|
## 7. Failure modes
|
|
119
165
|
|
|
120
166
|
- **Cost blowout** — if `max_hypotheses_to_verify` is high and verify is
|
|
@@ -29,6 +29,7 @@ from agentdebug.models import (
|
|
|
29
29
|
)
|
|
30
30
|
from agentdebug.recorder import AgentDebug, TraceSession
|
|
31
31
|
from agentdebug.recovery import FixProposal, Recoverer, ReflexionSuggestion
|
|
32
|
+
from agentdebug.traceback import CascadeFrame, build_cascade, format_traceback
|
|
32
33
|
from agentdebug.storage import JsonlTraceStore, SQLiteTraceStore
|
|
33
34
|
from agentdebug.taxonomy import SEED_FAILURE_MODES, get_failure_mode
|
|
34
35
|
|
|
@@ -42,6 +43,9 @@ __all__ = [
|
|
|
42
43
|
'Attributor',
|
|
43
44
|
'Blame',
|
|
44
45
|
'BusEvent',
|
|
46
|
+
'CascadeFrame',
|
|
47
|
+
'build_cascade',
|
|
48
|
+
'format_traceback',
|
|
45
49
|
'DEFAULT_BUS',
|
|
46
50
|
'DiagnosticReport',
|
|
47
51
|
'EventBus',
|
|
@@ -62,4 +66,4 @@ __all__ = [
|
|
|
62
66
|
'get_failure_mode',
|
|
63
67
|
]
|
|
64
68
|
|
|
65
|
-
__version__ = '0.2.0'
|
|
69
|
+
__version__ = '0.2.1'
|
|
@@ -36,6 +36,15 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
|
|
|
36
36
|
action='store_true',
|
|
37
37
|
help='Also emit Reflexion-style retry suggestions for each finding',
|
|
38
38
|
)
|
|
39
|
+
p_analyze.add_argument(
|
|
40
|
+
'--traceback',
|
|
41
|
+
action='store_true',
|
|
42
|
+
help='Render a Python-traceback-style cascade view instead of JSON',
|
|
43
|
+
)
|
|
44
|
+
p_analyze.add_argument(
|
|
45
|
+
'--no-color', action='store_true',
|
|
46
|
+
help='Disable ANSI colors in --traceback output (default: auto)',
|
|
47
|
+
)
|
|
39
48
|
|
|
40
49
|
p_list = sub.add_parser('list', help='List trace IDs in a store')
|
|
41
50
|
_add_store_args(p_list)
|
|
@@ -131,6 +140,11 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
|
|
|
131
140
|
p_deep.add_argument('--base-url', dest='base_url')
|
|
132
141
|
p_deep.add_argument('--api-key', dest='api_key')
|
|
133
142
|
p_deep.add_argument('--out', help='Optional output path for the report JSON')
|
|
143
|
+
p_deep.add_argument(
|
|
144
|
+
'--traceback', action='store_true',
|
|
145
|
+
help='Render a Python-traceback-style cascade view to stdout',
|
|
146
|
+
)
|
|
147
|
+
p_deep.add_argument('--no-color', action='store_true')
|
|
134
148
|
|
|
135
149
|
args = parser.parse_args(argv)
|
|
136
150
|
if args.command == 'analyze':
|
|
@@ -161,6 +175,14 @@ def _cmd_analyze(args: argparse.Namespace) -> int:
|
|
|
161
175
|
trajectory_path = Path(args.trajectory)
|
|
162
176
|
trajectory = trajectory_from_json(trajectory_path.read_text(encoding='utf-8'))
|
|
163
177
|
report = HeuristicAnalyzer().analyze(trajectory)
|
|
178
|
+
if args.traceback:
|
|
179
|
+
from agentdebug.traceback import format_traceback
|
|
180
|
+
|
|
181
|
+
text = format_traceback(
|
|
182
|
+
report, trajectory, use_color=not args.no_color and sys.stdout.isatty()
|
|
183
|
+
)
|
|
184
|
+
_emit(text, args.out)
|
|
185
|
+
return 0
|
|
164
186
|
rendered = model_to_json(report, indent=2)
|
|
165
187
|
if args.suggest:
|
|
166
188
|
proposals = ReflexionSuggestion().suggest(trajectory, report)
|
|
@@ -232,6 +254,17 @@ def _cmd_judge(args: argparse.Namespace) -> int:
|
|
|
232
254
|
if args.attribute:
|
|
233
255
|
blame = AllAtOnceAttributor(llm=llm).attribute(trajectory, report.findings)
|
|
234
256
|
rendered = _augment_with_blame(rendered, blame)
|
|
257
|
+
if args.traceback:
|
|
258
|
+
from agentdebug.traceback import format_traceback
|
|
259
|
+
|
|
260
|
+
rendered = (
|
|
261
|
+
rendered
|
|
262
|
+
+ '\n\n# === AgentTraceback ===\n'
|
|
263
|
+
+ format_traceback(
|
|
264
|
+
report, trajectory,
|
|
265
|
+
use_color=not args.no_color and sys.stdout.isatty(),
|
|
266
|
+
)
|
|
267
|
+
)
|
|
235
268
|
_emit(rendered, args.out)
|
|
236
269
|
return 0
|
|
237
270
|
|
|
@@ -394,6 +427,15 @@ def _cmd_deep(args: argparse.Namespace) -> int:
|
|
|
394
427
|
for r in result.rounds:
|
|
395
428
|
print(f' {r.name:>20} {r.duration_ms:>6} ms', file=sys.stderr)
|
|
396
429
|
_emit(out_text, args.out)
|
|
430
|
+
if args.traceback:
|
|
431
|
+
from agentdebug.traceback import format_traceback
|
|
432
|
+
|
|
433
|
+
text = format_traceback(
|
|
434
|
+
result.report, trajectory,
|
|
435
|
+
use_color=not args.no_color and sys.stdout.isatty(),
|
|
436
|
+
)
|
|
437
|
+
print()
|
|
438
|
+
print(text)
|
|
397
439
|
return 0
|
|
398
440
|
|
|
399
441
|
|
|
@@ -200,6 +200,11 @@ class DeepDebugAnalyzer:
|
|
|
200
200
|
|
|
201
201
|
def analyze(self, trajectory: AgentTrajectory) -> DeepDebugResult:
|
|
202
202
|
rounds: List[DeepDebugRound] = []
|
|
203
|
+
# Per-event-id lookup of the most recent verified cascade predecessor;
|
|
204
|
+
# populated by _verify and consumed in _compose_report so the cascade
|
|
205
|
+
# info survives the verify -> refine handoff even when the refine LLM
|
|
206
|
+
# doesn't echo it back verbatim.
|
|
207
|
+
self._cascade_lookup: Dict[str, str] = {}
|
|
203
208
|
|
|
204
209
|
plan = self._plan(trajectory, rounds)
|
|
205
210
|
raw_focus = plan.get('focus_event_ids') or []
|
|
@@ -309,6 +314,10 @@ class DeepDebugAnalyzer:
|
|
|
309
314
|
hypothesis.cascading_from_event_id = self._opt_str(
|
|
310
315
|
parsed.get('cascading_from_event_id')
|
|
311
316
|
)
|
|
317
|
+
if hypothesis.event_id and hypothesis.cascading_from_event_id:
|
|
318
|
+
self._cascade_lookup[hypothesis.event_id] = (
|
|
319
|
+
hypothesis.cascading_from_event_id
|
|
320
|
+
)
|
|
312
321
|
|
|
313
322
|
def _refine(
|
|
314
323
|
self,
|
|
@@ -427,16 +436,28 @@ class DeepDebugAnalyzer:
|
|
|
427
436
|
mode = SEED_FAILURE_MODES.get(mid)
|
|
428
437
|
if mode is None:
|
|
429
438
|
continue
|
|
439
|
+
event_id = self._opt_str(raw.get('event_id'))
|
|
440
|
+
# Carry the cascade predecessor we extracted in verify so the
|
|
441
|
+
# AgentTraceback renderer can chain findings.
|
|
442
|
+
cascade_from: Optional[str] = None
|
|
443
|
+
if event_id is not None:
|
|
444
|
+
cascade_from = self._cascade_lookup.get(event_id)
|
|
445
|
+
cascade_from_raw = self._opt_str(raw.get('cascading_from_event_id'))
|
|
446
|
+
if cascade_from_raw:
|
|
447
|
+
cascade_from = cascade_from_raw
|
|
448
|
+
finding_metadata: Dict[str, Any] = {'source': 'deep_debug'}
|
|
449
|
+
if cascade_from:
|
|
450
|
+
finding_metadata['cascading_from_event_id'] = cascade_from
|
|
430
451
|
findings.append(FailureFinding(
|
|
431
452
|
finding_id=new_id('finding'),
|
|
432
453
|
failure_mode=mode,
|
|
433
|
-
event_id=self._opt_str(raw.get('event_id')),
|
|
454
|
+
event_id=event_id,
|
|
434
455
|
agent_name=self._opt_str(raw.get('agent_name')),
|
|
435
456
|
step_index=self._opt_int(raw.get('step_index')),
|
|
436
457
|
confidence=self._opt_float(raw.get('confidence'), 0.5),
|
|
437
458
|
evidence=self._str_list(raw.get('evidence')),
|
|
438
459
|
suggestion=self._suggestion(mode),
|
|
439
|
-
metadata={'source': 'deep_debug'},
|
|
460
|
+
metadata=finding_metadata,
|
|
440
461
|
))
|
|
441
462
|
root = parsed.get('root_cause') or {}
|
|
442
463
|
report = DiagnosticReport(
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Python-traceback-style rendering of cascading agent failures.
|
|
2
|
+
|
|
3
|
+
A diagnostic report contains a *set* of findings; what users actually want is
|
|
4
|
+
a *chain* that shows how a single root cause cascaded through later steps,
|
|
5
|
+
ending at the manifested failure — exactly the way a Python traceback walks
|
|
6
|
+
from the outermost frame to the raised exception.
|
|
7
|
+
|
|
8
|
+
Two inputs feed the chain:
|
|
9
|
+
|
|
10
|
+
* DeepDebug findings populate ``finding.metadata['cascading_from_event_id']``
|
|
11
|
+
with the predecessor's event ID, so the cascade is explicit and verified.
|
|
12
|
+
* Heuristic / single-shot LLM judges don't compute a cascade, so we fall
|
|
13
|
+
back to **step-index ordering** with the earliest finding as the root.
|
|
14
|
+
|
|
15
|
+
Public API::
|
|
16
|
+
|
|
17
|
+
from agentdebug.traceback import format_traceback
|
|
18
|
+
print(format_traceback(report, trajectory))
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from typing import Dict, List, Optional
|
|
25
|
+
|
|
26
|
+
from agentdebug.models import AgentEvent, AgentTrajectory, DiagnosticReport, FailureFinding
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
|
|
30
|
+
class CascadeFrame:
|
|
31
|
+
"""One frame in the cascade — analogous to one Python traceback line."""
|
|
32
|
+
|
|
33
|
+
finding: FailureFinding
|
|
34
|
+
event: Optional[AgentEvent]
|
|
35
|
+
cascades_from_event_id: Optional[str] = None
|
|
36
|
+
depth: int = 0 # 0 = root cause; deepest = manifested failure
|
|
37
|
+
children_event_ids: List[str] = field(default_factory=list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Cascade construction
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
def build_cascade(
|
|
45
|
+
report: DiagnosticReport,
|
|
46
|
+
trajectory: Optional[AgentTrajectory] = None,
|
|
47
|
+
) -> List[CascadeFrame]:
|
|
48
|
+
"""Build an ordered chain of frames (root → manifested) from a report.
|
|
49
|
+
|
|
50
|
+
Uses ``finding.metadata['cascading_from_event_id']`` when present;
|
|
51
|
+
otherwise falls back to step-index ordering.
|
|
52
|
+
"""
|
|
53
|
+
if not report.findings:
|
|
54
|
+
return []
|
|
55
|
+
|
|
56
|
+
events_by_id: Dict[str, AgentEvent] = {}
|
|
57
|
+
if trajectory is not None:
|
|
58
|
+
for evt in trajectory.events:
|
|
59
|
+
events_by_id[evt.event_id] = evt
|
|
60
|
+
|
|
61
|
+
# Group findings by event_id so duplicates collapse cleanly.
|
|
62
|
+
by_event: Dict[str, List[FailureFinding]] = {}
|
|
63
|
+
orphans: List[FailureFinding] = []
|
|
64
|
+
for f in report.findings:
|
|
65
|
+
if f.event_id:
|
|
66
|
+
by_event.setdefault(f.event_id, []).append(f)
|
|
67
|
+
else:
|
|
68
|
+
orphans.append(f)
|
|
69
|
+
|
|
70
|
+
# Extract predecessor links.
|
|
71
|
+
predecessor: Dict[str, Optional[str]] = {}
|
|
72
|
+
for event_id, findings in by_event.items():
|
|
73
|
+
# Best predecessor wins (any non-null among the findings on this event).
|
|
74
|
+
cand: Optional[str] = None
|
|
75
|
+
for f in findings:
|
|
76
|
+
meta_value = f.metadata.get('cascading_from_event_id')
|
|
77
|
+
if isinstance(meta_value, str) and meta_value:
|
|
78
|
+
cand = meta_value
|
|
79
|
+
break
|
|
80
|
+
predecessor[event_id] = cand
|
|
81
|
+
|
|
82
|
+
# Determine root: prefer report.root_cause_event_id, else the finding with
|
|
83
|
+
# the smallest step_index (None pushed to the end), then highest confidence.
|
|
84
|
+
root_event_id: Optional[str] = report.root_cause_event_id
|
|
85
|
+
if root_event_id not in by_event:
|
|
86
|
+
ordered = sorted(
|
|
87
|
+
by_event.items(),
|
|
88
|
+
key=lambda kv: (
|
|
89
|
+
_min_step(kv[1]) is None,
|
|
90
|
+
_min_step(kv[1]) if _min_step(kv[1]) is not None else 10**9,
|
|
91
|
+
-max(f.confidence for f in kv[1]),
|
|
92
|
+
),
|
|
93
|
+
)
|
|
94
|
+
root_event_id = ordered[0][0] if ordered else None
|
|
95
|
+
|
|
96
|
+
chain_event_ids: List[str] = []
|
|
97
|
+
if root_event_id is not None:
|
|
98
|
+
# Walk descendants from root using the predecessor map (reverse it).
|
|
99
|
+
descendants: Dict[str, List[str]] = {}
|
|
100
|
+
for child, parent in predecessor.items():
|
|
101
|
+
if parent and parent in by_event:
|
|
102
|
+
descendants.setdefault(parent, []).append(child)
|
|
103
|
+
|
|
104
|
+
visited: set[str] = set()
|
|
105
|
+
|
|
106
|
+
def dfs(node: str) -> None:
|
|
107
|
+
if node in visited:
|
|
108
|
+
return
|
|
109
|
+
visited.add(node)
|
|
110
|
+
chain_event_ids.append(node)
|
|
111
|
+
# Sort children by step_index so we walk forward in time.
|
|
112
|
+
children = sorted(
|
|
113
|
+
descendants.get(node, []),
|
|
114
|
+
key=lambda eid: _min_step(by_event[eid]) or 10**9,
|
|
115
|
+
)
|
|
116
|
+
for child in children:
|
|
117
|
+
dfs(child)
|
|
118
|
+
|
|
119
|
+
dfs(root_event_id)
|
|
120
|
+
|
|
121
|
+
# Any disconnected findings: append by step order at the end.
|
|
122
|
+
leftover = [
|
|
123
|
+
eid for eid in by_event
|
|
124
|
+
if eid not in visited
|
|
125
|
+
]
|
|
126
|
+
leftover.sort(
|
|
127
|
+
key=lambda eid: _min_step(by_event[eid]) or 10**9
|
|
128
|
+
)
|
|
129
|
+
chain_event_ids.extend(leftover)
|
|
130
|
+
else:
|
|
131
|
+
# No structural cascade info — fall back to step order.
|
|
132
|
+
chain_event_ids = sorted(
|
|
133
|
+
by_event.keys(),
|
|
134
|
+
key=lambda eid: _min_step(by_event[eid]) or 10**9,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
frames: List[CascadeFrame] = []
|
|
138
|
+
for depth, event_id in enumerate(chain_event_ids):
|
|
139
|
+
# If multiple findings on this event, emit one frame per finding but
|
|
140
|
+
# group them — earliest-confidence-tiebreak first.
|
|
141
|
+
ranked = sorted(
|
|
142
|
+
by_event[event_id],
|
|
143
|
+
key=lambda f: -f.confidence,
|
|
144
|
+
)
|
|
145
|
+
for f in ranked:
|
|
146
|
+
frames.append(CascadeFrame(
|
|
147
|
+
finding=f,
|
|
148
|
+
event=events_by_id.get(event_id),
|
|
149
|
+
cascades_from_event_id=predecessor.get(event_id),
|
|
150
|
+
depth=depth,
|
|
151
|
+
))
|
|
152
|
+
# Orphan findings (no event_id) appended at the end.
|
|
153
|
+
for f in orphans:
|
|
154
|
+
frames.append(CascadeFrame(
|
|
155
|
+
finding=f, event=None, cascades_from_event_id=None,
|
|
156
|
+
depth=len(chain_event_ids),
|
|
157
|
+
))
|
|
158
|
+
return frames
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Formatting
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
def format_traceback(
|
|
166
|
+
report: DiagnosticReport,
|
|
167
|
+
trajectory: Optional[AgentTrajectory] = None,
|
|
168
|
+
*,
|
|
169
|
+
use_color: bool = False,
|
|
170
|
+
indent: str = ' ',
|
|
171
|
+
) -> str:
|
|
172
|
+
"""Render a cascading agent-failure traceback.
|
|
173
|
+
|
|
174
|
+
Output mirrors Python's traceback shape: a header, frames ordered
|
|
175
|
+
*root → manifested*, then a final summary line that names the failure.
|
|
176
|
+
"""
|
|
177
|
+
frames = build_cascade(report, trajectory)
|
|
178
|
+
if not frames:
|
|
179
|
+
return _wrap_color(
|
|
180
|
+
'AgentTraceback: no findings recorded.',
|
|
181
|
+
'muted',
|
|
182
|
+
use_color,
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
lines: List[str] = []
|
|
186
|
+
header = 'AgentTraceback (root cause first, manifested failure last):'
|
|
187
|
+
lines.append(_wrap_color(header, 'header', use_color))
|
|
188
|
+
if trajectory is not None:
|
|
189
|
+
meta = []
|
|
190
|
+
if trajectory.trace_id:
|
|
191
|
+
meta.append(f'trace_id={trajectory.trace_id}')
|
|
192
|
+
if trajectory.framework:
|
|
193
|
+
meta.append(f'framework={trajectory.framework}')
|
|
194
|
+
if trajectory.goal:
|
|
195
|
+
meta.append(f'goal={trajectory.goal!r}')
|
|
196
|
+
if meta:
|
|
197
|
+
lines.append(indent + _wrap_color(' '.join(meta), 'meta', use_color))
|
|
198
|
+
lines.append('')
|
|
199
|
+
|
|
200
|
+
for idx, frame in enumerate(frames):
|
|
201
|
+
lines.extend(_format_frame(frame, indent=indent, use_color=use_color))
|
|
202
|
+
if idx < len(frames) - 1:
|
|
203
|
+
lines.append(indent + _wrap_color('↓ cascaded to', 'arrow', use_color))
|
|
204
|
+
|
|
205
|
+
# Tail summary — analogue to "TypeError: ..." in Python tracebacks.
|
|
206
|
+
final = frames[-1].finding
|
|
207
|
+
summary = report.summary or final.failure_mode.name
|
|
208
|
+
tail = (
|
|
209
|
+
f'AgentFailure[{final.failure_mode.mode_id}]: '
|
|
210
|
+
f'{summary}'
|
|
211
|
+
)
|
|
212
|
+
lines.append('')
|
|
213
|
+
lines.append(_wrap_color(tail, 'failure', use_color))
|
|
214
|
+
return '\n'.join(lines)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _format_frame(
|
|
218
|
+
frame: CascadeFrame, *, indent: str, use_color: bool
|
|
219
|
+
) -> List[str]:
|
|
220
|
+
f = frame.finding
|
|
221
|
+
event = frame.event
|
|
222
|
+
role = 'root cause' if frame.depth == 0 else f'cascade depth {frame.depth}'
|
|
223
|
+
header_parts = [
|
|
224
|
+
f'Step {f.step_index if f.step_index is not None else "?"}',
|
|
225
|
+
f'agent={f.agent_name or "?"}',
|
|
226
|
+
f'mode={f.failure_mode.mode_id}',
|
|
227
|
+
f'confidence={f.confidence:.2f}',
|
|
228
|
+
]
|
|
229
|
+
header = f' File "{role}", in trajectory'
|
|
230
|
+
sub = f' {" ".join(header_parts)}'
|
|
231
|
+
|
|
232
|
+
lines: List[str] = [
|
|
233
|
+
indent + _wrap_color(header, 'frame', use_color),
|
|
234
|
+
indent + _wrap_color(sub, 'frame-meta', use_color),
|
|
235
|
+
]
|
|
236
|
+
if event is not None:
|
|
237
|
+
if event.module:
|
|
238
|
+
lines.append(indent + f' module={event.module}')
|
|
239
|
+
if event.event_id:
|
|
240
|
+
lines.append(indent + f' event_id={event.event_id}')
|
|
241
|
+
if event.input is not None and str(event.input).strip():
|
|
242
|
+
lines.append(indent + f' input> {_truncate(event.input)}')
|
|
243
|
+
if event.output is not None and str(event.output).strip():
|
|
244
|
+
lines.append(indent + f' output> {_truncate(event.output)}')
|
|
245
|
+
if event.error:
|
|
246
|
+
lines.append(
|
|
247
|
+
indent
|
|
248
|
+
+ _wrap_color(f' error> {_truncate(event.error)}', 'error', use_color)
|
|
249
|
+
)
|
|
250
|
+
if f.evidence:
|
|
251
|
+
lines.append(indent + ' evidence:')
|
|
252
|
+
for ev in f.evidence:
|
|
253
|
+
lines.append(indent + f' - {_truncate(ev, 220)}')
|
|
254
|
+
if f.suggestion:
|
|
255
|
+
lines.append(
|
|
256
|
+
indent
|
|
257
|
+
+ _wrap_color(f' suggested: {_truncate(f.suggestion, 220)}', 'suggestion', use_color)
|
|
258
|
+
)
|
|
259
|
+
return lines
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _truncate(value: object, max_chars: int = 160) -> str:
|
|
263
|
+
text = '' if value is None else str(value)
|
|
264
|
+
text = text.replace('\n', ' ')
|
|
265
|
+
if len(text) > max_chars:
|
|
266
|
+
return text[:max_chars] + '…'
|
|
267
|
+
return text
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _min_step(findings: List[FailureFinding]) -> Optional[int]:
|
|
271
|
+
steps = [f.step_index for f in findings if f.step_index is not None]
|
|
272
|
+
return min(steps) if steps else None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# ---------------------------------------------------------------------------
|
|
276
|
+
# Tiny ANSI colorization (no dep)
|
|
277
|
+
# ---------------------------------------------------------------------------
|
|
278
|
+
|
|
279
|
+
_PALETTE = {
|
|
280
|
+
'header': '\033[1;37m', # bold white
|
|
281
|
+
'meta': '\033[2m', # dim
|
|
282
|
+
'frame': '\033[1;36m', # cyan, bold
|
|
283
|
+
'frame-meta': '\033[36m', # cyan
|
|
284
|
+
'arrow': '\033[2;33m', # dim yellow
|
|
285
|
+
'error': '\033[31m', # red
|
|
286
|
+
'suggestion': '\033[32m', # green
|
|
287
|
+
'failure': '\033[1;31m', # bold red
|
|
288
|
+
'muted': '\033[2m',
|
|
289
|
+
}
|
|
290
|
+
_RESET = '\033[0m'
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _wrap_color(text: str, style: str, use_color: bool) -> str:
|
|
294
|
+
if not use_color:
|
|
295
|
+
return text
|
|
296
|
+
code = _PALETTE.get(style)
|
|
297
|
+
if not code:
|
|
298
|
+
return text
|
|
299
|
+
return f'{code}{text}{_RESET}'
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
__all__ = ['CascadeFrame', 'build_cascade', 'format_traceback']
|
|
@@ -84,7 +84,19 @@ def build_app(store: TraceStore) -> Any:
|
|
|
84
84
|
|
|
85
85
|
@app.get('/', response_class=HTMLResponse)
|
|
86
86
|
def index() -> str:
|
|
87
|
-
|
|
87
|
+
bootstrap: Dict[str, Any] = {'traces': [], 'selected': None}
|
|
88
|
+
trace_ids = store.list_traces()
|
|
89
|
+
bootstrap['traces'] = trace_ids
|
|
90
|
+
if trace_ids:
|
|
91
|
+
trajectory = store.load_trajectory(trace_ids[0])
|
|
92
|
+
if trajectory is not None:
|
|
93
|
+
report = HeuristicAnalyzer().analyze(trajectory)
|
|
94
|
+
bootstrap['selected'] = {
|
|
95
|
+
'trajectory': _to_dict(trajectory),
|
|
96
|
+
'report': _to_dict(report),
|
|
97
|
+
}
|
|
98
|
+
payload = json.dumps(bootstrap).replace('</', '<\\/')
|
|
99
|
+
return _INDEX_HTML.replace('__BOOTSTRAP_JSON__', payload)
|
|
88
100
|
|
|
89
101
|
return app
|
|
90
102
|
|
|
@@ -161,7 +173,7 @@ _INDEX_HTML = """<!doctype html>
|
|
|
161
173
|
}
|
|
162
174
|
.side-section-title {
|
|
163
175
|
color:var(--muted2); text-transform:uppercase; font-size:11px;
|
|
164
|
-
font-weight:760; letter-spacing
|
|
176
|
+
font-weight:760; letter-spacing:0; margin:8px 0 8px;
|
|
165
177
|
}
|
|
166
178
|
.run-list { list-style:none; padding:0; margin:0; display:flex; flex-direction:column; gap:8px; }
|
|
167
179
|
.run {
|
|
@@ -213,13 +225,13 @@ _INDEX_HTML = """<!doctype html>
|
|
|
213
225
|
}
|
|
214
226
|
.hero-main { padding:18px; }
|
|
215
227
|
.kicker { color:var(--cyan); font-size:11px; text-transform:uppercase;
|
|
216
|
-
letter-spacing
|
|
228
|
+
letter-spacing:0; font-weight:800; }
|
|
217
229
|
h1 { margin:8px 0 8px; font-size:26px; line-height:1.15; letter-spacing:0; }
|
|
218
230
|
.goal { color:var(--muted); font-size:13px; line-height:1.45; max-width:92ch; }
|
|
219
231
|
.meta-line { display:flex; gap:8px; flex-wrap:wrap; margin-top:15px; }
|
|
220
232
|
.stats { display:grid; grid-template-columns:repeat(2, minmax(0,1fr)); gap:10px; padding:12px; }
|
|
221
233
|
.stat { background:var(--panel2); border:1px solid #303434; border-radius:8px; padding:12px; }
|
|
222
|
-
.stat-label { color:var(--muted2); font-size:11px; text-transform:uppercase; letter-spacing
|
|
234
|
+
.stat-label { color:var(--muted2); font-size:11px; text-transform:uppercase; letter-spacing:0; }
|
|
223
235
|
.stat-value { margin-top:7px; font-size:22px; line-height:1; font-weight:760; }
|
|
224
236
|
.stat-value.bad { color:var(--rose); }
|
|
225
237
|
.stat-value.warn { color:var(--amber); }
|
|
@@ -233,6 +245,16 @@ _INDEX_HTML = """<!doctype html>
|
|
|
233
245
|
.panel-title { font-size:13px; font-weight:760; }
|
|
234
246
|
.panel-body { padding:14px; }
|
|
235
247
|
.timeline { display:flex; flex-direction:column; gap:10px; }
|
|
248
|
+
.trace-legend {
|
|
249
|
+
display:grid; grid-template-columns:minmax(0,1fr) minmax(0,1fr); gap:10px;
|
|
250
|
+
margin-bottom:10px;
|
|
251
|
+
}
|
|
252
|
+
.legend-cell {
|
|
253
|
+
border:1px solid #303434; background:#171919; border-radius:8px; padding:10px;
|
|
254
|
+
min-width:0;
|
|
255
|
+
}
|
|
256
|
+
.legend-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
|
|
257
|
+
.legend-title { margin-top:4px; font-size:13px; font-weight:760; }
|
|
236
258
|
.event {
|
|
237
259
|
display:grid; grid-template-columns:58px minmax(0,1fr); gap:12px;
|
|
238
260
|
border:1px solid #2c302f; border-radius:8px; background:#1a1c1c; padding:12px;
|
|
@@ -247,9 +269,31 @@ _INDEX_HTML = """<!doctype html>
|
|
|
247
269
|
.event-title { display:flex; align-items:center; justify-content:space-between; gap:10px; }
|
|
248
270
|
.event-agent { font-size:14px; font-weight:760; }
|
|
249
271
|
.event-type { color:var(--muted); font-size:12px; font-family:ui-monospace, monospace; }
|
|
272
|
+
.trace-pair {
|
|
273
|
+
margin-top:9px; display:grid; grid-template-columns:minmax(0,1fr) minmax(0,1fr);
|
|
274
|
+
gap:8px;
|
|
275
|
+
}
|
|
276
|
+
.lane {
|
|
277
|
+
min-width:0; border:1px solid #2b2f2e; background:#151717; border-radius:8px;
|
|
278
|
+
padding:10px;
|
|
279
|
+
}
|
|
280
|
+
.lane.agent-lane { border-color:#33403f; }
|
|
281
|
+
.lane.debug-lane { border-color:#3a3430; background:#181713; }
|
|
282
|
+
.event.root .lane.debug-lane { border-color:#80612d; background:#211a11; }
|
|
283
|
+
.lane-head { display:flex; align-items:center; justify-content:space-between; gap:8px; }
|
|
284
|
+
.lane-label {
|
|
285
|
+
color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0;
|
|
286
|
+
}
|
|
287
|
+
.lane-title { margin-top:6px; color:#f1f2ee; font-size:13px; line-height:1.3; font-weight:720; }
|
|
288
|
+
.lane-copy { margin-top:7px; color:#d9ddd5; font-size:12px; line-height:1.45; overflow-wrap:anywhere; }
|
|
289
|
+
.lane-meta { margin-top:9px; display:flex; gap:6px; flex-wrap:wrap; }
|
|
290
|
+
.trace-link {
|
|
291
|
+
margin-top:8px; color:var(--muted); font-size:11px; line-height:1.4;
|
|
292
|
+
font-family:ui-monospace, SFMono-Regular, Consolas, monospace;
|
|
293
|
+
}
|
|
250
294
|
.event-grid { margin-top:8px; display:grid; grid-template-columns:1fr 1fr; gap:8px; }
|
|
251
295
|
.field { min-width:0; border:1px solid #2b2f2e; background:#151717; border-radius:8px; padding:9px; }
|
|
252
|
-
.field-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing
|
|
296
|
+
.field-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
|
|
253
297
|
.field-value { margin-top:5px; color:#d9ddd5; font-size:12px; line-height:1.4;
|
|
254
298
|
overflow-wrap:anywhere; }
|
|
255
299
|
.field.error { border-color:#66333a; background:#211619; }
|
|
@@ -265,7 +309,7 @@ _INDEX_HTML = """<!doctype html>
|
|
|
265
309
|
.root-card { border-left:4px solid var(--amber); }
|
|
266
310
|
.root-grid { display:grid; grid-template-columns:repeat(3,minmax(0,1fr)); gap:8px; margin-top:10px; }
|
|
267
311
|
.mini { border:1px solid #303434; border-radius:8px; padding:9px; background:#171919; min-width:0; }
|
|
268
|
-
.mini-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing
|
|
312
|
+
.mini-label { color:var(--muted2); font-size:10px; text-transform:uppercase; letter-spacing:0; }
|
|
269
313
|
.mini-value { margin-top:6px; font-size:13px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
|
270
314
|
.flow { display:grid; gap:8px; }
|
|
271
315
|
.flow-item {
|
|
@@ -284,6 +328,7 @@ _INDEX_HTML = """<!doctype html>
|
|
|
284
328
|
.sidebar { border-right:0; border-bottom:1px solid var(--line); }
|
|
285
329
|
.workspace { height:auto; }
|
|
286
330
|
.topbar { position:static; }
|
|
331
|
+
.trace-legend, .trace-pair { grid-template-columns:1fr; }
|
|
287
332
|
}
|
|
288
333
|
</style>
|
|
289
334
|
</head>
|
|
@@ -324,6 +369,7 @@ _INDEX_HTML = """<!doctype html>
|
|
|
324
369
|
</section>
|
|
325
370
|
</div>
|
|
326
371
|
<script>
|
|
372
|
+
const BOOTSTRAP = __BOOTSTRAP_JSON__;
|
|
327
373
|
async function api(path) {
|
|
328
374
|
const r = await fetch(path);
|
|
329
375
|
if (!r.ok) throw new Error('HTTP ' + r.status);
|
|
@@ -367,6 +413,20 @@ async function loadTraceList() {
|
|
|
367
413
|
document.getElementById('detail').innerHTML = '<div class="empty">No traces in store.</div>';
|
|
368
414
|
}
|
|
369
415
|
}
|
|
416
|
+
function renderTraceList(traceIds, selectedId) {
|
|
417
|
+
const ul = document.getElementById('trace-list');
|
|
418
|
+
ul.innerHTML = '';
|
|
419
|
+
document.getElementById('trace-count').textContent = traceIds.length + ' trace' + (traceIds.length === 1 ? '' : 's') + ' in local store';
|
|
420
|
+
traceIds.forEach((tid) => {
|
|
421
|
+
const li = document.createElement('li');
|
|
422
|
+
li.className = 'run' + (tid === selectedId ? ' active' : '');
|
|
423
|
+
li.innerHTML = '<div class="run-id">' + escapeHtml(tid) + '</div>' +
|
|
424
|
+
'<div class="run-meta"><span class="chip bad">failed</span><span class="chip">SQLite</span></div>';
|
|
425
|
+
li.dataset.tid = tid;
|
|
426
|
+
li.onclick = () => { selectTrace(tid, li); };
|
|
427
|
+
ul.appendChild(li);
|
|
428
|
+
});
|
|
429
|
+
}
|
|
370
430
|
async function selectTrace(tid, li) {
|
|
371
431
|
document.querySelectorAll('.run').forEach(el => el.classList.remove('active'));
|
|
372
432
|
li.classList.add('active');
|
|
@@ -403,8 +463,11 @@ function renderTrace(traj, report) {
|
|
|
403
463
|
html += '</div></div>';
|
|
404
464
|
|
|
405
465
|
html += '<div class="layout">';
|
|
406
|
-
html += '<div class="panel"><div class="panel-head"><div class="panel-title">
|
|
407
|
-
|
|
466
|
+
html += '<div class="panel"><div class="panel-head"><div class="panel-title">Agent Trace + Error Trace Alignment</div><span class="chip">native span -> diagnosis</span></div><div class="panel-body">';
|
|
467
|
+
html += '<div class="trace-legend"><div class="legend-cell"><div class="legend-label">Agent native trace</div><div class="legend-title">What the agent logged, thought, called, or observed.</div></div>';
|
|
468
|
+
html += '<div class="legend-cell"><div class="legend-label">AgentDebugX error trace</div><div class="legend-title">Normalized failure signal, attribution, and repair hint for human review.</div></div></div>';
|
|
469
|
+
html += '<div class="timeline">';
|
|
470
|
+
for (const ev of events) html += renderEvent(ev, ev.event_id === rootId, findingForEvent(findings, ev.event_id));
|
|
408
471
|
html += '</div></div></div>';
|
|
409
472
|
|
|
410
473
|
html += '<div class="rail">';
|
|
@@ -440,13 +503,61 @@ function mini(label, value) {
|
|
|
440
503
|
function flow(n, text) {
|
|
441
504
|
return '<div class="flow-item"><div class="flow-dot">' + n + '</div><div>' + escapeHtml(text) + '</div></div>';
|
|
442
505
|
}
|
|
443
|
-
function
|
|
506
|
+
function findingForEvent(findings, eventId) {
|
|
507
|
+
return (findings || []).find(f => f.event_id === eventId) || null;
|
|
508
|
+
}
|
|
509
|
+
function nativeTrace(ev) {
|
|
510
|
+
const meta = ev.metadata || {};
|
|
511
|
+
const native = meta.native_trace || {};
|
|
512
|
+
const fallback = truncate(ev.output || ev.input || ev.error || 'Recorded framework event.', 180);
|
|
513
|
+
const tags = Array.isArray(native.tags) ? native.tags : [ev.module || 'module', ev.event_type || 'event'];
|
|
514
|
+
return {
|
|
515
|
+
span: native.span_id || ev.event_id || '-',
|
|
516
|
+
title: native.title || ((ev.agent_name || 'agent') + ' / ' + (ev.event_type || 'event')),
|
|
517
|
+
body: native.message || fallback,
|
|
518
|
+
tags: tags,
|
|
519
|
+
state: native.state || native.tool || ''
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
function errorTrace(ev, finding) {
|
|
523
|
+
const meta = ev.metadata || {};
|
|
524
|
+
const overlay = meta.error_trace || {};
|
|
525
|
+
const mode = overlay.failure_mode || finding?.failure_mode?.mode_id || (eventProblem(ev) ? 'unclassified.signal' : 'context');
|
|
526
|
+
const title = overlay.title || finding?.failure_mode?.name || (eventProblem(ev) ? 'Failure signal detected' : 'Context event');
|
|
527
|
+
const body = overlay.human_readout || finding?.suggestion || (eventProblem(ev)
|
|
528
|
+
? 'AgentDebugX keeps this event in the failure trace because it contains an error, lost-context signal, or invalid state transition.'
|
|
529
|
+
: 'No local failure signal; shown to preserve the causal path for the reviewer.');
|
|
530
|
+
const severity = overlay.severity || (finding ? 'high' : (eventProblem(ev) ? 'medium' : 'context'));
|
|
531
|
+
const repair = overlay.repair || finding?.suggestion || '';
|
|
532
|
+
return {mode, title, body, severity, repair};
|
|
533
|
+
}
|
|
534
|
+
function severityClass(severity) {
|
|
535
|
+
if (severity === 'critical' || severity === 'high') return 'bad';
|
|
536
|
+
if (severity === 'medium') return 'warn';
|
|
537
|
+
if (severity === 'context') return '';
|
|
538
|
+
return 'cyan';
|
|
539
|
+
}
|
|
540
|
+
function renderEvent(ev, isRoot, finding) {
|
|
541
|
+
const native = nativeTrace(ev);
|
|
542
|
+
const debug = errorTrace(ev, finding);
|
|
444
543
|
let html = '<div class="event ' + (isRoot ? 'root' : '') + '">';
|
|
445
544
|
html += '<div class="step-index">' + escapeHtml(ev.step_index ?? '-') + '</div>';
|
|
446
545
|
html += '<div><div class="event-title"><div><div class="event-agent">' + escapeHtml(ev.agent_name || 'agent') + '</div>';
|
|
447
546
|
html += '<div class="event-type">' + escapeHtml(ev.event_type || '') + ' / ' + escapeHtml(ev.module || 'module') + '</div></div>';
|
|
448
547
|
html += isRoot ? '<span class="chip warn">root candidate</span>' : (eventProblem(ev) ? '<span class="chip bad">signal</span>' : '<span class="chip good">ok</span>');
|
|
449
|
-
html += '</div><div class="
|
|
548
|
+
html += '</div><div class="trace-pair">';
|
|
549
|
+
html += '<div class="lane agent-lane"><div class="lane-head"><div class="lane-label">Agent native trace</div><span class="chip">' + escapeHtml(native.span) + '</span></div>';
|
|
550
|
+
html += '<div class="lane-title">' + escapeHtml(native.title) + '</div>';
|
|
551
|
+
html += '<div class="lane-copy">' + escapeHtml(native.body) + '</div>';
|
|
552
|
+
html += '<div class="lane-meta">' + (native.tags || []).map(t => '<span class="chip">' + escapeHtml(t) + '</span>').join('') + '</div>';
|
|
553
|
+
if (native.state) html += '<div class="trace-link">' + escapeHtml(native.state) + '</div>';
|
|
554
|
+
html += '</div>';
|
|
555
|
+
html += '<div class="lane debug-lane"><div class="lane-head"><div class="lane-label">AgentDebugX error trace</div><span class="chip ' + severityClass(debug.severity) + '">' + escapeHtml(debug.severity) + '</span></div>';
|
|
556
|
+
html += '<div class="lane-title">' + escapeHtml(debug.title) + '</div>';
|
|
557
|
+
html += '<div class="lane-copy">' + escapeHtml(debug.body) + '</div>';
|
|
558
|
+
html += '<div class="lane-meta"><span class="chip ' + (finding ? familyClass(finding.failure_mode?.family) : '') + '">' + escapeHtml(debug.mode) + '</span></div>';
|
|
559
|
+
if (debug.repair) html += '<div class="trace-link">repair: ' + escapeHtml(debug.repair) + '</div>';
|
|
560
|
+
html += '</div></div><div class="event-grid">';
|
|
450
561
|
html += field('Input', truncate(ev.input, 132), false);
|
|
451
562
|
html += field('Output', truncate(ev.output, 132), false);
|
|
452
563
|
html += field('Error', truncate(ev.error, 132), Boolean(ev.error));
|
|
@@ -469,6 +580,15 @@ function renderFinding(f) {
|
|
|
469
580
|
html += '</div>';
|
|
470
581
|
return html;
|
|
471
582
|
}
|
|
583
|
+
if (BOOTSTRAP && BOOTSTRAP.traces) {
|
|
584
|
+
const selected = BOOTSTRAP.selected ? BOOTSTRAP.selected.trajectory.trace_id : null;
|
|
585
|
+
renderTraceList(BOOTSTRAP.traces, selected);
|
|
586
|
+
if (BOOTSTRAP.selected) {
|
|
587
|
+
renderTrace(BOOTSTRAP.selected.trajectory, BOOTSTRAP.selected.report);
|
|
588
|
+
} else {
|
|
589
|
+
document.getElementById('detail').innerHTML = '<div class="empty">No traces in store.</div>';
|
|
590
|
+
}
|
|
591
|
+
}
|
|
472
592
|
loadTraceList();
|
|
473
593
|
</script>
|
|
474
594
|
</body>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|