agentdebugx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentdebug/__init__.py +65 -0
- agentdebug/adapters/__init__.py +10 -0
- agentdebug/adapters/base.py +22 -0
- agentdebug/adapters/langgraph.py +261 -0
- agentdebug/adapters/otel.py +151 -0
- agentdebug/adapters/raw.py +134 -0
- agentdebug/analyzers.py +152 -0
- agentdebug/attribution.py +230 -0
- agentdebug/cli.py +272 -0
- agentdebug/events.py +114 -0
- agentdebug/instrumentation.py +57 -0
- agentdebug/judges.py +258 -0
- agentdebug/llm.py +165 -0
- agentdebug/models.py +169 -0
- agentdebug/recorder.py +183 -0
- agentdebug/recovery.py +113 -0
- agentdebug/storage.py +167 -0
- agentdebug/taxonomy.py +271 -0
- agentdebug/ui/__init__.py +14 -0
- agentdebug/ui/server.py +260 -0
- agentdebugx-0.1.0.dist-info/METADATA +217 -0
- agentdebugx-0.1.0.dist-info/RECORD +25 -0
- agentdebugx-0.1.0.dist-info/WHEEL +4 -0
- agentdebugx-0.1.0.dist-info/entry_points.txt +3 -0
- agentdebugx-0.1.0.dist-info/licenses/LICENSE +21 -0
agentdebug/models.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Core data models for portable agent debugging traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any, Dict, List, Optional, cast
|
|
8
|
+
from uuid import uuid4
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
JsonDict = Dict[str, Any]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def utc_now() -> datetime:
    """Return the current instant as a timezone-aware ``datetime`` in UTC."""
    current = datetime.now(tz=timezone.utc)
    return current
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def new_id(prefix: str) -> str:
    """Create a fresh random identifier for a trace object.

    The result is ``prefix``, an underscore, and the 32-character hex form of
    a newly generated UUID4, so every call yields a different value.
    """
    unique_part = uuid4().hex
    return f'{prefix}_{unique_part}'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class EventType(str, Enum):
    """Kinds of events that can appear in a recorded trajectory.

    The ``str`` mix-in makes each member compare equal to its string value.
    """

    # Run lifecycle markers.
    RUN_START = 'run.start'
    RUN_END = 'run.end'
    # Generic agent step (the default used for recorded events).
    AGENT_STEP = 'agent.step'
    # Language-model traffic.
    LLM_CALL = 'llm.call'
    LLM_RESPONSE = 'llm.response'
    # Tool invocations and their results.
    TOOL_CALL = 'tool.call'
    TOOL_RESULT = 'tool.result'
    # Memory access.
    MEMORY_READ = 'memory.read'
    MEMORY_WRITE = 'memory.write'
    # Remaining event kinds.
    REFLECTION = 'reflection'
    PLAN = 'plan'
    HANDOFF = 'handoff'
    GUARDRAIL = 'guardrail'
    OBSERVATION = 'observation'
    ERROR = 'error'
    HUMAN_FEEDBACK = 'human.feedback'
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Modality(str, Enum):
    """Content category of an artifact attached to an event.

    The ``str`` mix-in makes each member compare equal to its string value.
    """

    TEXT = 'text'
    IMAGE = 'image'
    AUDIO = 'audio'
    VIDEO = 'video'
    UI = 'ui'
    FILE = 'file'
    TOOL = 'tool'
    STATE = 'state'
    # Catch-all category.
    OTHER = 'other'
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Artifact(BaseModel):
    """A payload linked to an event, including multimodal evidence."""

    # Store enum fields as their plain string values after validation.
    model_config = ConfigDict(use_enum_values=True)

    # Location of the payload.
    uri: str
    # ``use_enum_values`` is applied during validation, and defaults are not
    # validated unless requested. Validating the default keeps ``modality`` a
    # plain string whether or not the caller supplied it.
    modality: Modality = Field(default=Modality.OTHER, validate_default=True)
    # Optional media type string for the payload.
    media_type: Optional[str] = None
    # Optional human-readable description.
    description: Optional[str] = None
    # Free-form extra data; each instance gets its own dict.
    metadata: JsonDict = Field(default_factory=dict)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class AgentEvent(BaseModel):
    """A normalized event emitted by an agentic system."""

    # Store enum fields as their plain string values after validation.
    model_config = ConfigDict(use_enum_values=True)

    # Defaults to a freshly generated ``evt_...`` identifier.
    event_id: str = Field(default_factory=lambda: new_id('evt'))
    # Identifier of the trajectory this event belongs to (required).
    trace_id: str
    # Optional link to another event's ``event_id``.
    parent_event_id: Optional[str] = None
    agent_name: str = 'agent'
    # ``use_enum_values`` is applied during validation, and defaults are not
    # validated unless requested. Validating the default keeps ``event_type``
    # a plain string whether or not the caller supplied it.
    event_type: EventType = Field(default=EventType.AGENT_STEP, validate_default=True)
    module: Optional[str] = None
    step_index: Optional[int] = None
    # Defaults to the creation time as a timezone-aware UTC datetime.
    timestamp: datetime = Field(default_factory=utc_now)
    # Arbitrary payloads; no schema is enforced here.
    input: Any = None
    output: Any = None
    # Error text, if any.
    error: Optional[str] = None
    duration_ms: Optional[float] = None
    # Free-form extra data; each instance gets its own dict.
    metadata: JsonDict = Field(default_factory=dict)
    # Payloads attached to this event.
    artifacts: List[Artifact] = Field(default_factory=list)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class AgentTrajectory(BaseModel):
    """Portable trajectory IR for single-agent and multi-agent runs."""

    # Defaults to a freshly generated ``trace_...`` identifier.
    trace_id: str = Field(default_factory=lambda: new_id('trace'))
    task_id: Optional[str] = None
    goal: Optional[str] = None
    framework: Optional[str] = None
    # Defaults to the creation time as a timezone-aware UTC datetime.
    started_at: datetime = Field(default_factory=utc_now)
    # Stays ``None`` until something sets it.
    ended_at: Optional[datetime] = None
    # Free-form extra data; each instance gets its own dict.
    metadata: JsonDict = Field(default_factory=dict)
    # Events in the order they were added.
    events: List[AgentEvent] = Field(default_factory=list)

    def add_event(self, event: AgentEvent) -> AgentEvent:
        """Append ``event`` to ``events`` and return that same event."""
        self.events.append(event)
        return event
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class FailureMode(BaseModel):
    """A seed or generated taxonomy node."""

    # Identifier of the failure mode.
    mode_id: str
    # Display name.
    name: str
    # Name of the family this mode belongs to.
    family: str
    description: str
    # NOTE(review): presumably textual cues used to detect this mode; confirm
    # against the taxonomy and analyzer modules.
    signals: List[str] = Field(default_factory=list)
    # Suggestion texts associated with this mode.
    suggestion_templates: List[str] = Field(default_factory=list)
    # NOTE(review): presumably the provenance of this node; confirm.
    source: Optional[str] = None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class FailureFinding(BaseModel):
    """A localized failure diagnosis for one event or trajectory region."""

    # Defaults to a freshly generated ``finding_...`` identifier.
    finding_id: str = Field(default_factory=lambda: new_id('finding'))
    # The taxonomy node this finding is an instance of.
    failure_mode: FailureMode
    # Optional location of the finding; each part may be absent.
    event_id: Optional[str] = None
    agent_name: Optional[str] = None
    step_index: Optional[int] = None
    # No range is enforced by this model.
    confidence: float = 0.0
    # Evidence strings supporting the finding.
    evidence: List[str] = Field(default_factory=list)
    # Optional suggestion text specific to this finding.
    suggestion: Optional[str] = None
    # Free-form extra data; each instance gets its own dict.
    metadata: JsonDict = Field(default_factory=dict)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class DiagnosticReport(BaseModel):
    """Structured output from an analyzer."""

    # Defaults to a freshly generated ``report_...`` identifier.
    report_id: str = Field(default_factory=lambda: new_id('report'))
    # Identifier of the analyzed trajectory (required).
    trace_id: str
    task_id: Optional[str] = None
    # Defaults to the creation time as a timezone-aware UTC datetime.
    generated_at: datetime = Field(default_factory=utc_now)
    # Optional root-cause location; each part may be absent.
    root_cause_event_id: Optional[str] = None
    root_cause_agent: Optional[str] = None
    root_cause_step_index: Optional[int] = None
    # Individual findings; empty by default.
    findings: List[FailureFinding] = Field(default_factory=list)
    # The default text describes the no-failure case.
    summary: str = 'No failure was detected.'
    suggestions: List[str] = Field(default_factory=list)
    # Free-form extra data; each instance gets its own dict.
    metadata: JsonDict = Field(default_factory=dict)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def model_to_json(model: BaseModel, indent: Optional[int] = None) -> str:
    """Serialize a Pydantic model to a JSON string.

    Uses the model's ``model_dump_json`` method when it exists and is
    callable, and falls back to ``model.json`` otherwise.

    Args:
        model: The model instance to serialize.
        indent: Indentation forwarded to the serializer.

    Returns:
        The serializer's result converted with ``str``.
    """
    serialize = getattr(model, 'model_dump_json', None)
    if not callable(serialize):
        serialize = model.json
    return str(serialize(indent=indent))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def trajectory_from_json(payload: str) -> AgentTrajectory:
    """Parse a JSON string into an ``AgentTrajectory``.

    Uses ``AgentTrajectory.model_validate_json`` when it exists and is
    callable, and falls back to ``AgentTrajectory.parse_raw`` otherwise.
    """
    parse = getattr(AgentTrajectory, 'model_validate_json', None)
    if not callable(parse):
        return AgentTrajectory.parse_raw(payload)
    return cast(AgentTrajectory, parse(payload))
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def report_from_json(payload: str) -> DiagnosticReport:
    """Parse a JSON string into a ``DiagnosticReport``.

    Uses ``DiagnosticReport.model_validate_json`` when it exists and is
    callable, and falls back to ``DiagnosticReport.parse_raw`` otherwise.
    """
    parse = getattr(DiagnosticReport, 'model_validate_json', None)
    if not callable(parse):
        return DiagnosticReport.parse_raw(payload)
    return cast(DiagnosticReport, parse(payload))
|
agentdebug/recorder.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""High-level recording API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from types import TracebackType
|
|
6
|
+
from typing import Any, Literal, Optional, Type, Union
|
|
7
|
+
|
|
8
|
+
from agentdebug.analyzers import HeuristicAnalyzer
|
|
9
|
+
from agentdebug.models import AgentEvent, AgentTrajectory, DiagnosticReport, EventType, utc_now
|
|
10
|
+
from agentdebug.storage import JsonlTraceStore, TraceStore
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AgentDebug:
    """Main entry point for embedding AgentDebugX in an agent system.

    Holds a trace store and an analyzer, and offers helpers to start a
    trajectory, append events to it, finish and persist it, and analyze it.
    """

    def __init__(
        self,
        store: Optional[TraceStore] = None,
        analyzer: Optional[HeuristicAnalyzer] = None,
    ) -> None:
        """Create a debugger.

        Args:
            store: Where trajectories are persisted; a ``JsonlTraceStore``
                with its default path is created when omitted.
            analyzer: Analyzer used by :meth:`analyze`; a ``HeuristicAnalyzer``
                is created when omitted.
        """
        # Compare against None explicitly: a caller-supplied object that happens
        # to be falsy (e.g. it defines __len__ and is empty) must not be
        # silently replaced by the default.
        self.store = store if store is not None else JsonlTraceStore()
        self.analyzer = analyzer if analyzer is not None else HeuristicAnalyzer()

    def start_trace(
        self,
        goal: Optional[str] = None,
        task_id: Optional[str] = None,
        framework: Optional[str] = None,
        trace_id: Optional[str] = None,
        **metadata: Any,
    ) -> AgentTrajectory:
        """Create a trajectory and record its ``run.start`` event.

        Args:
            goal: Optional goal stored on the trajectory.
            task_id: Optional task identifier stored on the trajectory.
            framework: Optional framework name stored on the trajectory.
            trace_id: When given, replaces the generated trace identifier.
            **metadata: Extra keyword arguments stored as trajectory metadata.

        Returns:
            The new trajectory, already containing the ``run.start`` event.
        """
        trajectory = AgentTrajectory(
            goal=goal,
            task_id=task_id,
            framework=framework,
            metadata=metadata,
        )
        if trace_id is not None:
            # Must happen before the first event so the event carries this id.
            trajectory.trace_id = trace_id
        self.record_event(
            trajectory,
            event_type=EventType.RUN_START,
            agent_name='system',
            output={'goal': goal, 'framework': framework},
        )
        return trajectory

    def record_event(
        self,
        trajectory: AgentTrajectory,
        event_type: Union[EventType, str] = EventType.AGENT_STEP,
        agent_name: str = 'agent',
        module: Optional[str] = None,
        step_index: Optional[int] = None,
        parent_event_id: Optional[str] = None,
        input: Any = None,
        output: Any = None,
        error: Optional[str] = None,
        duration_ms: Optional[float] = None,
        **metadata: Any,
    ) -> AgentEvent:
        """Build an event for ``trajectory``, append it, and return it.

        Args:
            trajectory: Trajectory that receives the event; its ``trace_id``
                is copied onto the event.
            event_type: An ``EventType`` member or its string value.
            agent_name, module, step_index, parent_event_id, input, output,
                error, duration_ms: Copied onto the event unchanged.
            **metadata: Extra keyword arguments stored as event metadata.

        Raises:
            ValueError: If ``event_type`` is a string that is not a valid
                ``EventType`` value.
        """
        if isinstance(event_type, str):
            event_type = EventType(event_type)
        event = AgentEvent(
            trace_id=trajectory.trace_id,
            parent_event_id=parent_event_id,
            agent_name=agent_name,
            event_type=event_type,
            module=module,
            step_index=step_index,
            input=input,
            output=output,
            error=error,
            duration_ms=duration_ms,
            metadata=metadata,
        )
        return trajectory.add_event(event)

    def finish_trace(
        self,
        trajectory: AgentTrajectory,
        success: bool,
        output: Any = None,
        error: Optional[str] = None,
        **metadata: Any,
    ) -> AgentTrajectory:
        """Record the terminal event, stamp ``ended_at``, and persist.

        A successful run gets a ``run.end`` event; an unsuccessful one gets an
        ``error`` event instead. ``success`` and any extra keyword arguments
        are stored in the terminal event's metadata.

        Returns:
            The same trajectory, after it was handed to the store.
        """
        terminal_event = self.record_event(
            trajectory,
            event_type=EventType.RUN_END if success else EventType.ERROR,
            agent_name='system',
            output=output,
            error=error,
            success=success,
            **metadata,
        )
        # Close the trajectory at the terminal event's own timestamp so that
        # no recorded event is stamped later than ``ended_at``.
        trajectory.ended_at = terminal_event.timestamp
        self.store.save_trajectory(trajectory)
        return trajectory

    def analyze(self, trajectory: AgentTrajectory) -> DiagnosticReport:
        """Run the analyzer on ``trajectory`` and return its report.

        The report is also saved when the store exposes a callable
        ``save_report``; stores without one are left alone.
        """
        report = self.analyzer.analyze(trajectory)
        save_report = getattr(self.store, 'save_report', None)
        if callable(save_report):
            save_report(report)
        return report

    def analyze_trace(self, trace_id: str) -> DiagnosticReport:
        """Load a stored trajectory by id and analyze it.

        Raises:
            KeyError: If the store has no trajectory with ``trace_id``.
        """
        trajectory = self.store.load_trajectory(trace_id)
        if trajectory is None:
            raise KeyError(f'Unknown trace_id: {trace_id}')
        return self.analyze(trajectory)

    def trace(
        self,
        goal: Optional[str] = None,
        task_id: Optional[str] = None,
        framework: Optional[str] = None,
        **metadata: Any,
    ) -> 'TraceSession':
        """Start a trajectory and wrap it in a ``TraceSession``.

        The arguments are forwarded to :meth:`start_trace`.
        """
        trajectory = self.start_trace(
            goal=goal,
            task_id=task_id,
            framework=framework,
            **metadata,
        )
        return TraceSession(debugger=self, trajectory=trajectory)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TraceSession:
    """Context manager that finishes its trajectory when the block exits."""

    def __init__(self, debugger: AgentDebug, trajectory: AgentTrajectory) -> None:
        """Bind an existing ``trajectory`` to the ``debugger`` that owns it."""
        self.trajectory = trajectory
        self.debugger = debugger

    def __enter__(self) -> 'TraceSession':
        """Return the session itself; the trajectory already exists."""
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> Literal[False]:
        """Finish the trace as success or failure; never suppress exceptions."""
        if exc_value is not None:
            failure_text = f'{exc_type.__name__ if exc_type else "Error"}: {exc_value}'
            self.debugger.finish_trace(
                self.trajectory,
                success=False,
                error=failure_text,
            )
        else:
            self.debugger.finish_trace(self.trajectory, success=True)
        # Returning False lets any in-flight exception propagate.
        return False

    def record(
        self,
        event_type: Union[EventType, str] = EventType.AGENT_STEP,
        agent_name: str = 'agent',
        module: Optional[str] = None,
        step_index: Optional[int] = None,
        parent_event_id: Optional[str] = None,
        input: Any = None,
        output: Any = None,
        error: Optional[str] = None,
        duration_ms: Optional[float] = None,
        **metadata: Any,
    ) -> AgentEvent:
        """Record an event on this session's trajectory.

        All arguments are forwarded unchanged to the debugger's
        ``record_event``; the created event is returned.
        """
        recorder = self.debugger.record_event
        return recorder(
            self.trajectory,
            event_type=event_type,
            agent_name=agent_name,
            module=module,
            step_index=step_index,
            parent_event_id=parent_event_id,
            input=input,
            output=output,
            error=error,
            duration_ms=duration_ms,
            **metadata,
        )

    def analyze(self) -> DiagnosticReport:
        """Analyze this session's trajectory through the debugger."""
        report = self.debugger.analyze(self.trajectory)
        return report
|
agentdebug/recovery.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Lightweight recovery suggestions.
|
|
2
|
+
|
|
3
|
+
v0.1 ships ``ReflexionSuggestion`` — a *suggest-only* recovery generator that
|
|
4
|
+
produces a structured retry-prompt artifact based on Reflexion (Shinn et al.,
|
|
5
|
+
NeurIPS 2023, arXiv:2303.11366). Heavier strategies (Self-Refine loop, CRITIC,
|
|
6
|
+
Saga rollback, MCTS) are deferred per the roadmap and will land behind the same
|
|
7
|
+
:class:`Recoverer` protocol.
|
|
8
|
+
|
|
9
|
+
By design, **nothing here re-executes the agent** — recovery proposals are
|
|
10
|
+
artifacts to be surfaced (CLI/UI/PR comment) or fed back into the next run.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import List, Optional, Protocol
|
|
17
|
+
|
|
18
|
+
from agentdebug.models import (
|
|
19
|
+
AgentTrajectory,
|
|
20
|
+
DiagnosticReport,
|
|
21
|
+
FailureFinding,
|
|
22
|
+
new_id,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class FixProposal:
    """A recovery proposal produced by a recoverer."""

    # Identifier of this proposal.
    proposal_id: str
    # Identifier of the recoverer that produced it.
    recoverer_id: str
    # Event the proposal targets, if any.
    target_event_id: Optional[str]
    # Short summary of the proposal.
    summary: str
    # Why the proposal is expected to help.
    rationale: str
    confidence: float
    # The full suggestion text.
    suggestion_text: str
    # NOTE(review): presumably effects that applying the proposal would have;
    # confirm the expected vocabulary.
    side_effects: List[str] = field(default_factory=list)
    requires_human_approval: bool = False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Recoverer(Protocol):
    """Structural interface for objects that turn a report into proposals."""

    # Identifier of the recoverer.
    id: str

    def suggest(
        self,
        trajectory: AgentTrajectory,
        report: DiagnosticReport,
    ) -> List[FixProposal]:
        """Return fix proposals for ``report`` on ``trajectory``."""
        ...
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ReflexionSuggestion:
    """Emit a Reflexion-style retry reflection per finding.

    The output is purely textual — it can be appended to the agent's next
    system prompt, written to a project ``MANUAL.md``, or surfaced in the
    Console. There is no auto-apply.
    """

    id = 'reflexion'

    def suggest(
        self,
        trajectory: AgentTrajectory,
        report: DiagnosticReport,
    ) -> List[FixProposal]:
        """Return one proposal per finding, in report order.

        A report without findings yields an empty list.
        """
        return [
            self._build_proposal(trajectory, finding)
            for finding in (report.findings or ())
        ]

    @staticmethod
    def _summarize(finding: FailureFinding) -> str:
        """Return the one-line summary, naming the step only when it is known."""
        headline = f'Reflexion retry hint for {finding.failure_mode.mode_id}'
        if finding.step_index is None:
            # Avoid a summary that reads "at step None" when the finding
            # carries no step index.
            return headline
        return f'{headline} at step {finding.step_index}'

    def _build_proposal(
        self, trajectory: AgentTrajectory, finding: FailureFinding
    ) -> FixProposal:
        """Assemble the reflection text and wrap it in a ``FixProposal``."""
        goal = trajectory.goal or '(no goal recorded)'
        framework = trajectory.framework or '(framework not declared)'
        evidence_block = '\n'.join(f'  - {e}' for e in finding.evidence) or '  (none)'
        # Prefer the finding's own suggestion, then the failure mode's first
        # template, then a generic instruction.
        templates = finding.failure_mode.suggestion_templates
        suggestion_template = (
            finding.suggestion
            or (templates[0]
                if templates
                else 'Inspect the offending step and constrain the agent at that point.')
        )
        reflection = (
            f'Task: {goal}\n'
            f'Framework: {framework}\n'
            f'Observed failure mode: {finding.failure_mode.mode_id} '
            f'({finding.failure_mode.name})\n'
            f'Located at agent={finding.agent_name}, step={finding.step_index}, '
            f'event_id={finding.event_id}\n'
            f'Evidence:\n{evidence_block}\n'
            f'Next time, do the following:\n  {suggestion_template}\n'
        )
        return FixProposal(
            proposal_id=new_id('fix'),
            recoverer_id=self.id,
            target_event_id=finding.event_id,
            summary=self._summarize(finding),
            rationale=(
                'Reflexion (Shinn et al., NeurIPS 2023) converts a failure '
                'into a verbal hint appended to next attempt.'
            ),
            # Clamp the finding's confidence into [0.1, 0.9].
            confidence=min(0.9, max(0.1, finding.confidence)),
            suggestion_text=reflection,
            side_effects=['memory.write'],
            requires_human_approval=False,
        )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
__all__ = ['Recoverer', 'FixProposal', 'ReflexionSuggestion']
|
agentdebug/storage.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Persistence backends for traces and diagnostic reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional, Protocol
|
|
8
|
+
|
|
9
|
+
from agentdebug.models import (
|
|
10
|
+
AgentTrajectory,
|
|
11
|
+
DiagnosticReport,
|
|
12
|
+
model_to_json,
|
|
13
|
+
report_from_json,
|
|
14
|
+
trajectory_from_json,
|
|
15
|
+
utc_now,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TraceStore(Protocol):
    """Structural interface that every trace persistence backend provides."""

    def save_trajectory(self, trajectory: AgentTrajectory) -> None:
        """Persist ``trajectory``."""
        ...

    def load_trajectory(self, trace_id: str) -> Optional[AgentTrajectory]:
        """Return the stored trajectory for ``trace_id``, or ``None``."""
        ...

    def list_traces(self) -> List[str]:
        """Return the trace ids held by the store."""
        ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class JsonlTraceStore:
    """Append-only local store for quick adoption and reproducible examples.

    Each saved trajectory is written as one JSON line. Saving the same trace
    again appends another line; the most recent line wins when loading.
    """

    def __init__(self, path: str = '.agentdebug/traces.jsonl') -> None:
        """Remember ``path`` and create its parent directory if needed."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def save_trajectory(self, trajectory: AgentTrajectory) -> None:
        """Append ``trajectory`` to the file as a single JSON line."""
        with self.path.open('a', encoding='utf-8') as handle:
            handle.write(model_to_json(trajectory))
            handle.write('\n')

    def load_trajectory(self, trace_id: str) -> Optional[AgentTrajectory]:
        """Return the latest stored trajectory with ``trace_id``.

        Returns ``None`` when the file does not exist or holds no match.
        """
        if not self.path.exists():
            return None
        match = None
        with self.path.open('r', encoding='utf-8') as handle:
            for line in handle:
                if not line.strip():
                    continue
                candidate = trajectory_from_json(line)
                if candidate.trace_id == trace_id:
                    # Keep scanning: a later line for the same id supersedes
                    # this one.
                    match = candidate
        return match

    def list_traces(self) -> List[str]:
        """Return each stored trace id once, in order of first appearance.

        A trace that was saved several times occupies several lines; it is
        reported once, matching the last-wins behavior of ``load_trajectory``.
        """
        if not self.path.exists():
            return []
        # A dict keeps insertion order and drops repeated ids.
        seen: dict = {}
        with self.path.open('r', encoding='utf-8') as handle:
            for line in handle:
                if not line.strip():
                    continue
                seen.setdefault(trajectory_from_json(line).trace_id, None)
        return list(seen)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class SQLiteTraceStore:
    """Small embedded error database for local development and CI artifacts.

    Trajectories and diagnostic reports are stored as JSON payloads in two
    tables. Every operation opens its own connection and closes it when done.
    """

    def __init__(self, path: str = '.agentdebug/agentdebug.sqlite') -> None:
        """Remember ``path``, create its parent directory, and create tables."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def save_trajectory(self, trajectory: AgentTrajectory) -> None:
        """Insert ``trajectory``, or overwrite the row with the same trace id."""
        payload = model_to_json(trajectory)
        self._run(
            """
            INSERT INTO trajectories(trace_id, task_id, framework, updated_at, payload_json)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(trace_id) DO UPDATE SET
                task_id=excluded.task_id,
                framework=excluded.framework,
                updated_at=excluded.updated_at,
                payload_json=excluded.payload_json
            """,
            (
                trajectory.trace_id,
                trajectory.task_id,
                trajectory.framework,
                utc_now().isoformat(),
                payload,
            ),
        )

    def load_trajectory(self, trace_id: str) -> Optional[AgentTrajectory]:
        """Return the trajectory stored under ``trace_id``, or ``None``."""
        rows = self._run(
            'SELECT payload_json FROM trajectories WHERE trace_id = ?',
            (trace_id,),
        )
        if not rows:
            return None
        # trace_id is the primary key, so there is at most one row.
        return trajectory_from_json(str(rows[0][0]))

    def list_traces(self) -> List[str]:
        """Return all trace ids, most recently updated first."""
        rows = self._run(
            'SELECT trace_id FROM trajectories ORDER BY updated_at DESC'
        )
        return [str(row[0]) for row in rows]

    def save_report(self, report: DiagnosticReport) -> None:
        """Insert ``report``, replacing any row with the same report id."""
        self._run(
            """
            INSERT OR REPLACE INTO diagnostic_reports(
                report_id, trace_id, generated_at, payload_json
            )
            VALUES (?, ?, ?, ?)
            """,
            (
                report.report_id,
                report.trace_id,
                report.generated_at.isoformat(),
                model_to_json(report),
            ),
        )

    def list_reports(self, trace_id: Optional[str] = None) -> List[DiagnosticReport]:
        """Return stored reports, newest first.

        Args:
            trace_id: When given, only reports for that trace are returned.
        """
        query = 'SELECT payload_json FROM diagnostic_reports'
        params: tuple = ()
        if trace_id is not None:
            query += ' WHERE trace_id = ?'
            params = (trace_id,)
        query += ' ORDER BY generated_at DESC'
        return [report_from_json(str(row[0])) for row in self._run(query, params)]

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the database file."""
        return sqlite3.connect(self.path)

    def _run(self, sql: str, params: tuple = ()) -> list:
        """Execute one statement in its own transaction and return all rows.

        Using a connection as a context manager only commits or rolls back;
        it does not close the connection, so it is closed here explicitly.
        """
        conn = self._connect()
        try:
            with conn:  # commit on success, roll back on error
                return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def _init_db(self) -> None:
        """Create the two tables when they do not exist yet."""
        schema = (
            """
            CREATE TABLE IF NOT EXISTS trajectories (
                trace_id TEXT PRIMARY KEY,
                task_id TEXT,
                framework TEXT,
                updated_at TEXT NOT NULL,
                payload_json TEXT NOT NULL
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS diagnostic_reports (
                report_id TEXT PRIMARY KEY,
                trace_id TEXT NOT NULL,
                generated_at TEXT NOT NULL,
                payload_json TEXT NOT NULL
            )
            """,
        )
        for statement in schema:
            self._run(statement)
|