fableforge-agent-profiler 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_profiler/__init__.py +13 -0
- agent_profiler/classifier.py +266 -0
- agent_profiler/cli.py +180 -0
- agent_profiler/profiler.py +308 -0
- agent_profiler/visualizer.py +224 -0
- fableforge_agent_profiler-0.1.0.dist-info/METADATA +123 -0
- fableforge_agent_profiler-0.1.0.dist-info/RECORD +11 -0
- fableforge_agent_profiler-0.1.0.dist-info/WHEEL +5 -0
- fableforge_agent_profiler-0.1.0.dist-info/entry_points.txt +2 -0
- fableforge_agent_profiler-0.1.0.dist-info/licenses/LICENSE +21 -0
- fableforge_agent_profiler-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Agent Profiler - Profile and classify agent behavior patterns."""
|
|
2
|
+
|
|
3
|
+
from agent_profiler.profiler import AgentProfiler, ProfileResult
|
|
4
|
+
from agent_profiler.classifier import BehaviorClassifier
|
|
5
|
+
from agent_profiler.visualizer import ProfileVisualizer
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"AgentProfiler",
|
|
9
|
+
"ProfileResult",
|
|
10
|
+
"BehaviorClassifier",
|
|
11
|
+
"ProfileVisualizer",
|
|
12
|
+
]
|
|
13
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Behavior classification using pretrained profiles and transition matrices."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class BehaviorProfile:
    """Template describing one behavior category as a vector of weights.

    Each ``*_weight`` field is paired with the observed feature of the same
    name when a session is scored; ``min_turns`` is the session length below
    which a profile's score is penalised by the classifier.
    """

    # Identity of the profile.
    name: str
    description: str

    # Weights applied to the per-tool usage ratios.
    edit_weight: float = 0.0
    read_weight: float = 0.0
    grep_weight: float = 0.0
    bash_weight: float = 0.0
    write_weight: float = 0.0

    # Weights applied to the session-level signals.
    error_rate_weight: float = 0.0
    error_recovery_weight: float = 0.0
    circular_weight: float = 0.0
    entropy_weight: float = 0.0

    # Sessions shorter than this many turns have the profile's score reduced.
    min_turns: int = 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DebuggingProfile(BehaviorProfile):
    """Behavior template for debugging sessions.

    Weighted towards bash and edit usage combined with a high error rate
    and frequent error recovery.
    """

    def __init__(self) -> None:
        # Weights for the per-tool usage ratios.
        tool_weights = {
            "edit_weight": 0.25,
            "read_weight": 0.15,
            "grep_weight": 0.10,
            "bash_weight": 0.30,
            "write_weight": 0.05,
        }
        # Weights for the session-level signals.
        signal_weights = {
            "error_rate_weight": 0.40,
            "error_recovery_weight": 0.35,
            "circular_weight": 0.20,
            "entropy_weight": 0.15,
        }
        super().__init__(
            name="debugging",
            description="Active debugging with edits and error recovery loops",
            min_turns=5,
            **tool_weights,
            **signal_weights,
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BuildingProfile(BehaviorProfile):
    """Behavior template for productive feature building.

    Weighted towards write and bash usage; a high error rate counts
    against this profile (negative weight).
    """

    def __init__(self) -> None:
        # Weights for the per-tool usage ratios.
        tool_weights = {
            "edit_weight": 0.15,
            "read_weight": 0.10,
            "grep_weight": 0.05,
            "bash_weight": 0.25,
            "write_weight": 0.30,
        }
        # Weights for the session-level signals.
        signal_weights = {
            "error_rate_weight": -0.10,
            "error_recovery_weight": 0.10,
            "circular_weight": 0.05,
            "entropy_weight": 0.20,
        }
        super().__init__(
            name="building",
            description="Active feature development with writes and executions",
            min_turns=3,
            **tool_weights,
            **signal_weights,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ExploringProfile(BehaviorProfile):
    """Behavior template for code exploration.

    Weighted towards read and grep usage with very little editing; a high
    error rate counts against this profile (negative weight).
    """

    def __init__(self) -> None:
        # Weights for the per-tool usage ratios.
        tool_weights = {
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.30,
            "bash_weight": 0.10,
            "write_weight": 0.02,
        }
        # Weights for the session-level signals.
        signal_weights = {
            "error_rate_weight": -0.20,
            "error_recovery_weight": 0.05,
            "circular_weight": 0.10,
            "entropy_weight": 0.25,
        }
        super().__init__(
            name="exploring",
            description="Code exploration with reads and searches, minimal edits",
            min_turns=3,
            **tool_weights,
            **signal_weights,
        )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class LostProfile(BehaviorProfile):
    """Behavior template for lost or confused sessions.

    Weighted towards circular transitions and heavy reading; successful
    error recovery counts against this profile (negative weight).
    """

    def __init__(self) -> None:
        # Weights for the per-tool usage ratios.
        tool_weights = {
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.15,
            "bash_weight": 0.05,
            "write_weight": 0.02,
        }
        # Weights for the session-level signals.
        signal_weights = {
            "error_rate_weight": 0.10,
            "error_recovery_weight": -0.10,
            "circular_weight": 0.40,
            "entropy_weight": 0.10,
        }
        super().__init__(
            name="lost",
            description="Confused or circular behavior, reading without progress",
            min_turns=5,
            **tool_weights,
            **signal_weights,
        )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class VerifyingProfile(BehaviorProfile):
    """Behavior template for verification work.

    Weighted towards reading, bash runs and editing together with error
    recovery; a high error rate counts slightly against this profile.
    """

    def __init__(self) -> None:
        # Weights for the per-tool usage ratios.
        tool_weights = {
            "edit_weight": 0.20,
            "read_weight": 0.30,
            "grep_weight": 0.10,
            "bash_weight": 0.25,
            "write_weight": 0.05,
        }
        # Weights for the session-level signals.
        signal_weights = {
            "error_rate_weight": -0.05,
            "error_recovery_weight": 0.25,
            "circular_weight": 0.15,
            "entropy_weight": 0.15,
        }
        super().__init__(
            name="verifying",
            description="Verifying changes with reads after edits and test execution",
            min_turns=4,
            **tool_weights,
            **signal_weights,
        )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class BehaviorClassifier:
    """Classify agent behavior by scoring observed features against profiles.

    Every registered profile is a weight vector over the same nine features.
    A session's feature vector is compared with each profile and the
    resulting scores are normalised so that they sum to 1.
    """

    # (feature name, profile attribute) pairs. The fixed order keeps the
    # floating-point arithmetic identical from run to run; iterating a set of
    # string keys would follow hash-randomised order instead.
    _FEATURE_WEIGHTS = (
        ("edit", "edit_weight"),
        ("read", "read_weight"),
        ("grep", "grep_weight"),
        ("bash", "bash_weight"),
        ("write", "write_weight"),
        ("error_rate", "error_rate_weight"),
        ("error_recovery", "error_recovery_weight"),
        ("circular", "circular_weight"),
        ("entropy", "entropy_weight"),
    )

    def __init__(self) -> None:
        self.profiles: dict[str, BehaviorProfile] = {
            "debugging": DebuggingProfile(),
            "building": BuildingProfile(),
            "exploring": ExploringProfile(),
            "lost": LostProfile(),
            "verifying": VerifyingProfile(),
        }

    def compute_scores(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> dict[str, float]:
        """Compute profile scores for each behavior category.

        For every profile the raw score is the mean of (a) the dot product
        between the observed features and the profile weights and (b) the
        cosine similarity rescaled from [-1, 1] to [0, 1]. The raw score is
        halved when the session is shorter than the profile's ``min_turns``,
        clamped to [0, 1], and finally all scores are divided by their sum.

        Args:
            edit_ratio: Ratio of edit tool calls.
            read_ratio: Ratio of read tool calls.
            grep_ratio: Ratio of grep/search tool calls.
            bash_ratio: Ratio of bash/shell tool calls.
            write_ratio: Ratio of write tool calls.
            error_rate: Rate of errors in the session.
            error_recovery_rate: Rate of error recovery.
            circular_ratio: Ratio of circular tool transitions.
            entropy: Entropy of tool distribution.
            num_turns: Number of turns in the session.

        Returns:
            Dict mapping category name to its normalised score. The scores
            sum to 1 unless every clamped score is 0, in which case the
            zeros are returned unchanged.
        """
        observed = {
            "edit": edit_ratio,
            "read": read_ratio,
            "grep": grep_ratio,
            "bash": bash_ratio,
            "write": write_ratio,
            "error_rate": error_rate,
            "error_recovery": error_recovery_rate,
            "circular": circular_ratio,
            "entropy": entropy,
        }
        obs_vec = np.array(
            [observed[feature] for feature, _ in self._FEATURE_WEIGHTS], dtype=float
        )
        obs_norm = float(np.linalg.norm(obs_vec))

        scores: dict[str, float] = {}
        for name, profile in self.profiles.items():
            prof_vec = np.array(
                [getattr(profile, attr) for _, attr in self._FEATURE_WEIGHTS],
                dtype=float,
            )
            prof_norm = float(np.linalg.norm(prof_vec))

            # One dot product feeds both the raw term and the cosine term.
            dot_product = float(np.dot(obs_vec, prof_vec))
            if obs_norm > 0 and prof_norm > 0:
                cosine_sim = dot_product / (obs_norm * prof_norm)
            else:
                # A zero vector has no direction; treat it as orthogonal.
                cosine_sim = 0.0

            score = (dot_product + (cosine_sim + 1) / 2) / 2.0

            # Short sessions are weak evidence for this profile.
            if num_turns < profile.min_turns:
                score *= 0.5

            scores[name] = max(0.0, min(1.0, score))

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        return scores

    def classify(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> tuple[str, float, dict[str, float]]:
        """Classify behavior into a category.

        Accepts the same features as :meth:`compute_scores`.

        Returns:
            Tuple of (category, confidence, all_scores), where ``category``
            is the highest-scoring profile name and ``confidence`` is its
            normalised score.
        """
        scores = self.compute_scores(
            edit_ratio=edit_ratio,
            read_ratio=read_ratio,
            grep_ratio=grep_ratio,
            bash_ratio=bash_ratio,
            write_ratio=write_ratio,
            error_rate=error_rate,
            error_recovery_rate=error_recovery_rate,
            circular_ratio=circular_ratio,
            entropy=entropy,
            num_turns=num_turns,
        )
        best = max(scores, key=scores.get)  # type: ignore[arg-type]
        return best, scores[best], scores

    def get_profile(self, name: str) -> BehaviorProfile | None:
        """Return the profile registered under ``name``, or ``None``."""
        return self.profiles.get(name)

    def list_profiles(self) -> dict[str, str]:
        """Return a mapping of profile name to its description."""
        return {name: p.description for name, p in self.profiles.items()}
|
agent_profiler/cli.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""CLI for Agent Profiler."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from rich.columns import Columns
|
|
14
|
+
|
|
15
|
+
from agent_profiler.profiler import AgentProfiler
|
|
16
|
+
from agent_profiler.classifier import BehaviorClassifier
|
|
17
|
+
from agent_profiler.visualizer import ProfileVisualizer
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@click.group()
def cli() -> None:
    """Agent Profiler - Profile and classify agent behavior patterns."""
    # Intentionally empty: this function only anchors the click command group
    # that the sub-commands below attach to.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file path (JSON)")
def profile(trace_file: str, output: str | None) -> None:
    """Profile an agent session from a trace file."""
    # Profile the trace, print a summary panel plus score and tool tables,
    # and optionally dump the metrics to ``output`` as JSON.
    session_profiler = AgentProfiler()

    with console.status("[bold green]Profiling session..."):
        result = session_profiler.profile(trace_file)

    category_colors = {
        "debugging": "red",
        "building": "green",
        "exploring": "blue",
        "lost": "yellow",
        "verifying": "magenta",
    }
    color = category_colors.get(result.category, "white")

    # Summary panel: one metric per line.
    summary_lines = [
        f"[bold {color}]{result.category.upper()}[/bold {color}]",
        f"Confidence: {result.confidence:.1%}",
        f"Turns: {result.num_turns}",
        f"Duration: {result.session_duration:.0f}s",
        f"Error Rate: {result.error_rate:.1%}",
        f"Edit Ratio: {result.edit_ratio:.1%}",
        f"Read Ratio: {result.read_ratio:.1%}",
        f"Write Ratio: {result.write_ratio:.1%}",
        f"Bash Ratio: {result.bash_ratio:.1%}",
        f"Grep Ratio: {result.grep_ratio:.1%}",
    ]
    console.print(Panel("\n".join(summary_lines), title="Profile Result"))

    # Per-category scores, highest first, each with a 20-cell bar.
    if result.profile_scores:
        score_table = Table(title="Profile Scores", show_lines=True)
        score_table.add_column("Category", style="bold")
        score_table.add_column("Score", justify="right")
        score_table.add_column("Bar")

        ranked = sorted(result.profile_scores.items(), key=lambda item: -item[1])
        for cat, score in ranked:
            filled = int(score * 20)
            bar = "█" * filled + "░" * (20 - filled)
            cat_color = category_colors.get(cat, "white")
            marker = " ←" if cat == result.category else ""
            score_table.add_row(
                f"[{cat_color}]{cat}[/{cat_color}]{marker}",
                f"{score:.1%}",
                f"[{cat_color}]{bar}[/{cat_color}]",
            )
        console.print(score_table)

    # Raw tool usage, most used first.
    distribution = result.tool_distribution
    if distribution.tool_counts:
        tool_table = Table(title="Tool Distribution", show_lines=True)
        tool_table.add_column("Tool", style="cyan")
        tool_table.add_column("Count", justify="right")
        tool_table.add_column("Frequency", justify="right")

        by_count = sorted(distribution.tool_counts.items(), key=lambda item: -item[1])
        for tool, count in by_count:
            freq = result.tool_distribution.frequencies.get(tool, 0.0)
            tool_table.add_row(tool.title(), str(count), f"{freq:.1%}")
        console.print(tool_table)

    if not output:
        return

    payload = {
        "category": result.category,
        "confidence": result.confidence,
        "num_turns": result.num_turns,
        "session_duration": result.session_duration,
        "error_rate": result.error_rate,
        "edit_ratio": result.edit_ratio,
        "read_ratio": result.read_ratio,
        "bash_ratio": result.bash_ratio,
        "write_ratio": result.write_ratio,
        "grep_ratio": result.grep_ratio,
        "error_recovery_rate": result.error_recovery_rate,
        "profile_scores": result.profile_scores,
        "tool_distribution": {
            "counts": result.tool_distribution.tool_counts,
            "total_calls": result.tool_distribution.total_calls,
            "entropy": result.tool_distribution.entropy,
        },
    }
    serialized = json.dumps(payload, indent=2)
    Path(output).write_text(serialized)
    console.print(f"\n[green]Results saved to {output}[/green]")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
def classify(trace_file: str) -> None:
    """Classify agent behavior from a trace file."""
    # Profile the trace, print the winning category, then list every known
    # profile with its description and the score this session received.
    profiler = AgentProfiler()

    with console.status("[bold green]Classifying behavior..."):
        result = profiler.profile(trace_file)

    # Descriptions come from the classifier itself so they cannot drift from
    # the registered profiles.
    classifier = BehaviorClassifier()
    profiles = classifier.list_profiles()

    console.print(Panel(
        f"[bold]Category: {result.category}[/bold]\n"
        f"Confidence: {result.confidence:.1%}",
        title="Classification Result",
    ))

    desc_table = Table(title="Profile Descriptions", show_lines=True)
    desc_table.add_column("Profile", style="bold")
    desc_table.add_column("Description")
    desc_table.add_column("Your Score", justify="right")

    for name, desc in profiles.items():
        score = result.profile_scores.get(name, 0.0)
        is_match = name == result.category
        desc_table.add_row(
            f"{'→ ' if is_match else ' '}{name}",
            desc,
            f"[bold]{score:.1%}[/bold]" if is_match else f"{score:.1%}",
        )
    console.print(desc_table)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@cli.command()
@click.argument("trace_file", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), default="profile_chart.png", help="Output image path")
def visualize(trace_file: str, output: str) -> None:
    """Generate visualization charts for an agent session."""
    # Writes three images: the radar chart at ``output`` plus a heatmap and a
    # pie chart next to it, named ``<stem>_heatmap.png`` and ``<stem>_tools.png``.
    profiler = AgentProfiler()
    visualizer = ProfileVisualizer()

    with console.status("[bold green]Profiling and generating charts..."):
        result = profiler.profile(trace_file)

    console.print("[bold]Generating profile radar chart...[/bold]")
    visualizer.generate_profile_chart(result, output=output)
    console.print(f" [green]Saved: {output}[/green]")

    # Sibling outputs share the directory and stem of the main chart.
    out_path = Path(output)

    heatmap_path = str(out_path.with_name(out_path.stem + "_heatmap.png"))
    console.print("[bold]Generating transition heatmap...[/bold]")
    visualizer.generate_transition_heatmap(trace_file, output=heatmap_path)
    console.print(f" [green]Saved: {heatmap_path}[/green]")

    pie_path = str(out_path.with_name(out_path.stem + "_tools.png"))
    console.print("[bold]Generating tool distribution pie chart...[/bold]")
    visualizer.generate_tool_distribution_pie(trace_file, output=pie_path)
    console.print(f" [green]Saved: {pie_path}[/green]")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""Core profiler for analyzing agent sessions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from agent_profiler.classifier import BehaviorClassifier
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ToolDistribution:
    """How often each tool was called during a session."""

    # Call count per tool name.
    tool_counts: dict[str, int] = field(default_factory=dict)
    # Denominator used when turning counts into frequencies.
    total_calls: int = 0

    @property
    def frequencies(self) -> dict[str, float]:
        """Return each tool's share of ``total_calls`` (empty when it is 0)."""
        total = self.total_calls
        if total == 0:
            return {}
        return {tool: count / total for tool, count in self.tool_counts.items()}

    @property
    def dominant_tool(self) -> str:
        """Return the most-called tool, or ``"none"`` when nothing was called."""
        if self.tool_counts:
            return max(self.tool_counts, key=self.tool_counts.get)  # type: ignore[arg-type]
        return "none"

    @property
    def entropy(self) -> float:
        """Return the Shannon entropy (base 2) of the tool frequencies."""
        shares = list(self.frequencies.values())
        if not shares or sum(shares) == 0:
            return 0.0
        # Zero shares are dropped so that log2 is never evaluated at 0.
        positive = np.array([share for share in shares if share > 0])
        return float(-np.sum(positive * np.log2(positive)))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class TransitionAnalysis:
    """Counts of consecutive tool-to-tool transitions in a session."""

    # transitions[from_tool][to_tool] -> number of times that step occurred.
    transitions: dict[str, dict[str, int]] = field(default_factory=dict)
    # Total number of recorded transitions (denominator for circular_ratio).
    total_transitions: int = 0

    @property
    def transition_probabilities(self) -> dict[str, dict[str, float]]:
        """Return the outgoing counts of each tool normalised to probabilities.

        A source tool whose outgoing counts sum to 0 maps to an empty dict.
        """
        probs: dict[str, dict[str, float]] = {}
        for from_tool, to_tools in self.transitions.items():
            total = sum(to_tools.values())
            if total > 0:
                probs[from_tool] = {t: c / total for t, c in to_tools.items()}
            else:
                probs[from_tool] = {}
        return probs

    @property
    def circular_ratio(self) -> float:
        """Return the fraction of transitions that stay on the same tool (A→A).

        Only aggregated pairwise counts are stored here, so longer round
        trips such as A→B→A cannot be recovered from this data; the value
        is the share of self-transitions among all transitions. Returns 0.0
        when no transitions were recorded.
        """
        if self.total_transitions == 0:
            return 0.0
        self_loops = sum(
            to_tools.get(from_tool, 0)
            for from_tool, to_tools in self.transitions.items()
        )
        return self_loops / self.total_transitions
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class ProfileResult:
    """Outcome of profiling one agent session."""

    # Classification outcome.
    category: str  # debugging, building, exploring, lost, verifying
    confidence: float  # 0.0 - 1.0

    # Detailed tool usage statistics.
    tool_distribution: ToolDistribution = field(default_factory=ToolDistribution)
    transition_analysis: TransitionAnalysis = field(default_factory=TransitionAnalysis)

    # Session-level metrics.
    session_duration: float = 0.0  # seconds
    num_turns: int = 0
    error_rate: float = 0.0

    # Per-tool shares of all tool calls.
    edit_ratio: float = 0.0  # edit calls / total calls
    read_ratio: float = 0.0  # read calls / total calls
    grep_ratio: float = 0.0  # grep/search calls / total calls
    bash_ratio: float = 0.0  # bash/shell calls / total calls
    write_ratio: float = 0.0  # write calls / total calls

    error_recovery_rate: float = 0.0
    sub_categories: list[str] = field(default_factory=list)
    profile_scores: dict[str, float] = field(default_factory=dict)

    def __str__(self) -> str:
        """Return a one-line summary: category, confidence, turns, duration."""
        parts = [
            f"category={self.category}",
            f"confidence={self.confidence:.2f}",
            f"turns={self.num_turns}",
            f"duration={self.session_duration:.1f}s",
        ]
        return "ProfileResult(" + ", ".join(parts) + ")"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _load_traces(path: str | Path) -> list[dict[str, Any]]:
|
|
100
|
+
traces = []
|
|
101
|
+
with open(path) as f:
|
|
102
|
+
for line in f:
|
|
103
|
+
line = line.strip()
|
|
104
|
+
if line:
|
|
105
|
+
traces.append(json.loads(line))
|
|
106
|
+
return traces
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _extract_tool(entry: dict[str, Any]) -> str:
|
|
110
|
+
"""Extract tool name from a trace entry."""
|
|
111
|
+
content = entry.get("content", [])
|
|
112
|
+
if isinstance(content, list):
|
|
113
|
+
for block in content:
|
|
114
|
+
if isinstance(block, dict) and block.get("type") == "tool_use":
|
|
115
|
+
name = block.get("name", "")
|
|
116
|
+
if name:
|
|
117
|
+
return name
|
|
118
|
+
tool = entry.get("tool", entry.get("tool_name", entry.get("function", "")))
|
|
119
|
+
return str(tool).lower() if tool else ""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _normalize_tool(raw_tool: str) -> str:
|
|
123
|
+
"""Normalize tool names to canonical forms."""
|
|
124
|
+
tool = raw_tool.lower().strip()
|
|
125
|
+
read_names = {"read", "file_read", "cat", "get_file_contents", "view", "open"}
|
|
126
|
+
edit_names = {"edit", "file_edit", "apply_edit", "replace", "str_replace_editor"}
|
|
127
|
+
write_names = {"write", "file_write", "create_file", "create"}
|
|
128
|
+
bash_names = {"bash", "shell", "run", "execute", "terminal", "command"}
|
|
129
|
+
grep_names = {"grep", "search", "find", "glob", "rg", "ag"}
|
|
130
|
+
test_names = {"test", "pytest", "run_test", "unittest"}
|
|
131
|
+
|
|
132
|
+
if tool in read_names:
|
|
133
|
+
return "read"
|
|
134
|
+
if tool in edit_names:
|
|
135
|
+
return "edit"
|
|
136
|
+
if tool in write_names:
|
|
137
|
+
return "write"
|
|
138
|
+
if tool in bash_names:
|
|
139
|
+
return "bash"
|
|
140
|
+
if tool in grep_names:
|
|
141
|
+
return "grep"
|
|
142
|
+
if tool in test_names:
|
|
143
|
+
return "bash"
|
|
144
|
+
return tool if tool else "unknown"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _has_error(entry: dict[str, Any]) -> bool:
|
|
148
|
+
"""Check if a trace entry indicates an error."""
|
|
149
|
+
content = entry.get("content", [])
|
|
150
|
+
if isinstance(content, list):
|
|
151
|
+
for block in content:
|
|
152
|
+
if isinstance(block, dict):
|
|
153
|
+
if block.get("type") == "tool_result" and block.get("is_error", False):
|
|
154
|
+
return True
|
|
155
|
+
text = block.get("text", "")
|
|
156
|
+
if isinstance(text, str):
|
|
157
|
+
error_markers = ["error", "exception", "traceback", "failed", "failure"]
|
|
158
|
+
if any(m in text.lower() for m in error_markers):
|
|
159
|
+
return True
|
|
160
|
+
role = entry.get("role", "")
|
|
161
|
+
if role == "assistant":
|
|
162
|
+
text = ""
|
|
163
|
+
if isinstance(content, list):
|
|
164
|
+
for block in content:
|
|
165
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
166
|
+
text += block.get("text", "")
|
|
167
|
+
elif isinstance(content, str):
|
|
168
|
+
text = content
|
|
169
|
+
error_in_response = ["sorry", "i made a mistake", "let me fix", "correction", "that was wrong"]
|
|
170
|
+
return any(m in text.lower() for m in error_in_response)
|
|
171
|
+
return False
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class AgentProfiler:
    """Compute usage metrics for an agent session and classify its behavior."""

    def __init__(self) -> None:
        self.classifier = BehaviorClassifier()

    def profile(self, session: str | Path | list[dict[str, Any]]) -> ProfileResult:
        """Profile an agent session and classify its behavior.

        Args:
            session: Path to JSONL trace file, or list of trace dicts.

        Returns:
            ProfileResult with category, confidence, and detailed metrics.
        """
        trace_data = _load_traces(session) if isinstance(session, (str, Path)) else session

        tool_dist = self._compute_tool_distribution(trace_data)
        transitions = self._compute_transitions(trace_data)
        error_rate = self._compute_error_rate(trace_data)
        duration = self._compute_duration(trace_data)
        error_recovery = self._compute_error_recovery(trace_data)

        # The five per-tool ratios feed both the classifier and the result,
        # under the same keyword names.
        shares = tool_dist.frequencies
        ratios = {
            f"{tool}_ratio": shares.get(tool, 0.0)
            for tool in ("edit", "read", "grep", "bash", "write")
        }
        turn_count = len(trace_data)

        profile_scores = self.classifier.compute_scores(
            error_rate=error_rate,
            error_recovery_rate=error_recovery,
            circular_ratio=transitions.circular_ratio,
            entropy=tool_dist.entropy,
            num_turns=turn_count,
            **ratios,
        )

        best_category = max(profile_scores, key=profile_scores.get)  # type: ignore[arg-type]

        # Runner-up categories that still scored above 0.3.
        sub_categories = [
            cat
            for cat, score in profile_scores.items()
            if score > 0.3 and cat != best_category
        ]

        return ProfileResult(
            category=best_category,
            confidence=profile_scores[best_category],
            tool_distribution=tool_dist,
            transition_analysis=transitions,
            session_duration=duration,
            num_turns=turn_count,
            error_rate=error_rate,
            error_recovery_rate=error_recovery,
            sub_categories=sub_categories,
            profile_scores=profile_scores,
            **ratios,
        )

    def _compute_tool_distribution(self, trace_data: list[dict[str, Any]]) -> ToolDistribution:
        """Count normalised tool names over all entries that reference a tool."""
        counts: dict[str, int] = {}
        for entry in trace_data:
            raw = _extract_tool(entry)
            if not raw:
                continue
            canonical = _normalize_tool(raw)
            counts[canonical] = counts.get(canonical, 0) + 1
        return ToolDistribution(tool_counts=counts, total_calls=sum(counts.values()))

    def _compute_transitions(self, trace_data: list[dict[str, Any]]) -> TransitionAnalysis:
        """Count transitions between consecutive tool-bearing entries.

        Entries without a tool are skipped and do not break the chain.
        """
        matrix: dict[str, dict[str, int]] = {}
        recorded = 0
        previous = ""
        for entry in trace_data:
            raw = _extract_tool(entry)
            if not raw:
                continue
            current = _normalize_tool(raw)
            if previous and current:
                row = matrix.setdefault(previous, {})
                row[current] = row.get(current, 0) + 1
                recorded += 1
            previous = current
        return TransitionAnalysis(transitions=matrix, total_transitions=recorded)

    def _compute_error_rate(self, trace_data: list[dict[str, Any]]) -> float:
        """Return the fraction of entries flagged as errors (0.0 if empty)."""
        if not trace_data:
            return 0.0
        flagged = [entry for entry in trace_data if _has_error(entry)]
        return len(flagged) / len(trace_data)

    def _compute_duration(self, trace_data: list[dict[str, Any]]) -> float:
        """Return the span in seconds between the earliest and latest timestamp.

        Each entry's ``timestamp`` (or, if absent, ``created_at``) may be a
        number or an ISO-8601 string; unparseable strings are ignored.
        Returns 0.0 when fewer than two usable timestamps exist.
        """
        from datetime import datetime

        if not trace_data:
            return 0.0

        stamps: list[float] = []
        for entry in trace_data:
            raw = entry.get("timestamp", entry.get("created_at", ""))
            if isinstance(raw, (int, float)):
                stamps.append(float(raw))
                continue
            if not (isinstance(raw, str) and raw):
                continue
            try:
                # fromisoformat historically rejects a trailing "Z".
                parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
                stamps.append(parsed.timestamp())
            except (ValueError, AttributeError):
                # Best effort: skip timestamps that cannot be parsed.
                pass

        if len(stamps) < 2:
            return 0.0
        return max(stamps) - min(stamps)

    def _compute_error_recovery(self, trace_data: list[dict[str, Any]]) -> float:
        """Return recoveries per error entry.

        A recovery is the first non-error entry that follows one or more
        error entries. Returns 1.0 when the session contains no errors.
        """
        error_count = 0
        recovery_count = 0
        pending_error = False
        for entry in trace_data:
            if _has_error(entry):
                error_count += 1
                pending_error = True
            elif pending_error:
                recovery_count += 1
                pending_error = False
        return 1.0 if error_count == 0 else recovery_count / error_count
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Visualization tools for agent profiling results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from agent_profiler.profiler import ProfileResult, AgentProfiler, ToolDistribution, TransitionAnalysis
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ProfileVisualizer:
    """Generate charts and visualizations for agent profiles.

    Every ``generate_*`` method returns a matplotlib ``Figure``. When an
    ``output`` path is given the figure is written there and then closed;
    otherwise it is returned still open and the caller owns it.
    """

    # Fixed colors for well-known tool names; any other tool gets the fallback.
    _TOOL_COLORS = {
        "read": "#4e79a7", "edit": "#f28e2b", "write": "#e15759",
        "bash": "#59a14f", "grep": "#76b7b2", "unknown": "#b07aa1",
    }
    _FALLBACK_COLOR = "#bab0ac"

    def __init__(self, style: str = "default") -> None:
        """Store the style name and try to configure matplotlib.

        Args:
            style: matplotlib style name. ``"default"`` selects
                ``"seaborn-v0_8-whitegrid"``.
        """
        self.style = style
        self._setup_matplotlib()

    def _setup_matplotlib(self) -> None:
        """Select the ``Agg`` backend and apply the configured style.

        Failures are ignored on purpose so that constructing a visualizer
        never fails just because matplotlib is not importable or the style
        cannot be loaded; the ``generate_*`` methods import matplotlib
        themselves and will surface a missing installation at that point.
        """
        try:
            import matplotlib
            matplotlib.use("Agg")
            import matplotlib.pyplot as plt
            plt.style.use(self.style if self.style != "default" else "seaborn-v0_8-whitegrid")
        except (ImportError, OSError):
            pass

    @staticmethod
    def _save_and_close(fig: Any, output: str | Path | None) -> Any:
        """Save ``fig`` to ``output`` and close it when a path is given; return ``fig``."""
        if output:
            import matplotlib.pyplot as plt

            fig.savefig(str(output), dpi=150, bbox_inches="tight")
            plt.close(fig)
        return fig

    def _placeholder_figure(self, message: str, title: str, output: str | Path | None) -> Any:
        """Build a figure that only shows ``message``, for inputs with nothing to plot."""
        import matplotlib.pyplot as plt

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.text(0.5, 0.5, message, ha="center", va="center", fontsize=14)
        ax.set_title(title, fontsize=13, fontweight="bold")
        fig.tight_layout()
        return self._save_and_close(fig, output)

    def generate_profile_chart(self, profile: ProfileResult, output: str | Path | None = None) -> Any:
        """Generate a radar chart showing profile scores.

        Args:
            profile: ProfileResult from AgentProfiler.
            output: Path to save the chart. When given, the figure is saved
                and closed; when None, the open figure is returned.

        Returns:
            matplotlib Figure object. If ``profile.profile_scores`` is empty
            a placeholder figure is returned instead of a radar chart.
        """
        import matplotlib.pyplot as plt
        import numpy as np

        categories = list(profile.profile_scores)
        if not categories:
            # A radar polygon needs at least one axis; closing the polygon
            # below would otherwise index into an empty list.
            return self._placeholder_figure("No profile scores", "Agent Profile", output)

        values = [profile.profile_scores[c] for c in categories]
        angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
        # Repeat the first point so the outline forms a closed polygon.
        values_closed = values + values[:1]
        angles_closed = angles + angles[:1]

        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"polar": True})

        ax.fill(angles_closed, values_closed, alpha=0.25, color="steelblue")
        ax.plot(angles_closed, values_closed, linewidth=2, color="steelblue")

        ax.set_xticks(angles)
        ax.set_xticklabels([c.title() for c in categories], fontsize=11)
        ax.set_ylim(0, 1)
        ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])

        ax.set_title(
            f"Agent Profile: {profile.category.title()}\n"
            f"Confidence: {profile.confidence:.1%} | Turns: {profile.num_turns} | "
            f"Duration: {profile.session_duration:.0f}s",
            fontsize=13,
            fontweight="bold",
            pad=20,
        )

        # Mark the classified category. It is only drawn when that category
        # has a score; highlighting some other axis would mislabel the chart.
        if profile.category in categories:
            highlight_idx = categories.index(profile.category)
            ax.plot(
                [angles[highlight_idx], angles[highlight_idx]],
                [0, values[highlight_idx]],
                linewidth=3,
                color="coral",
                linestyle="--",
            )
            ax.scatter(
                [angles[highlight_idx]],
                [values[highlight_idx]],
                color="coral",
                s=100,
                zorder=5,
            )

        fig.tight_layout()
        return self._save_and_close(fig, output)

    def generate_transition_heatmap(self, session: str | Path | list[dict[str, Any]], output: str | Path | None = None) -> Any:
        """Generate a heatmap of tool transition probabilities.

        Args:
            session: JSONL trace file or list of trace dicts; passed
                unchanged to ``AgentProfiler.profile``.
            output: Path to save the chart. When given, the figure is saved
                and closed; when None, the open figure is returned.

        Returns:
            matplotlib Figure object. A placeholder figure is returned when
            the session contains no transitions.
        """
        import matplotlib.pyplot as plt
        import numpy as np

        transitions = AgentProfiler().profile(session).transition_analysis
        # Every tool that appears as a source or as a destination gets an axis entry.
        tools = sorted(
            set(transitions.transitions)
            | {dst for targets in transitions.transitions.values() for dst in targets}
        )

        if not tools:
            return self._placeholder_figure("No transitions found", "Tool Transition Heatmap", output)

        n = len(tools)
        probs = transitions.transition_probabilities
        # Rows are the "from" tool, columns the "to" tool.
        matrix = np.array(
            [[probs.get(src, {}).get(dst, 0.0) for dst in tools] for src in tools],
            dtype=float,
        )

        fig, ax = plt.subplots(figsize=(max(8, n + 2), max(6, n)))
        im = ax.imshow(matrix, cmap="YlOrRd", vmin=0, vmax=1.0)

        ax.set_xticks(range(n))
        ax.set_yticks(range(n))
        ax.set_xticklabels(tools, rotation=45, ha="right", fontsize=10)
        ax.set_yticklabels(tools, fontsize=10)

        for (i, j), prob in np.ndenumerate(matrix):
            if prob > 0.01:
                # White text on the darker cells keeps the label readable.
                ax.text(j, i, f"{prob:.2f}", ha="center", va="center",
                        fontsize=8, color="white" if prob > 0.5 else "black")

        ax.set_title("Tool Transition Probabilities", fontsize=13, fontweight="bold", pad=10)
        ax.set_xlabel("To Tool", fontsize=11)
        ax.set_ylabel("From Tool", fontsize=11)

        cbar = fig.colorbar(im, ax=ax, shrink=0.8)
        cbar.set_label("Transition Probability", fontsize=10)

        fig.tight_layout()
        return self._save_and_close(fig, output)

    def generate_tool_distribution_pie(self, session: str | Path | list[dict[str, Any]], output: str | Path | None = None) -> Any:
        """Generate a pie chart of tool usage distribution.

        Args:
            session: JSONL trace file or list of trace dicts; passed
                unchanged to ``AgentProfiler.profile``.
            output: Path to save the chart. When given, the figure is saved
                and closed; when None, the open figure is returned.

        Returns:
            matplotlib Figure object. A placeholder figure is returned when
            the session has no tool counts.
        """
        import matplotlib.pyplot as plt

        dist = AgentProfiler().profile(session).tool_distribution

        if not dist.tool_counts:
            return self._placeholder_figure("No tool usage data", "Tool Distribution", output)

        tools = list(dist.tool_counts.keys())
        counts = list(dist.tool_counts.values())
        pie_colors = [self._TOOL_COLORS.get(t, self._FALLBACK_COLOR) for t in tools]

        fig, ax = plt.subplots(figsize=(8, 6))
        _, texts, autotexts = ax.pie(
            counts, labels=[t.title() for t in tools], autopct="%1.1f%%",
            colors=pie_colors, startangle=90, pctdistance=0.8,
        )
        for label in texts:
            label.set_fontsize(10)
        for pct in autotexts:
            pct.set_fontsize(9)

        ax.set_title(
            f"Tool Distribution ({dist.total_calls} calls)\n"
            f"Dominant: {dist.dominant_tool.title()} | Entropy: {dist.entropy:.2f}",
            fontsize=13,
            fontweight="bold",
        )

        fig.tight_layout()
        return self._save_and_close(fig, output)
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fableforge-agent-profiler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Profile and classify agent behavior patterns from traces
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: scikit-learn>=1.3
|
|
12
|
+
Requires-Dist: rich>=13.0
|
|
13
|
+
Requires-Dist: click>=8.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.7
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
18
|
+
Requires-Dist: ruff; extra == "dev"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Agent Profiler
|
|
22
|
+
|
|
23
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
Profile and classify agent behavior patterns from traces using transition matrices and tool distributions.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install fableforge-agent-profiler
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
### Profile a Session
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Profile and classify behavior
|
|
40
|
+
aprof profile trace.jsonl
|
|
41
|
+
|
|
42
|
+
# Save results to JSON
|
|
43
|
+
aprof profile trace.jsonl -o results.json
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Classify Behavior
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
aprof classify trace.jsonl
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Generate Visualizations
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
aprof visualize trace.jsonl --output profile_chart.png
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Programming API
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from agent_profiler import AgentProfiler, BehaviorClassifier, ProfileVisualizer
|
|
62
|
+
|
|
63
|
+
# Profile a session
|
|
64
|
+
profiler = AgentProfiler()
|
|
65
|
+
result = profiler.profile("trace.jsonl")
|
|
66
|
+
print(f"Category: {result.category} (confidence: {result.confidence:.1%})")
|
|
67
|
+
print(f"Tool distribution: {result.tool_distribution.tool_counts}")
|
|
68
|
+
|
|
69
|
+
# Classify directly
|
|
70
|
+
classifier = BehaviorClassifier()
|
|
71
|
+
category, confidence, scores = classifier.classify(
|
|
72
|
+
edit_ratio=0.25, read_ratio=0.15, bash_ratio=0.30,
|
|
73
|
+
error_rate=0.4, error_recovery_rate=0.35
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Generate visualizations
|
|
77
|
+
visualizer = ProfileVisualizer()
|
|
78
|
+
visualizer.generate_profile_chart(result, output="profile.png")
|
|
79
|
+
visualizer.generate_transition_heatmap("trace.jsonl", output="heatmap.png")
|
|
80
|
+
visualizer.generate_tool_distribution_pie("trace.jsonl", output="tools.png")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Behavior Categories
|
|
84
|
+
|
|
85
|
+
| Category | Description | Key Indicators |
|
|
86
|
+
|----------|-------------|----------------|
|
|
87
|
+
| Debugging | Active debugging sessions | High Edit+Bash, many errors, recoveries |
|
|
88
|
+
| Building | Feature development | High Write+Bash, low Read |
|
|
89
|
+
| Exploring | Code investigation | High Read+Grep, low Edit |
|
|
90
|
+
| Lost | Confused/circular behavior | Circular transitions, high Read |
|
|
91
|
+
| Verifying | Change verification | Read after Edit, test runs |
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
MIT
|
|
96
|
+
|
|
97
|
+
## Ecosystem
|
|
98
|
+
|
|
99
|
+
Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
100
|
+
|
|
101
|
+
| Project | Description |
|
|
102
|
+
| --- | --- |
|
|
103
|
+
| **[Anvil](../anvil)** | Self-verified coding agent |
|
|
104
|
+
| **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
|
|
105
|
+
| **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
|
|
106
|
+
| **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
|
|
107
|
+
| **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
|
|
108
|
+
| **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
|
|
109
|
+
| **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
|
|
110
|
+
| **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
|
|
111
|
+
| **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
|
|
112
|
+
| **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
|
|
113
|
+
| **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
|
|
114
|
+
| **[AgentDev](../agent-dev)** | VSCode extension with verification |
|
|
115
|
+
| **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
|
|
116
|
+
| **[AgentSkills](../agent-skills)** | npm for agent behaviors |
|
|
117
|
+
| **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
|
|
118
|
+
| **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
|
|
119
|
+
| **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
|
|
120
|
+
| **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
|
|
121
|
+
| **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
|
|
122
|
+
| **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
|
|
123
|
+
| **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
agent_profiler/__init__.py,sha256=LvEROPWfrYPxKgp6aBALZ4dF-JxRF84n70-YJ6ouAiI,378
|
|
2
|
+
agent_profiler/classifier.py,sha256=0yONLxn3uhQPkD-2lRSJvuDRjEFeVM0a7FcKUh-PqSQ,8587
|
|
3
|
+
agent_profiler/cli.py,sha256=ciSwhyRwS5VYLvYr52PDFaDx_-SGxZizjvvbIzFp5bk,6987
|
|
4
|
+
agent_profiler/profiler.py,sha256=a4XPrHjfnQHk7fpSevLK1WZeSI0ANpVef2CpbaWHepM,11493
|
|
5
|
+
agent_profiler/visualizer.py,sha256=FUJgZPKLHshTZHfjcuXHAMgc6aCjxP4STKZqqs_FCGU,7686
|
|
6
|
+
fableforge_agent_profiler-0.1.0.dist-info/licenses/LICENSE,sha256=w5aFYmJW6UW4AWlHHzPSfzaK9jMabxY0EPRzd12k7V0,1080
|
|
7
|
+
fableforge_agent_profiler-0.1.0.dist-info/METADATA,sha256=_ED5zdnChqzIP8vStin4bm600jNcc8jXKQHtrh-uMI8,4446
|
|
8
|
+
fableforge_agent_profiler-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
fableforge_agent_profiler-0.1.0.dist-info/entry_points.txt,sha256=V0up_36o3l7wKOR3by_dFEDuWE6UVngpWjrNtLtfjEg,49
|
|
10
|
+
fableforge_agent_profiler-0.1.0.dist-info/top_level.txt,sha256=GnHEbiXyleZptDgHmb5tZoxLyIaOQNfM0Y5pNvBbzNM,15
|
|
11
|
+
fableforge_agent_profiler-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 FableForge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
agent_profiler
|