fableforge-agent-profiler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 FableForge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: fableforge-agent-profiler
3
+ Version: 0.1.0
4
+ Summary: Profile and classify agent behavior patterns from traces
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: numpy>=1.24
11
+ Requires-Dist: scikit-learn>=1.3
12
+ Requires-Dist: rich>=13.0
13
+ Requires-Dist: click>=8.0
14
+ Requires-Dist: matplotlib>=3.7
15
+ Provides-Extra: dev
16
+ Requires-Dist: pytest>=7.0; extra == "dev"
17
+ Requires-Dist: pytest-cov; extra == "dev"
18
+ Requires-Dist: ruff; extra == "dev"
19
+ Dynamic: license-file
20
+
21
+ # Agent Profiler
22
+
23
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-0-yellow.svg)](tests/)
24
+
25
+
26
+ Profile and classify agent behavior patterns from traces using transition matrices and tool distributions.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install fableforge-agent-profiler
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ ### Profile a Session
37
+
38
+ ```bash
39
+ # Profile and classify behavior
40
+ aprof profile trace.jsonl
41
+
42
+ # Save results to JSON
43
+ aprof profile trace.jsonl -o results.json
44
+ ```
45
+
46
+ ### Classify Behavior
47
+
48
+ ```bash
49
+ aprof classify trace.jsonl
50
+ ```
51
+
52
+ ### Generate Visualizations
53
+
54
+ ```bash
55
+ aprof visualize trace.jsonl --output profile_chart.png
56
+ ```
57
+
58
+ ## Python API
59
+
60
+ ```python
61
+ from agent_profiler import AgentProfiler, BehaviorClassifier, ProfileVisualizer
62
+
63
+ # Profile a session
64
+ profiler = AgentProfiler()
65
+ result = profiler.profile("trace.jsonl")
66
+ print(f"Category: {result.category} (confidence: {result.confidence:.1%})")
67
+ print(f"Tool distribution: {result.tool_distribution.tool_counts}")
68
+
69
+ # Classify directly
70
+ classifier = BehaviorClassifier()
71
+ category, confidence, scores = classifier.classify(
72
+ edit_ratio=0.25, read_ratio=0.15, bash_ratio=0.30,
73
+ error_rate=0.4, error_recovery_rate=0.35
74
+ )
75
+
76
+ # Generate visualizations
77
+ visualizer = ProfileVisualizer()
78
+ visualizer.generate_profile_chart(result, output="profile.png")
79
+ visualizer.generate_transition_heatmap("trace.jsonl", output="heatmap.png")
80
+ visualizer.generate_tool_distribution_pie("trace.jsonl", output="tools.png")
81
+ ```
82
+
83
+ ## Behavior Categories
84
+
85
+ | Category | Description | Key Indicators |
86
+ |----------|-------------|----------------|
87
+ | Debugging | Active debugging sessions | High Edit+Bash, many errors, recoveries |
88
+ | Building | Feature development | High Write+Bash, low Read |
89
+ | Exploring | Code investigation | High Read+Grep, low Edit |
90
+ | Lost | Confused/circular behavior | Circular transitions, high Read |
91
+ | Verifying | Change verification | Read after Edit, test runs |
92
+
93
+ ## License
94
+
95
+ MIT
96
+
97
+ ## Ecosystem
98
+
99
+ Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
100
+
101
+ | Project | Description |
102
+ | --- | --- |
103
+ | **[Anvil](../anvil)** | Self-verified coding agent |
104
+ | **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
105
+ | **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
106
+ | **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
107
+ | **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
108
+ | **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
109
+ | **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
110
+ | **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
111
+ | **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
112
+ | **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
113
+ | **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
114
+ | **[AgentDev](../agent-dev)** | VSCode extension with verification |
115
+ | **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
116
+ | **[AgentSkills](../agent-skills)** | npm for agent behaviors |
117
+ | **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
118
+ | **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
119
+ | **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
120
+ | **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
121
+ | **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
122
+ | **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
123
+ | **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
@@ -0,0 +1,103 @@
1
+ # Agent Profiler
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-0-yellow.svg)](tests/)
4
+
5
+
6
+ Profile and classify agent behavior patterns from traces using transition matrices and tool distributions.
7
+
8
+ ## Installation
9
+
10
+ ```bash
11
+ pip install fableforge-agent-profiler
12
+ ```
13
+
14
+ ## Quick Start
15
+
16
+ ### Profile a Session
17
+
18
+ ```bash
19
+ # Profile and classify behavior
20
+ aprof profile trace.jsonl
21
+
22
+ # Save results to JSON
23
+ aprof profile trace.jsonl -o results.json
24
+ ```
25
+
26
+ ### Classify Behavior
27
+
28
+ ```bash
29
+ aprof classify trace.jsonl
30
+ ```
31
+
32
+ ### Generate Visualizations
33
+
34
+ ```bash
35
+ aprof visualize trace.jsonl --output profile_chart.png
36
+ ```
37
+
38
+ ## Python API
39
+
40
+ ```python
41
+ from agent_profiler import AgentProfiler, BehaviorClassifier, ProfileVisualizer
42
+
43
+ # Profile a session
44
+ profiler = AgentProfiler()
45
+ result = profiler.profile("trace.jsonl")
46
+ print(f"Category: {result.category} (confidence: {result.confidence:.1%})")
47
+ print(f"Tool distribution: {result.tool_distribution.tool_counts}")
48
+
49
+ # Classify directly
50
+ classifier = BehaviorClassifier()
51
+ category, confidence, scores = classifier.classify(
52
+ edit_ratio=0.25, read_ratio=0.15, bash_ratio=0.30,
53
+ error_rate=0.4, error_recovery_rate=0.35
54
+ )
55
+
56
+ # Generate visualizations
57
+ visualizer = ProfileVisualizer()
58
+ visualizer.generate_profile_chart(result, output="profile.png")
59
+ visualizer.generate_transition_heatmap("trace.jsonl", output="heatmap.png")
60
+ visualizer.generate_tool_distribution_pie("trace.jsonl", output="tools.png")
61
+ ```
62
+
63
+ ## Behavior Categories
64
+
65
+ | Category | Description | Key Indicators |
66
+ |----------|-------------|----------------|
67
+ | Debugging | Active debugging sessions | High Edit+Bash, many errors, recoveries |
68
+ | Building | Feature development | High Write+Bash, low Read |
69
+ | Exploring | Code investigation | High Read+Grep, low Edit |
70
+ | Lost | Confused/circular behavior | Circular transitions, high Read |
71
+ | Verifying | Change verification | Read after Edit, test runs |
72
+
73
+ ## License
74
+
75
+ MIT
76
+
77
+ ## Ecosystem
78
+
79
+ Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
80
+
81
+ | Project | Description |
82
+ | --- | --- |
83
+ | **[Anvil](../anvil)** | Self-verified coding agent |
84
+ | **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
85
+ | **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
86
+ | **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
87
+ | **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
88
+ | **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
89
+ | **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
90
+ | **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
91
+ | **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
92
+ | **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
93
+ | **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
94
+ | **[AgentDev](../agent-dev)** | VSCode extension with verification |
95
+ | **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
96
+ | **[AgentSkills](../agent-skills)** | npm for agent behaviors |
97
+ | **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
98
+ | **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
99
+ | **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
100
+ | **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
101
+ | **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
102
+ | **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
103
+ | **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
@@ -0,0 +1,35 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "fableforge-agent-profiler"
7
+ version = "0.1.0"
8
+ description = "Profile and classify agent behavior patterns from traces"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = {text = "MIT"}
12
+ dependencies = [
13
+ "pydantic>=2.0",
14
+ "numpy>=1.24",
15
+ "scikit-learn>=1.3",
16
+ "rich>=13.0",
17
+ "click>=8.0",
18
+ "matplotlib>=3.7",
19
+ ]
20
+
21
+ [project.scripts]
22
+ aprof = "agent_profiler.cli:cli"
23
+
24
+ [project.optional-dependencies]
25
+ dev = ["pytest>=7.0", "pytest-cov", "ruff"]
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
29
+
30
+ [tool.ruff]
31
+ line-length = 100
32
+ target-version = "py310"
33
+
34
+ [tool.pytest.ini_options]
35
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,13 @@
1
+ """Agent Profiler - Profile and classify agent behavior patterns."""
2
+
3
+ from agent_profiler.profiler import AgentProfiler, ProfileResult
4
+ from agent_profiler.classifier import BehaviorClassifier
5
+ from agent_profiler.visualizer import ProfileVisualizer
6
+
7
+ __all__ = [
8
+ "AgentProfiler",
9
+ "ProfileResult",
10
+ "BehaviorClassifier",
11
+ "ProfileVisualizer",
12
+ ]
13
+ __version__ = "0.1.0"
@@ -0,0 +1,266 @@
1
+ """Behavior classification using pretrained profiles and transition matrices."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+
11
@dataclass
class BehaviorProfile:
    """Weight template describing one behavior category.

    Every ``*_weight`` field is the per-feature weight that
    ``BehaviorClassifier.compute_scores`` multiplies with the matching
    observed value (tool-call ratios, error rates, circularity, entropy).
    All weights default to ``0.0`` so a profile only needs to spell out the
    features it cares about.

    Attributes:
        name: Short identifier of the category.
        description: Human-readable summary of the category.
        edit_weight: Weight applied to the edit tool-call ratio.
        read_weight: Weight applied to the read tool-call ratio.
        grep_weight: Weight applied to the grep/search tool-call ratio.
        bash_weight: Weight applied to the bash/shell tool-call ratio.
        write_weight: Weight applied to the write tool-call ratio.
        error_rate_weight: Weight applied to the session error rate.
        error_recovery_weight: Weight applied to the error recovery rate.
        circular_weight: Weight applied to the circular-transition ratio.
        entropy_weight: Weight applied to the tool-distribution entropy.
        min_turns: Turn count below which the classifier halves this
            profile's score.
    """

    # Identification.
    name: str
    description: str

    # Tool-usage weights.
    edit_weight: float = 0.0
    read_weight: float = 0.0
    grep_weight: float = 0.0
    bash_weight: float = 0.0
    write_weight: float = 0.0

    # Error-handling weights.
    error_rate_weight: float = 0.0
    error_recovery_weight: float = 0.0

    # Session-shape weights.
    circular_weight: float = 0.0
    entropy_weight: float = 0.0

    # Minimum session length for an unpenalized score.
    min_turns: int = 0
27
+
28
+
29
class DebuggingProfile(BehaviorProfile):
    """Template for debugging sessions.

    The heaviest weights sit on the error rate (0.40), error recovery (0.35)
    and bash usage (0.30), followed by edits (0.25).
    """

    def __init__(self) -> None:
        # Collect the template in one mapping, then hand it to the dataclass
        # initializer of the parent.
        template = {
            "name": "debugging",
            "description": "Active debugging with edits and error recovery loops",
            "edit_weight": 0.25,
            "read_weight": 0.15,
            "grep_weight": 0.10,
            "bash_weight": 0.30,
            "write_weight": 0.05,
            "error_rate_weight": 0.40,
            "error_recovery_weight": 0.35,
            "circular_weight": 0.20,
            "entropy_weight": 0.15,
            "min_turns": 5,
        }
        super().__init__(**template)
47
+
48
+
49
class BuildingProfile(BehaviorProfile):
    """Template for productive building sessions.

    Writes (0.30) and bash usage (0.25) carry the largest weights; the error
    rate is weighted negatively (-0.10), so errors lower this profile's score.
    """

    def __init__(self) -> None:
        # Collect the template in one mapping, then hand it to the dataclass
        # initializer of the parent.
        template = {
            "name": "building",
            "description": "Active feature development with writes and executions",
            "edit_weight": 0.15,
            "read_weight": 0.10,
            "grep_weight": 0.05,
            "bash_weight": 0.25,
            "write_weight": 0.30,
            "error_rate_weight": -0.10,
            "error_recovery_weight": 0.10,
            "circular_weight": 0.05,
            "entropy_weight": 0.20,
            "min_turns": 3,
        }
        super().__init__(**template)
67
+
68
+
69
class ExploringProfile(BehaviorProfile):
    """Template for code exploration sessions.

    Reads (0.35) and grep/search (0.30) dominate, edits and writes are nearly
    unweighted, and the error rate is weighted negatively (-0.20).
    """

    def __init__(self) -> None:
        # Collect the template in one mapping, then hand it to the dataclass
        # initializer of the parent.
        template = {
            "name": "exploring",
            "description": "Code exploration with reads and searches, minimal edits",
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.30,
            "bash_weight": 0.10,
            "write_weight": 0.02,
            "error_rate_weight": -0.20,
            "error_recovery_weight": 0.05,
            "circular_weight": 0.10,
            "entropy_weight": 0.25,
            "min_turns": 3,
        }
        super().__init__(**template)
87
+
88
+
89
class LostProfile(BehaviorProfile):
    """Template for lost or confused sessions.

    Circular transitions (0.40) and reads (0.35) carry the largest weights,
    while error recovery is weighted negatively (-0.10).
    """

    def __init__(self) -> None:
        # Collect the template in one mapping, then hand it to the dataclass
        # initializer of the parent.
        template = {
            "name": "lost",
            "description": "Confused or circular behavior, reading without progress",
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.15,
            "bash_weight": 0.05,
            "write_weight": 0.02,
            "error_rate_weight": 0.10,
            "error_recovery_weight": -0.10,
            "circular_weight": 0.40,
            "entropy_weight": 0.10,
            "min_turns": 5,
        }
        super().__init__(**template)
107
+
108
+
109
class VerifyingProfile(BehaviorProfile):
    """Template for change-verification sessions.

    Reads (0.30), bash usage (0.25) and error recovery (0.25) carry the
    largest weights; the error rate is weighted slightly negatively (-0.05).
    """

    def __init__(self) -> None:
        # Collect the template in one mapping, then hand it to the dataclass
        # initializer of the parent.
        template = {
            "name": "verifying",
            "description": "Verifying changes with reads after edits and test execution",
            "edit_weight": 0.20,
            "read_weight": 0.30,
            "grep_weight": 0.10,
            "bash_weight": 0.25,
            "write_weight": 0.05,
            "error_rate_weight": -0.05,
            "error_recovery_weight": 0.25,
            "circular_weight": 0.15,
            "entropy_weight": 0.15,
            "min_turns": 4,
        }
        super().__init__(**template)
127
+
128
+
129
class BehaviorClassifier:
    """Classify agent behavior by scoring features against predefined profiles.

    The profiles are hard-coded weight templates (not learned models). Each
    observed session is reduced to nine features, compared with every
    profile, and the best-scoring profile name is reported as the category.

    Attributes:
        profiles: Mapping of category name to its weight template. Iteration
            order decides which category wins when scores tie.
    """

    def __init__(self) -> None:
        self.profiles: dict[str, BehaviorProfile] = {
            "debugging": DebuggingProfile(),
            "building": BuildingProfile(),
            "exploring": ExploringProfile(),
            "lost": LostProfile(),
            "verifying": VerifyingProfile(),
        }

    @staticmethod
    def _profile_vector(profile: BehaviorProfile) -> np.ndarray:
        """Return *profile*'s weights in the fixed feature order used for scoring."""
        return np.array(
            [
                profile.bash_weight,
                profile.circular_weight,
                profile.edit_weight,
                profile.entropy_weight,
                profile.error_rate_weight,
                profile.error_recovery_weight,
                profile.grep_weight,
                profile.read_weight,
                profile.write_weight,
            ],
            dtype=float,
        )

    def compute_scores(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> dict[str, float]:
        """Compute profile scores for each behavior category.

        For every profile the raw score is the mean of (a) the dot product of
        the observed features with the profile weights and (b) the cosine
        similarity of the two vectors rescaled from [-1, 1] to [0, 1]. The raw
        score is halved when ``num_turns`` is below the profile's
        ``min_turns``, clamped to [0, 1], and finally all scores are divided
        by their sum.

        Args:
            edit_ratio: Ratio of edit tool calls.
            read_ratio: Ratio of read tool calls.
            grep_ratio: Ratio of grep/search tool calls.
            bash_ratio: Ratio of bash/shell tool calls.
            write_ratio: Ratio of write tool calls.
            error_rate: Rate of errors in the session.
            error_recovery_rate: Rate of error recovery.
            circular_ratio: Ratio of circular tool transitions.
            entropy: Entropy of tool distribution.
            num_turns: Number of turns in the session.

        Returns:
            Dict mapping category name to a score in [0, 1]. The scores sum
            to 1 unless every clamped score is 0, in which case they are all
            returned as 0. The dict is empty when no profiles are registered.
        """
        # The observation does not depend on the profile, so build the vector
        # and its norm once. The order must match ``_profile_vector``.
        obs_vec = np.array(
            [
                bash_ratio,
                circular_ratio,
                edit_ratio,
                entropy,
                error_rate,
                error_recovery_rate,
                grep_ratio,
                read_ratio,
                write_ratio,
            ],
            dtype=float,
        )
        obs_norm = float(np.linalg.norm(obs_vec))

        scores: dict[str, float] = {}
        for name, profile in self.profiles.items():
            prof_vec = self._profile_vector(profile)
            prof_norm = float(np.linalg.norm(prof_vec))

            # One dot product over a fixed order serves both the weighted sum
            # and the cosine similarity, keeping the result reproducible.
            dot_product = float(np.dot(obs_vec, prof_vec))

            if obs_norm > 0 and prof_norm > 0:
                cosine_sim = dot_product / (obs_norm * prof_norm)
            else:
                # Cosine similarity is undefined for a zero vector.
                cosine_sim = 0.0

            score = (dot_product + (cosine_sim + 1) / 2) / 2.0

            # Sessions shorter than the profile expects count only half.
            if num_turns < profile.min_turns:
                score *= 0.5

            scores[name] = max(0.0, min(1.0, score))

        total = sum(scores.values())
        if total > 0:
            scores = {name: value / total for name, value in scores.items()}

        return scores

    def classify(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> tuple[str, float, dict[str, float]]:
        """Classify behavior into a category.

        Accepts the same features as ``compute_scores`` and picks the
        category with the highest score; on ties the category that appears
        first in ``self.profiles`` wins.

        Returns:
            Tuple of (category, confidence, all_scores).

        Raises:
            ValueError: If no profiles are registered.
        """
        scores = self.compute_scores(
            edit_ratio=edit_ratio,
            read_ratio=read_ratio,
            grep_ratio=grep_ratio,
            bash_ratio=bash_ratio,
            write_ratio=write_ratio,
            error_rate=error_rate,
            error_recovery_rate=error_recovery_rate,
            circular_ratio=circular_ratio,
            entropy=entropy,
            num_turns=num_turns,
        )
        if not scores:
            # max() on an empty dict raises the same exception type, but with
            # a message that does not point at the actual cause.
            raise ValueError("cannot classify: no behavior profiles are registered")
        best = max(scores, key=scores.__getitem__)
        return best, scores[best], scores

    def get_profile(self, name: str) -> BehaviorProfile | None:
        """Get a profile by name, or ``None`` when the name is unknown."""
        return self.profiles.get(name)

    def list_profiles(self) -> dict[str, str]:
        """List all available profiles with descriptions."""
        return {name: p.description for name, p in self.profiles.items()}