fableforge-agent-profiler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fableforge_agent_profiler-0.1.0/LICENSE +21 -0
- fableforge_agent_profiler-0.1.0/PKG-INFO +123 -0
- fableforge_agent_profiler-0.1.0/README.md +103 -0
- fableforge_agent_profiler-0.1.0/pyproject.toml +35 -0
- fableforge_agent_profiler-0.1.0/setup.cfg +4 -0
- fableforge_agent_profiler-0.1.0/src/agent_profiler/__init__.py +13 -0
- fableforge_agent_profiler-0.1.0/src/agent_profiler/classifier.py +266 -0
- fableforge_agent_profiler-0.1.0/src/agent_profiler/cli.py +180 -0
- fableforge_agent_profiler-0.1.0/src/agent_profiler/profiler.py +308 -0
- fableforge_agent_profiler-0.1.0/src/agent_profiler/visualizer.py +224 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/PKG-INFO +123 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/SOURCES.txt +14 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/dependency_links.txt +1 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/entry_points.txt +2 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/requires.txt +11 -0
- fableforge_agent_profiler-0.1.0/src/fableforge_agent_profiler.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 FableForge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fableforge-agent-profiler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Profile and classify agent behavior patterns from traces
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: numpy>=1.24
|
|
11
|
+
Requires-Dist: scikit-learn>=1.3
|
|
12
|
+
Requires-Dist: rich>=13.0
|
|
13
|
+
Requires-Dist: click>=8.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.7
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
18
|
+
Requires-Dist: ruff; extra == "dev"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Agent Profiler
|
|
22
|
+
|
|
23
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
Profile and classify agent behavior patterns from traces using transition matrices and tool distributions.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install fableforge-agent-profiler
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
### Profile a Session
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Profile and classify behavior
|
|
40
|
+
aprof profile trace.jsonl
|
|
41
|
+
|
|
42
|
+
# Save results to JSON
|
|
43
|
+
aprof profile trace.jsonl -o results.json
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Classify Behavior
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
aprof classify trace.jsonl
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Generate Visualizations
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
aprof visualize trace.jsonl --output profile_chart.png
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Programming API
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from agent_profiler import AgentProfiler, BehaviorClassifier, ProfileVisualizer
|
|
62
|
+
|
|
63
|
+
# Profile a session
|
|
64
|
+
profiler = AgentProfiler()
|
|
65
|
+
result = profiler.profile("trace.jsonl")
|
|
66
|
+
print(f"Category: {result.category} (confidence: {result.confidence:.1%})")
|
|
67
|
+
print(f"Tool distribution: {result.tool_distribution.tool_counts}")
|
|
68
|
+
|
|
69
|
+
# Classify directly
|
|
70
|
+
classifier = BehaviorClassifier()
|
|
71
|
+
category, confidence, scores = classifier.classify(
|
|
72
|
+
edit_ratio=0.25, read_ratio=0.15, bash_ratio=0.30,
|
|
73
|
+
error_rate=0.4, error_recovery_rate=0.35
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Generate visualizations
|
|
77
|
+
visualizer = ProfileVisualizer()
|
|
78
|
+
visualizer.generate_profile_chart(result, output="profile.png")
|
|
79
|
+
visualizer.generate_transition_heatmap("trace.jsonl", output="heatmap.png")
|
|
80
|
+
visualizer.generate_tool_distribution_pie("trace.jsonl", output="tools.png")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Behavior Categories
|
|
84
|
+
|
|
85
|
+
| Category | Description | Key Indicators |
|
|
86
|
+
|----------|-------------|----------------|
|
|
87
|
+
| Debugging | Active debugging sessions | High Edit+Bash, many errors, recoveries |
|
|
88
|
+
| Building | Feature development | High Write+Bash, low Read |
|
|
89
|
+
| Exploring | Code investigation | High Read+Grep, low Edit |
|
|
90
|
+
| Lost | Confused/circular behavior | Circular transitions, high Read |
|
|
91
|
+
| Verifying | Change verification | Read after Edit, test runs |
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
MIT
|
|
96
|
+
|
|
97
|
+
## Ecosystem
|
|
98
|
+
|
|
99
|
+
Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
100
|
+
|
|
101
|
+
| Project | Description |
|
|
102
|
+
| --- | --- |
|
|
103
|
+
| **[Anvil](../anvil)** | Self-verified coding agent |
|
|
104
|
+
| **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
|
|
105
|
+
| **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
|
|
106
|
+
| **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
|
|
107
|
+
| **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
|
|
108
|
+
| **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
|
|
109
|
+
| **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
|
|
110
|
+
| **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
|
|
111
|
+
| **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
|
|
112
|
+
| **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
|
|
113
|
+
| **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
|
|
114
|
+
| **[AgentDev](../agent-dev)** | VSCode extension with verification |
|
|
115
|
+
| **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
|
|
116
|
+
| **[AgentSkills](../agent-skills)** | npm for agent behaviors |
|
|
117
|
+
| **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
|
|
118
|
+
| **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
|
|
119
|
+
| **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
|
|
120
|
+
| **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
|
|
121
|
+
| **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
|
|
122
|
+
| **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
|
|
123
|
+
| **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Agent Profiler
|
|
2
|
+
|
|
3
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Profile and classify agent behavior patterns from traces using transition matrices and tool distributions.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install fableforge-agent-profiler
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
### Profile a Session
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Profile and classify behavior
|
|
20
|
+
aprof profile trace.jsonl
|
|
21
|
+
|
|
22
|
+
# Save results to JSON
|
|
23
|
+
aprof profile trace.jsonl -o results.json
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Classify Behavior
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
aprof classify trace.jsonl
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Generate Visualizations
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
aprof visualize trace.jsonl --output profile_chart.png
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Programming API
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from agent_profiler import AgentProfiler, BehaviorClassifier, ProfileVisualizer
|
|
42
|
+
|
|
43
|
+
# Profile a session
|
|
44
|
+
profiler = AgentProfiler()
|
|
45
|
+
result = profiler.profile("trace.jsonl")
|
|
46
|
+
print(f"Category: {result.category} (confidence: {result.confidence:.1%})")
|
|
47
|
+
print(f"Tool distribution: {result.tool_distribution.tool_counts}")
|
|
48
|
+
|
|
49
|
+
# Classify directly
|
|
50
|
+
classifier = BehaviorClassifier()
|
|
51
|
+
category, confidence, scores = classifier.classify(
|
|
52
|
+
edit_ratio=0.25, read_ratio=0.15, bash_ratio=0.30,
|
|
53
|
+
error_rate=0.4, error_recovery_rate=0.35
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Generate visualizations
|
|
57
|
+
visualizer = ProfileVisualizer()
|
|
58
|
+
visualizer.generate_profile_chart(result, output="profile.png")
|
|
59
|
+
visualizer.generate_transition_heatmap("trace.jsonl", output="heatmap.png")
|
|
60
|
+
visualizer.generate_tool_distribution_pie("trace.jsonl", output="tools.png")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Behavior Categories
|
|
64
|
+
|
|
65
|
+
| Category | Description | Key Indicators |
|
|
66
|
+
|----------|-------------|----------------|
|
|
67
|
+
| Debugging | Active debugging sessions | High Edit+Bash, many errors, recoveries |
|
|
68
|
+
| Building | Feature development | High Write+Bash, low Read |
|
|
69
|
+
| Exploring | Code investigation | High Read+Grep, low Edit |
|
|
70
|
+
| Lost | Confused/circular behavior | Circular transitions, high Read |
|
|
71
|
+
| Verifying | Change verification | Read after Edit, test runs |
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
MIT
|
|
76
|
+
|
|
77
|
+
## Ecosystem
|
|
78
|
+
|
|
79
|
+
Part of the [FableForge](../) ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
80
|
+
|
|
81
|
+
| Project | Description |
|
|
82
|
+
| --- | --- |
|
|
83
|
+
| **[Anvil](../anvil)** | Self-verified coding agent |
|
|
84
|
+
| **[VerifyLoop](../verifyloop)** | Plan→Execute→Verify→Recover framework |
|
|
85
|
+
| **[ErrorRecovery](../error-recovery)** | Self-healing middleware (3,725 error patterns) |
|
|
86
|
+
| **[FableForge-14B](../fableforge-14b)** | The fine-tuned 14B model (4-stage training) |
|
|
87
|
+
| **[ShellWhisperer](../shell-whisperer)** | 1.5B edge agent (phone/RPi, 50ms) |
|
|
88
|
+
| **[ReasonCritic](../reason-critic)** | Verification model (130 benchmark tasks) |
|
|
89
|
+
| **[TraceCompiler](../trace-compiler)** | Compile traces → LoRA skills |
|
|
90
|
+
| **[AgentRuntime](../agent-runtime)** | Persistent agent daemon (systemd for AI) |
|
|
91
|
+
| **[AgentSwarm](../agent-swarm)** | Multi-agent from real trace transitions |
|
|
92
|
+
| **[AgentTelemetry](../agent-telemetry)** | Datadog for agents (token tracking, costs) |
|
|
93
|
+
| **[BenchAgent](../bench-agent)** | HumanEval for tool-use (107 tasks) |
|
|
94
|
+
| **[AgentDev](../agent-dev)** | VSCode extension with verification |
|
|
95
|
+
| **[TraceViz](../trace-viz)** | Trace replay visualizer (Next.js) |
|
|
96
|
+
| **[AgentSkills](../agent-skills)** | npm for agent behaviors |
|
|
97
|
+
| **[AgentCurriculum](../agent-curriculum)** | 5-stage progressive training |
|
|
98
|
+
| **[AgentFuzzer](../agent-fuzzer)** | Adversarial testing for agents |
|
|
99
|
+
| **[AgentConstitution](../agent-constitution)** | Safety guardrails from traces |
|
|
100
|
+
| **[CostOptimizer](../cost-optimizer)** | Token cost reduction (50-80%) |
|
|
101
|
+
| **[AgentProfiler](../agent-profiler)** | Behavioral fingerprinting |
|
|
102
|
+
| **[TrajectoryDistiller](../trajectory-distiller)** | Trace→training data pipeline |
|
|
103
|
+
| **[Fable5-Dataset](../fable5-dataset)** | HuggingFace dataset release |
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fableforge-agent-profiler"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Profile and classify agent behavior patterns from traces"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pydantic>=2.0",
|
|
14
|
+
"numpy>=1.24",
|
|
15
|
+
"scikit-learn>=1.3",
|
|
16
|
+
"rich>=13.0",
|
|
17
|
+
"click>=8.0",
|
|
18
|
+
"matplotlib>=3.7",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
aprof = "agent_profiler.cli:cli"
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
dev = ["pytest>=7.0", "pytest-cov", "ruff"]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
where = ["src"]
|
|
29
|
+
|
|
30
|
+
[tool.ruff]
|
|
31
|
+
line-length = 100
|
|
32
|
+
target-version = "py310"
|
|
33
|
+
|
|
34
|
+
[tool.pytest.ini_options]
|
|
35
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Agent Profiler - Profile and classify agent behavior patterns."""
|
|
2
|
+
|
|
3
|
+
from agent_profiler.profiler import AgentProfiler, ProfileResult
|
|
4
|
+
from agent_profiler.classifier import BehaviorClassifier
|
|
5
|
+
from agent_profiler.visualizer import ProfileVisualizer
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"AgentProfiler",
|
|
9
|
+
"ProfileResult",
|
|
10
|
+
"BehaviorClassifier",
|
|
11
|
+
"ProfileVisualizer",
|
|
12
|
+
]
|
|
13
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Behavior classification using pretrained profiles and transition matrices."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class BehaviorProfile:
    """Weight template describing one behavior category.

    Each ``*_weight`` is multiplied with the matching observed feature when
    ``BehaviorClassifier.compute_scores`` scores a session against this
    profile. Every weight defaults to ``0.0`` and ``min_turns`` to ``0``.
    """

    # Category identifier and human-readable summary.
    name: str
    description: str

    # Weights paired with the observed tool-call ratios.
    edit_weight: float = 0.0
    read_weight: float = 0.0
    grep_weight: float = 0.0
    bash_weight: float = 0.0
    write_weight: float = 0.0

    # Weights paired with the observed error rate and error-recovery rate.
    error_rate_weight: float = 0.0
    error_recovery_weight: float = 0.0

    # Weights paired with the circular-transition ratio and the entropy.
    circular_weight: float = 0.0
    entropy_weight: float = 0.0

    # Sessions with fewer turns than this have this profile's score halved.
    min_turns: int = 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DebuggingProfile(BehaviorProfile):
    """Template for debugging sessions.

    Bash and edit carry the largest tool weights, and the error-rate and
    error-recovery weights are the highest of all features.
    """

    def __init__(self) -> None:
        # Per-feature weights, forwarded as keyword arguments below.
        weights = {
            "edit_weight": 0.25,
            "read_weight": 0.15,
            "grep_weight": 0.10,
            "bash_weight": 0.30,
            "write_weight": 0.05,
            "error_rate_weight": 0.40,
            "error_recovery_weight": 0.35,
            "circular_weight": 0.20,
            "entropy_weight": 0.15,
        }
        super().__init__(
            name="debugging",
            description="Active debugging with edits and error recovery loops",
            min_turns=5,
            **weights,
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BuildingProfile(BehaviorProfile):
    """Template for feature-building sessions.

    Write and bash carry the largest tool weights, read is low, and the
    error-rate weight is negative.
    """

    def __init__(self) -> None:
        # Per-feature weights, forwarded as keyword arguments below.
        weights = {
            "edit_weight": 0.15,
            "read_weight": 0.10,
            "grep_weight": 0.05,
            "bash_weight": 0.25,
            "write_weight": 0.30,
            "error_rate_weight": -0.10,
            "error_recovery_weight": 0.10,
            "circular_weight": 0.05,
            "entropy_weight": 0.20,
        }
        super().__init__(
            name="building",
            description="Active feature development with writes and executions",
            min_turns=3,
            **weights,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ExploringProfile(BehaviorProfile):
    """Template for exploration sessions.

    Read and grep carry the largest tool weights, edit and write are
    minimal, and the error-rate weight is negative.
    """

    def __init__(self) -> None:
        # Per-feature weights, forwarded as keyword arguments below.
        weights = {
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.30,
            "bash_weight": 0.10,
            "write_weight": 0.02,
            "error_rate_weight": -0.20,
            "error_recovery_weight": 0.05,
            "circular_weight": 0.10,
            "entropy_weight": 0.25,
        }
        super().__init__(
            name="exploring",
            description="Code exploration with reads and searches, minimal edits",
            min_turns=3,
            **weights,
        )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class LostProfile(BehaviorProfile):
    """Template for lost or confused sessions.

    The circular-transition weight is the largest, read is high, and the
    error-recovery weight is negative.
    """

    def __init__(self) -> None:
        # Per-feature weights, forwarded as keyword arguments below.
        weights = {
            "edit_weight": 0.05,
            "read_weight": 0.35,
            "grep_weight": 0.15,
            "bash_weight": 0.05,
            "write_weight": 0.02,
            "error_rate_weight": 0.10,
            "error_recovery_weight": -0.10,
            "circular_weight": 0.40,
            "entropy_weight": 0.10,
        }
        super().__init__(
            name="lost",
            description="Confused or circular behavior, reading without progress",
            min_turns=5,
            **weights,
        )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class VerifyingProfile(BehaviorProfile):
    """Template for verification sessions.

    Read, bash and edit carry the largest tool weights, error recovery is
    weighted positively, and the error-rate weight is slightly negative.
    """

    def __init__(self) -> None:
        # Per-feature weights, forwarded as keyword arguments below.
        weights = {
            "edit_weight": 0.20,
            "read_weight": 0.30,
            "grep_weight": 0.10,
            "bash_weight": 0.25,
            "write_weight": 0.05,
            "error_rate_weight": -0.05,
            "error_recovery_weight": 0.25,
            "circular_weight": 0.15,
            "entropy_weight": 0.15,
        }
        super().__init__(
            name="verifying",
            description="Verifying changes with reads after edits and test execution",
            min_turns=4,
            **weights,
        )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class BehaviorClassifier:
    """Classify agent behavior by scoring observed features against profiles.

    ``self.profiles`` maps a category name to a ``BehaviorProfile``. Every
    profile is scored against the observed session features and the scores
    are normalised so they sum to 1 (when at least one score is positive).
    """

    def __init__(self) -> None:
        self.profiles: dict[str, BehaviorProfile] = {
            "debugging": DebuggingProfile(),
            "building": BuildingProfile(),
            "exploring": ExploringProfile(),
            "lost": LostProfile(),
            "verifying": VerifyingProfile(),
        }

    @staticmethod
    def _weight_vector(profile: BehaviorProfile) -> np.ndarray:
        """Return the profile's weights in the fixed scoring feature order."""
        # Order must match the observation vector built in compute_scores
        # (alphabetical by feature key).
        return np.array(
            [
                profile.bash_weight,
                profile.circular_weight,
                profile.edit_weight,
                profile.entropy_weight,
                profile.error_rate_weight,
                profile.error_recovery_weight,
                profile.grep_weight,
                profile.read_weight,
                profile.write_weight,
            ],
            dtype=float,
        )

    def compute_scores(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> dict[str, float]:
        """Compute a normalised score for each behavior category.

        For every profile the raw score is the mean of the dot product
        between observation and profile weights and the cosine similarity
        rescaled from [-1, 1] to [0, 1]. If either vector has zero norm the
        cosine similarity is taken as 0. The raw score is halved when
        ``num_turns`` is below the profile's ``min_turns``, clamped to
        [0, 1], and finally divided by the sum of all scores.

        Args:
            edit_ratio: Ratio of edit tool calls.
            read_ratio: Ratio of read tool calls.
            grep_ratio: Ratio of grep/search tool calls.
            bash_ratio: Ratio of bash/shell tool calls.
            write_ratio: Ratio of write tool calls.
            error_rate: Rate of errors in the session.
            error_recovery_rate: Rate of error recovery.
            circular_ratio: Ratio of circular tool transitions.
            entropy: Entropy of tool distribution.
            num_turns: Number of turns in the session.

        Returns:
            Dict mapping category name to its share of the total score. The
            values sum to 1 unless every score is 0, in which case all
            values are 0.
        """
        # The observation is the same for every profile, so build it once.
        # A fixed element order (instead of iterating a set of keys) keeps
        # the floating-point sums identical from run to run.
        obs_vec = np.array(
            [
                bash_ratio,
                circular_ratio,
                edit_ratio,
                entropy,
                error_rate,
                error_recovery_rate,
                grep_ratio,
                read_ratio,
                write_ratio,
            ],
            dtype=float,
        )
        obs_norm = float(np.linalg.norm(obs_vec))

        scores: dict[str, float] = {}
        for name, profile in self.profiles.items():
            prof_vec = self._weight_vector(profile)
            prof_norm = float(np.linalg.norm(prof_vec))

            # One dot product serves both the raw term and the cosine term.
            dot_product = float(np.dot(obs_vec, prof_vec))
            if obs_norm > 0 and prof_norm > 0:
                cosine_sim = dot_product / (obs_norm * prof_norm)
            else:
                cosine_sim = 0.0

            score = (dot_product + (cosine_sim + 1) / 2) / 2.0

            # Sessions shorter than the profile's minimum are penalised.
            if num_turns < profile.min_turns:
                score *= 0.5

            scores[name] = max(0.0, min(1.0, score))

        total = sum(scores.values())
        if total > 0:
            scores = {k: v / total for k, v in scores.items()}

        return scores

    def classify(
        self,
        edit_ratio: float = 0.0,
        read_ratio: float = 0.0,
        grep_ratio: float = 0.0,
        bash_ratio: float = 0.0,
        write_ratio: float = 0.0,
        error_rate: float = 0.0,
        error_recovery_rate: float = 0.0,
        circular_ratio: float = 0.0,
        entropy: float = 0.0,
        num_turns: int = 0,
    ) -> tuple[str, float, dict[str, float]]:
        """Classify behavior into the highest-scoring category.

        Args:
            edit_ratio: Ratio of edit tool calls.
            read_ratio: Ratio of read tool calls.
            grep_ratio: Ratio of grep/search tool calls.
            bash_ratio: Ratio of bash/shell tool calls.
            write_ratio: Ratio of write tool calls.
            error_rate: Rate of errors in the session.
            error_recovery_rate: Rate of error recovery.
            circular_ratio: Ratio of circular tool transitions.
            entropy: Entropy of tool distribution.
            num_turns: Number of turns in the session.

        Returns:
            Tuple of (category, confidence, all_scores), where confidence is
            the winning category's score from ``compute_scores``. On a tie
            the category that comes first in ``self.profiles`` wins.

        Raises:
            ValueError: If ``self.profiles`` is empty.
        """
        scores = self.compute_scores(
            edit_ratio=edit_ratio,
            read_ratio=read_ratio,
            grep_ratio=grep_ratio,
            bash_ratio=bash_ratio,
            write_ratio=write_ratio,
            error_rate=error_rate,
            error_recovery_rate=error_recovery_rate,
            circular_ratio=circular_ratio,
            entropy=entropy,
            num_turns=num_turns,
        )
        best = max(scores, key=scores.__getitem__)
        return best, scores[best], scores

    def get_profile(self, name: str) -> BehaviorProfile | None:
        """Return the profile registered under ``name``, or None if absent."""
        return self.profiles.get(name)

    def list_profiles(self) -> dict[str, str]:
        """Return a mapping of profile name to profile description."""
        return {name: p.description for name, p in self.profiles.items()}
|