agent-control-evaluator-budget 7.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_evaluator_budget-7.7.0/.gitignore +90 -0
- agent_control_evaluator_budget-7.7.0/Makefile +28 -0
- agent_control_evaluator_budget-7.7.0/PKG-INFO +164 -0
- agent_control_evaluator_budget-7.7.0/README.md +148 -0
- agent_control_evaluator_budget-7.7.0/pyproject.toml +47 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/__init__.py +0 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/budget/__init__.py +24 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/budget/config.py +120 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/budget/evaluator.py +309 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/budget/memory_store.py +332 -0
- agent_control_evaluator_budget-7.7.0/src/agent_control_evaluator_budget/budget/store.py +124 -0
- agent_control_evaluator_budget-7.7.0/tests/__init__.py +0 -0
- agent_control_evaluator_budget-7.7.0/tests/budget/__init__.py +0 -0
- agent_control_evaluator_budget-7.7.0/tests/budget/test_budget.py +2105 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
23
|
+
|
|
24
|
+
# Virtual environments
|
|
25
|
+
venv/
|
|
26
|
+
env/
|
|
27
|
+
ENV/
|
|
28
|
+
.venv
|
|
29
|
+
|
|
30
|
+
# UV
|
|
31
|
+
.uv/
|
|
32
|
+
uv.lock
|
|
33
|
+
|
|
34
|
+
# IDEs
|
|
35
|
+
.vscode/
|
|
36
|
+
.idea/
|
|
37
|
+
*.swp
|
|
38
|
+
*.swo
|
|
39
|
+
*~
|
|
40
|
+
.DS_Store
|
|
41
|
+
coverage-*.xml
|
|
42
|
+
|
|
43
|
+
# Testing
|
|
44
|
+
.pytest_cache/
|
|
45
|
+
.coverage
|
|
46
|
+
coverage-*.xml
|
|
47
|
+
htmlcov/
|
|
48
|
+
.tox/
|
|
49
|
+
.mypy_cache/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Playwright
|
|
53
|
+
playwright-report/
|
|
54
|
+
playwright/.cache/
|
|
55
|
+
test-results/
|
|
56
|
+
|
|
57
|
+
# Environment variables
|
|
58
|
+
.env
|
|
59
|
+
.env.local
|
|
60
|
+
.env.*.local
|
|
61
|
+
|
|
62
|
+
# Logs
|
|
63
|
+
*.log
|
|
64
|
+
logs/
|
|
65
|
+
|
|
66
|
+
# Database
|
|
67
|
+
*.db
|
|
68
|
+
*.sqlite3
|
|
69
|
+
server/openapi.json
|
|
70
|
+
server/.generated/
|
|
71
|
+
|
|
72
|
+
# Temporary files
|
|
73
|
+
tmp/
|
|
74
|
+
temp/
|
|
75
|
+
*.tmp
|
|
76
|
+
|
|
77
|
+
# OS
|
|
78
|
+
.DS_Store
|
|
79
|
+
Thumbs.db
|
|
80
|
+
|
|
81
|
+
# Intellij
|
|
82
|
+
*.iml
|
|
83
|
+
|
|
84
|
+
## CLAUDE
|
|
85
|
+
.claude
|
|
86
|
+
|
|
87
|
+
# Local notes
|
|
88
|
+
rearch_plan.md
|
|
89
|
+
|
|
90
|
+
node_modules
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
.PHONY: help test lint lint-fix typecheck check build
|
|
2
|
+
|
|
3
|
+
help:
|
|
4
|
+
@echo "Agent Control Evaluator - Budget - Makefile commands"
|
|
5
|
+
@echo ""
|
|
6
|
+
@echo " make test - run pytest"
|
|
7
|
+
@echo " make lint - run ruff check"
|
|
8
|
+
@echo " make lint-fix - run ruff check --fix"
|
|
9
|
+
@echo " make typecheck - run mypy"
|
|
10
|
+
@echo " make check - run lint, typecheck, and test"
|
|
11
|
+
@echo " make build - build package"
|
|
12
|
+
|
|
13
|
+
test:
|
|
14
|
+
uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-budget.xml -q
|
|
15
|
+
|
|
16
|
+
lint:
|
|
17
|
+
uv run --with ruff ruff check --config ../../../pyproject.toml src/
|
|
18
|
+
|
|
19
|
+
lint-fix:
|
|
20
|
+
uv run --with ruff ruff check --config ../../../pyproject.toml --fix src/
|
|
21
|
+
|
|
22
|
+
typecheck:
|
|
23
|
+
uv run --with mypy mypy --config-file ../../../pyproject.toml src/
|
|
24
|
+
|
|
25
|
+
check: lint typecheck test
|
|
26
|
+
|
|
27
|
+
build:
|
|
28
|
+
uv build
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-control-evaluator-budget
|
|
3
|
+
Version: 7.7.0
|
|
4
|
+
Summary: Budget evaluator for agent-control -- cumulative LLM cost and token tracking
|
|
5
|
+
Author: Agent Control Team
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: agent-control-evaluators>=7.7.0
|
|
9
|
+
Requires-Dist: agent-control-models>=7.7.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: mypy>=1.8.0; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
14
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# agent-control-evaluator-budget
|
|
18
|
+
|
|
19
|
+
Budget evaluator for agent-control that tracks cumulative LLM token and cost usage per scope and time window.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install "agent-control-evaluators[budget]"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Fallback direct wheel install:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install agent-control-evaluator-budget
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
For local development:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
uv pip install -e evaluators/contrib/budget
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quickstart
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from agent_control_evaluator_budget.budget import (
|
|
43
|
+
BudgetEvaluatorConfig,
|
|
44
|
+
BudgetLimitRule,
|
|
45
|
+
ModelPricing,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
config = BudgetEvaluatorConfig(
|
|
49
|
+
budget_id="support-daily",
|
|
50
|
+
limits=[
|
|
51
|
+
BudgetLimitRule(
|
|
52
|
+
scope={"agent": "support"},
|
|
53
|
+
group_by="user_id",
|
|
54
|
+
window_seconds=86_400,
|
|
55
|
+
limit=500,
|
|
56
|
+
limit_unit="usd_cents",
|
|
57
|
+
),
|
|
58
|
+
BudgetLimitRule(
|
|
59
|
+
scope={"agent": "support"},
|
|
60
|
+
group_by="user_id",
|
|
61
|
+
window_seconds=86_400,
|
|
62
|
+
limit=50_000,
|
|
63
|
+
limit_unit="tokens",
|
|
64
|
+
),
|
|
65
|
+
],
|
|
66
|
+
pricing={
|
|
67
|
+
"gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
|
|
68
|
+
},
|
|
69
|
+
model_path="model",
|
|
70
|
+
metadata_paths={
|
|
71
|
+
"agent": "metadata.agent",
|
|
72
|
+
"user_id": "metadata.user_id",
|
|
73
|
+
},
|
|
74
|
+
unknown_model_behavior="block",
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
The evaluator reads token usage from standard fields such as `usage.input_tokens` and `usage.output_tokens`. Configure `token_path` only when your event shape uses a custom location.
|
|
79
|
+
|
|
80
|
+
## Scope and group_by
|
|
81
|
+
|
|
82
|
+
Each `BudgetLimitRule` has a static `scope` and an optional `group_by` field.
|
|
83
|
+
|
|
84
|
+
`scope` filters which events a rule applies to. A rule with `scope={"agent": "support"}` only applies when extracted metadata contains `agent="support"`. An empty scope is global.
|
|
85
|
+
|
|
86
|
+
`group_by` creates independent buckets per extracted metadata value. The common per-user pattern is:
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
BudgetLimitRule(
|
|
90
|
+
scope={"agent": "support"},
|
|
91
|
+
group_by="user_id",
|
|
92
|
+
window_seconds=86_400,
|
|
93
|
+
limit=500,
|
|
94
|
+
limit_unit="usd_cents",
|
|
95
|
+
)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
With `metadata_paths={"user_id": "metadata.user_id"}`, each user gets a separate daily budget inside the support scope.
|
|
99
|
+
|
|
100
|
+
## Budget pools
|
|
101
|
+
|
|
102
|
+
`budget_id` identifies the accumulated budget pool.
|
|
103
|
+
|
|
104
|
+
Evaluators with the same `budget_id` share accumulated spend and token totals across all evaluator instances. Each evaluator still evaluates using its own configured rules -- the shared state is the bucket (the rolling sum), not the rule set. Evaluators with different `budget_id` values are fully isolated.
|
|
105
|
+
|
|
106
|
+
Use stable names such as `support-daily`, `billing-global`, or `tenant-acme-monthly`. Avoid generating a new `budget_id` per request unless each request should have an isolated budget.
|
|
107
|
+
|
|
108
|
+
## Pricing
|
|
109
|
+
|
|
110
|
+
`ModelPricing` stores cost rates in USD cents per 1K tokens (for example, `input_per_1k=0.04` means $0.40 per 1M input tokens):
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
ModelPricing(input_per_1k=0.04, output_per_1k=0.16)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
`input_per_1k` is applied to input tokens. `output_per_1k` is applied to output tokens.
|
|
117
|
+
|
|
118
|
+
Pricing and `model_path` are required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit both. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0.
|
|
119
|
+
|
|
120
|
+
## Dual Ceiling Pattern
|
|
121
|
+
|
|
122
|
+
Use two evaluators when cost and token ceilings need independent control records or different `budget_id` pools:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
cost_config = BudgetEvaluatorConfig(
|
|
126
|
+
budget_id="support-cost-daily",
|
|
127
|
+
limits=[
|
|
128
|
+
BudgetLimitRule(
|
|
129
|
+
scope={"agent": "support"},
|
|
130
|
+
group_by="user_id",
|
|
131
|
+
window_seconds=86_400,
|
|
132
|
+
limit=500,
|
|
133
|
+
limit_unit="usd_cents",
|
|
134
|
+
)
|
|
135
|
+
],
|
|
136
|
+
pricing={
|
|
137
|
+
"gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
|
|
138
|
+
},
|
|
139
|
+
model_path="model",
|
|
140
|
+
metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
token_config = BudgetEvaluatorConfig(
|
|
144
|
+
budget_id="support-token-daily",
|
|
145
|
+
limits=[
|
|
146
|
+
BudgetLimitRule(
|
|
147
|
+
scope={"agent": "support"},
|
|
148
|
+
group_by="user_id",
|
|
149
|
+
window_seconds=86_400,
|
|
150
|
+
limit=50_000,
|
|
151
|
+
limit_unit="tokens",
|
|
152
|
+
)
|
|
153
|
+
],
|
|
154
|
+
metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
This pattern lets cost and token budgets reset, alert, and roll out independently. A single evaluator can also contain both rules when one shared pool and one control result are sufficient.
|
|
159
|
+
|
|
160
|
+
## Limitations
|
|
161
|
+
|
|
162
|
+
`InMemoryBudgetStore` is single-process only. State is lost on restart and is not shared across workers or pods.
|
|
163
|
+
|
|
164
|
+
Use a distributed store for production deployments that run multiple processes, multiple workers, or multiple pods.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# agent-control-evaluator-budget
|
|
2
|
+
|
|
3
|
+
Budget evaluator for agent-control that tracks cumulative LLM token and cost usage per scope and time window.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install "agent-control-evaluators[budget]"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Fallback direct wheel install:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install agent-control-evaluator-budget
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
For local development:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv pip install -e evaluators/contrib/budget
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quickstart
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from agent_control_evaluator_budget.budget import (
|
|
27
|
+
BudgetEvaluatorConfig,
|
|
28
|
+
BudgetLimitRule,
|
|
29
|
+
ModelPricing,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
config = BudgetEvaluatorConfig(
|
|
33
|
+
budget_id="support-daily",
|
|
34
|
+
limits=[
|
|
35
|
+
BudgetLimitRule(
|
|
36
|
+
scope={"agent": "support"},
|
|
37
|
+
group_by="user_id",
|
|
38
|
+
window_seconds=86_400,
|
|
39
|
+
limit=500,
|
|
40
|
+
limit_unit="usd_cents",
|
|
41
|
+
),
|
|
42
|
+
BudgetLimitRule(
|
|
43
|
+
scope={"agent": "support"},
|
|
44
|
+
group_by="user_id",
|
|
45
|
+
window_seconds=86_400,
|
|
46
|
+
limit=50_000,
|
|
47
|
+
limit_unit="tokens",
|
|
48
|
+
),
|
|
49
|
+
],
|
|
50
|
+
pricing={
|
|
51
|
+
"gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
|
|
52
|
+
},
|
|
53
|
+
model_path="model",
|
|
54
|
+
metadata_paths={
|
|
55
|
+
"agent": "metadata.agent",
|
|
56
|
+
"user_id": "metadata.user_id",
|
|
57
|
+
},
|
|
58
|
+
unknown_model_behavior="block",
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
The evaluator reads token usage from standard fields such as `usage.input_tokens` and `usage.output_tokens`. Configure `token_path` only when your event shape uses a custom location.
|
|
63
|
+
|
|
64
|
+
## Scope and group_by
|
|
65
|
+
|
|
66
|
+
Each `BudgetLimitRule` has a static `scope` and an optional `group_by` field.
|
|
67
|
+
|
|
68
|
+
`scope` filters which events a rule applies to. A rule with `scope={"agent": "support"}` only applies when extracted metadata contains `agent="support"`. An empty scope is global.
|
|
69
|
+
|
|
70
|
+
`group_by` creates independent buckets per extracted metadata value. The common per-user pattern is:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
BudgetLimitRule(
|
|
74
|
+
scope={"agent": "support"},
|
|
75
|
+
group_by="user_id",
|
|
76
|
+
window_seconds=86_400,
|
|
77
|
+
limit=500,
|
|
78
|
+
limit_unit="usd_cents",
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
With `metadata_paths={"user_id": "metadata.user_id"}`, each user gets a separate daily budget inside the support scope.
|
|
83
|
+
|
|
84
|
+
## Budget pools
|
|
85
|
+
|
|
86
|
+
`budget_id` identifies the accumulated budget pool.
|
|
87
|
+
|
|
88
|
+
Evaluators with the same `budget_id` share accumulated spend and token totals across all evaluator instances. Each evaluator still evaluates using its own configured rules -- the shared state is the bucket (the rolling sum), not the rule set. Evaluators with different `budget_id` values are fully isolated.
|
|
89
|
+
|
|
90
|
+
Use stable names such as `support-daily`, `billing-global`, or `tenant-acme-monthly`. Avoid generating a new `budget_id` per request unless each request should have an isolated budget.
|
|
91
|
+
|
|
92
|
+
## Pricing
|
|
93
|
+
|
|
94
|
+
`ModelPricing` stores cost rates in USD cents per 1K tokens (for example, `input_per_1k=0.04` means $0.40 per 1M input tokens):
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
ModelPricing(input_per_1k=0.04, output_per_1k=0.16)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
`input_per_1k` is applied to input tokens. `output_per_1k` is applied to output tokens.
|
|
101
|
+
|
|
102
|
+
Pricing and `model_path` are required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit both. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0.
|
|
103
|
+
|
|
104
|
+
## Dual Ceiling Pattern
|
|
105
|
+
|
|
106
|
+
Use two evaluators when cost and token ceilings need independent control records or different `budget_id` pools:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
cost_config = BudgetEvaluatorConfig(
|
|
110
|
+
budget_id="support-cost-daily",
|
|
111
|
+
limits=[
|
|
112
|
+
BudgetLimitRule(
|
|
113
|
+
scope={"agent": "support"},
|
|
114
|
+
group_by="user_id",
|
|
115
|
+
window_seconds=86_400,
|
|
116
|
+
limit=500,
|
|
117
|
+
limit_unit="usd_cents",
|
|
118
|
+
)
|
|
119
|
+
],
|
|
120
|
+
pricing={
|
|
121
|
+
"gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
|
|
122
|
+
},
|
|
123
|
+
model_path="model",
|
|
124
|
+
metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
token_config = BudgetEvaluatorConfig(
|
|
128
|
+
budget_id="support-token-daily",
|
|
129
|
+
limits=[
|
|
130
|
+
BudgetLimitRule(
|
|
131
|
+
scope={"agent": "support"},
|
|
132
|
+
group_by="user_id",
|
|
133
|
+
window_seconds=86_400,
|
|
134
|
+
limit=50_000,
|
|
135
|
+
limit_unit="tokens",
|
|
136
|
+
)
|
|
137
|
+
],
|
|
138
|
+
metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
This pattern lets cost and token budgets reset, alert, and roll out independently. A single evaluator can also contain both rules when one shared pool and one control result are sufficient.
|
|
143
|
+
|
|
144
|
+
## Limitations
|
|
145
|
+
|
|
146
|
+
`InMemoryBudgetStore` is single-process only. State is lost on restart and is not shared across workers or pods.
|
|
147
|
+
|
|
148
|
+
Use a distributed store for production deployments that run multiple processes, multiple workers, or multiple pods.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "agent-control-evaluator-budget"
|
|
3
|
+
version = "7.7.0"
|
|
4
|
+
description = "Budget evaluator for agent-control -- cumulative LLM cost and token tracking"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = { text = "Apache-2.0" }
|
|
8
|
+
authors = [{ name = "Agent Control Team" }]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"agent-control-evaluators>=7.7.0",
|
|
11
|
+
"agent-control-models>=7.7.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
dev = [
|
|
16
|
+
"pytest>=8.0.0",
|
|
17
|
+
"pytest-asyncio>=0.23.0",
|
|
18
|
+
"ruff>=0.1.0",
|
|
19
|
+
"mypy>=1.8.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.entry-points."agent_control.evaluators"]
|
|
23
|
+
budget = "agent_control_evaluator_budget.budget:BudgetEvaluator"
|
|
24
|
+
|
|
25
|
+
[build-system]
|
|
26
|
+
requires = ["hatchling"]
|
|
27
|
+
build-backend = "hatchling.build"
|
|
28
|
+
|
|
29
|
+
[tool.hatch.build.targets.wheel]
|
|
30
|
+
packages = ["src/agent_control_evaluator_budget"]
|
|
31
|
+
|
|
32
|
+
[tool.ruff]
|
|
33
|
+
line-length = 100
|
|
34
|
+
target-version = "py312"
|
|
35
|
+
|
|
36
|
+
[tool.ruff.lint]
|
|
37
|
+
select = ["E", "F", "I"]
|
|
38
|
+
|
|
39
|
+
[tool.uv.sources]
|
|
40
|
+
agent-control-evaluators = { path = "../../builtin", editable = true }
|
|
41
|
+
agent-control-models = { path = "../../../models", editable = true }
|
|
42
|
+
|
|
43
|
+
[dependency-groups]
|
|
44
|
+
dev = [
|
|
45
|
+
"pytest>=9.0.2",
|
|
46
|
+
"pytest-asyncio>=1.3.0",
|
|
47
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Budget evaluator for per-agent LLM cost and token tracking."""
|
|
2
|
+
|
|
3
|
+
from agent_control_evaluator_budget.budget.config import (
|
|
4
|
+
BudgetEvaluatorConfig,
|
|
5
|
+
BudgetLimitRule,
|
|
6
|
+
ModelPricing,
|
|
7
|
+
)
|
|
8
|
+
from agent_control_evaluator_budget.budget.evaluator import BudgetEvaluator
|
|
9
|
+
from agent_control_evaluator_budget.budget.memory_store import InMemoryBudgetStore
|
|
10
|
+
from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore
|
|
11
|
+
|
|
12
|
+
# Note: clear_budget_stores is a testing utility and is intentionally not
|
|
13
|
+
# re-exported here. Import it directly from the evaluator submodule in tests:
|
|
14
|
+
# from agent_control_evaluator_budget.budget.evaluator import clear_budget_stores
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"BudgetEvaluator",
|
|
18
|
+
"BudgetEvaluatorConfig",
|
|
19
|
+
"BudgetLimitRule",
|
|
20
|
+
"BudgetSnapshot",
|
|
21
|
+
"BudgetStore",
|
|
22
|
+
"InMemoryBudgetStore",
|
|
23
|
+
"ModelPricing",
|
|
24
|
+
]
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Configuration for the budget evaluator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from agent_control_evaluators._base import EvaluatorConfig
|
|
8
|
+
from pydantic import Field, field_validator, model_validator
|
|
9
|
+
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
# Window convenience constants (seconds)
|
|
12
|
+
# ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
# Each constant is spelled out as seconds * minutes * hours * days so the
# derivation of the value is visible at a glance.
WINDOW_HOURLY = 60 * 60  # 3_600 seconds
WINDOW_DAILY = 24 * 60 * 60  # 86_400 seconds
WINDOW_WEEKLY = 7 * 24 * 60 * 60  # 604_800 seconds
WINDOW_MONTHLY = 30 * 24 * 60 * 60  # 2_592_000 seconds; a "month" is a fixed 30 days
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ModelPricing(EvaluatorConfig):
    """Per-model token pricing in cents per 1K tokens.

    Attributes:
        input_per_1k: Rate, in cents, applied per 1K input tokens.
            Defaults to 0.0.
        output_per_1k: Rate, in cents, applied per 1K output tokens.
            Defaults to 0.0.

    Raises:
        pydantic.ValidationError: If either rate is negative, NaN, or
            infinite.
    """

    # Rates are constrained to finite, non-negative numbers so that a
    # misconfigured pricing table is rejected when the config is loaded
    # instead of feeding a negative or non-comparable rate into later cost
    # arithmetic.
    input_per_1k: float = Field(default=0.0, ge=0.0, allow_inf_nan=False)
    output_per_1k: float = Field(default=0.0, ge=0.0, allow_inf_nan=False)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BudgetLimitRule(EvaluatorConfig):
    """One ceiling on accumulated usage.

    A rule pairs a set of scope dimensions with a time window and a limit.
    Several rules may apply to the same step; the evaluator checks every
    one of them and triggers on the first breach.

    Attributes:
        scope: Static dimension values that must match for the rule to
            apply. An empty dict makes the rule global.
            Examples:
                {"agent": "summarizer"} -- per-agent limit
                {"agent": "summarizer", "channel": "slack"} -- agent+channel limit
        group_by: Name of a dimension to bucket by. When set, the limit is
            tracked independently for each distinct value of that dimension
            (group_by="user_id" gives every user their own budget). None
            means a single shared/global limit.
        window_seconds: Length of the accumulation window in seconds. None
            means cumulative usage with no reset. See the WINDOW_* constants.
        limit: Maximum usage allowed in the window, interpreted according
            to limit_unit.
        limit_unit: Unit of limit. "usd_cents" checks spend; "tokens" checks
            input + output tokens.
    """

    scope: dict[str, str] = Field(default_factory=dict)
    group_by: str | None = None
    window_seconds: int | None = None
    limit: int
    limit_unit: Literal["usd_cents", "tokens"] = "usd_cents"

    @field_validator("limit")
    @classmethod
    def validate_limit(cls, v: int) -> int:
        """Accept only strictly positive limits."""
        if v > 0:
            return v
        raise ValueError("limit must be a positive integer")

    @field_validator("window_seconds")
    @classmethod
    def validate_window_seconds(cls, v: int | None) -> int | None:
        """Accept None (no window) or a strictly positive number of seconds."""
        if v is None or v > 0:
            return v
        raise ValueError("window_seconds must be positive")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class BudgetEvaluatorConfig(EvaluatorConfig):
    """Settings for the budget evaluator.

    Attributes:
        limits: Budget limit rules; at least one is required. Each rule is
            checked independently.
        budget_id: Identifier of the budget pool. Configs with the same
            budget_id share accumulated spend; different budget_id values
            are fully isolated.
        unknown_model_behavior: Policy for a model that is missing from the
            pricing table while a cost-based rule exists. "block" fails
            closed; "warn" logs a warning and treats the cost as 0.
        pricing: Optional table mapping model name to ModelPricing. Required
            when any rule uses limit_unit="usd_cents". Used to derive cost
            from token counts and model name; ModelPricing rates are in
            cents per 1K tokens.
        token_path: Dot-notation path to the token usage in step data
            (e.g. "usage.total_tokens"). If None, standard fields are used
            (input_tokens, output_tokens, total_tokens, usage).
        model_path: Dot-notation path to the model name used for the pricing
            lookup. Required when any rule uses limit_unit="usd_cents".
        metadata_paths: Mapping of metadata field name to dot-notation path
            in step data. Used to extract scope dimensions (channel,
            user_id, etc).
    """

    limits: list[BudgetLimitRule] = Field(min_length=1)
    budget_id: str = Field(
        default="default",
        description=(
            "Unique budget pool identifier. Same budget_id shares accumulated spend. "
            "Different budget_id is fully isolated."
        ),
    )
    unknown_model_behavior: Literal["block", "warn"] = Field(
        default="block",
        description=(
            "What to do when a model is not found in the pricing table and a cost-based "
            "rule exists. block=fail closed, warn=log warning and treat cost as 0."
        ),
    )
    pricing: dict[str, ModelPricing] | None = None
    token_path: str | None = None
    model_path: str | None = None
    metadata_paths: dict[str, str] = Field(default_factory=dict)

    @model_validator(mode="after")
    def require_pricing_for_cost_rules(self) -> BudgetEvaluatorConfig:
        """Ensure cost-based rules come with a pricing table and a model path.

        Returns:
            The validated config itself.

        Raises:
            ValueError: If any rule uses limit_unit="usd_cents" and pricing
                is None, or model_path is None, empty, or whitespace-only.
        """
        uses_cost_limit = False
        for rule in self.limits:
            if rule.limit_unit == "usd_cents":
                uses_cost_limit = True
                break

        # Token-only configurations need neither pricing nor a model path.
        if not uses_cost_limit:
            return self

        if self.pricing is None:
            raise ValueError('pricing is required when any rule uses limit_unit="usd_cents"')

        configured_path = self.model_path
        if configured_path is None or configured_path.strip() == "":
            raise ValueError('model_path is required when any rule uses limit_unit="usd_cents"')

        return self
|