agent-control-evaluators 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_evaluators-5.0.0/.gitignore +86 -0
- agent_control_evaluators-5.0.0/Makefile +33 -0
- agent_control_evaluators-5.0.0/PKG-INFO +70 -0
- agent_control_evaluators-5.0.0/README.md +51 -0
- agent_control_evaluators-5.0.0/pyproject.toml +37 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/__init__.py +76 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/_base.py +178 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/_discovery.py +109 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/_factory.py +105 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/_registry.py +87 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/json/__init__.py +6 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/json/config.py +237 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/json/evaluator.py +502 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/list/__init__.py +6 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/list/config.py +26 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/list/evaluator.py +132 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/py.typed +0 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/regex/__init__.py +6 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/regex/config.py +23 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/regex/evaluator.py +73 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/sql/__init__.py +6 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/sql/config.py +187 -0
- agent_control_evaluators-5.0.0/src/agent_control_evaluators/sql/evaluator.py +1251 -0
- agent_control_evaluators-5.0.0/tests/__init__.py +1 -0
- agent_control_evaluators-5.0.0/tests/json/__init__.py +1 -0
- agent_control_evaluators-5.0.0/tests/json/test_json.py +781 -0
- agent_control_evaluators-5.0.0/tests/sql/__init__.py +1 -0
- agent_control_evaluators-5.0.0/tests/sql/test_sql.py +2527 -0
- agent_control_evaluators-5.0.0/tests/test_base.py +140 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
23
|
+
|
|
24
|
+
# Virtual environments
|
|
25
|
+
venv/
|
|
26
|
+
env/
|
|
27
|
+
ENV/
|
|
28
|
+
.venv
|
|
29
|
+
|
|
30
|
+
# UV
|
|
31
|
+
.uv/
|
|
32
|
+
uv.lock
|
|
33
|
+
|
|
34
|
+
# IDEs
|
|
35
|
+
.vscode/
|
|
36
|
+
.idea/
|
|
37
|
+
*.swp
|
|
38
|
+
*.swo
|
|
39
|
+
*~
|
|
40
|
+
.DS_Store
|
|
41
|
+
coverage-*.xml
|
|
42
|
+
|
|
43
|
+
# Testing
|
|
44
|
+
.pytest_cache/
|
|
45
|
+
.coverage
|
|
46
|
+
coverage-*.xml
|
|
47
|
+
htmlcov/
|
|
48
|
+
.tox/
|
|
49
|
+
.mypy_cache/
|
|
50
|
+
.ruff_cache/
|
|
51
|
+
|
|
52
|
+
# Playwright
|
|
53
|
+
playwright-report/
|
|
54
|
+
playwright/.cache/
|
|
55
|
+
test-results/
|
|
56
|
+
|
|
57
|
+
# Environment variables
|
|
58
|
+
.env
|
|
59
|
+
.env.local
|
|
60
|
+
.env.*.local
|
|
61
|
+
|
|
62
|
+
# Logs
|
|
63
|
+
*.log
|
|
64
|
+
logs/
|
|
65
|
+
|
|
66
|
+
# Database
|
|
67
|
+
*.db
|
|
68
|
+
*.sqlite3
|
|
69
|
+
|
|
70
|
+
# Temporary files
|
|
71
|
+
tmp/
|
|
72
|
+
temp/
|
|
73
|
+
*.tmp
|
|
74
|
+
|
|
75
|
+
# OS
|
|
76
|
+
.DS_Store
|
|
77
|
+
Thumbs.db
|
|
78
|
+
|
|
79
|
+
# Intellij
|
|
80
|
+
*.iml
|
|
81
|
+
|
|
82
|
+
## CLAUDE
|
|
83
|
+
.claude
|
|
84
|
+
|
|
85
|
+
# Local notes
|
|
86
|
+
rearch_plan.md
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
.PHONY: help sync test lint lint-fix typecheck build publish
|
|
2
|
+
|
|
3
|
+
PACKAGE := agent-control-evaluators
|
|
4
|
+
|
|
5
|
+
help:
|
|
6
|
+
@echo "Agent Control Evaluators - Makefile commands"
|
|
7
|
+
@echo ""
|
|
8
|
+
@echo " make test - run pytest"
|
|
9
|
+
@echo " make lint - run ruff check"
|
|
10
|
+
@echo " make lint-fix - run ruff check --fix"
|
|
11
|
+
@echo " make typecheck - run mypy"
|
|
12
|
+
@echo " make build - build package"
|
|
13
|
+
|
|
14
|
+
sync:
|
|
15
|
+
uv sync
|
|
16
|
+
|
|
17
|
+
test:
|
|
18
|
+
uv run pytest --cov=src --cov-report=xml:../../coverage-evaluators.xml -q
|
|
19
|
+
|
|
20
|
+
lint:
|
|
21
|
+
uv run ruff check --config ../../pyproject.toml src/
|
|
22
|
+
|
|
23
|
+
lint-fix:
|
|
24
|
+
uv run ruff check --config ../../pyproject.toml --fix src/
|
|
25
|
+
|
|
26
|
+
typecheck:
|
|
27
|
+
uv run mypy --config-file ../../pyproject.toml src/
|
|
28
|
+
|
|
29
|
+
build:
|
|
30
|
+
uv build
|
|
31
|
+
|
|
32
|
+
publish:
|
|
33
|
+
uv publish
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-control-evaluators
|
|
3
|
+
Version: 5.0.0
|
|
4
|
+
Summary: Builtin evaluators for agent-control
|
|
5
|
+
Author: Agent Control Team
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: agent-control-models
|
|
9
|
+
Requires-Dist: google-re2>=1.1
|
|
10
|
+
Requires-Dist: jsonschema>=4.0.0
|
|
11
|
+
Requires-Dist: pydantic>=2.12.4
|
|
12
|
+
Requires-Dist: sqlglot[rs]>=20.0.0
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
16
|
+
Provides-Extra: galileo
|
|
17
|
+
Requires-Dist: agent-control-evaluator-galileo>=3.0.0; extra == 'galileo'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Agent Control Evaluators
|
|
21
|
+
|
|
22
|
+
Built-in evaluators for agent-control.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install agent-control-evaluators
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Available Evaluators
|
|
31
|
+
|
|
32
|
+
| Name | Description |
|
|
33
|
+
|------|-------------|
|
|
34
|
+
| `regex` | Regular expression pattern matching |
|
|
35
|
+
| `list` | List-based value matching (allow/deny) |
|
|
36
|
+
| `json` | JSON validation (schema, required fields, types) |
|
|
37
|
+
| `sql` | SQL query validation |
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
Evaluators are automatically discovered via Python entry points:
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from agent_control_evaluators import discover_evaluators, list_evaluators
|
|
45
|
+
|
|
46
|
+
# Load all available evaluators
|
|
47
|
+
discover_evaluators()
|
|
48
|
+
|
|
49
|
+
# See what's available
|
|
50
|
+
print(list_evaluators())
|
|
51
|
+
# {'regex': <class 'RegexEvaluator'>, 'list': ..., 'json': ..., 'sql': ...}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## External Evaluators
|
|
55
|
+
|
|
56
|
+
Additional evaluators are available via separate packages:
|
|
57
|
+
|
|
58
|
+
- `agent-control-evaluator-galileo` - Galileo Luna2 evaluator
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Direct install
|
|
62
|
+
pip install agent-control-evaluator-galileo
|
|
63
|
+
|
|
64
|
+
# Or via convenience extra
|
|
65
|
+
pip install agent-control-evaluators[galileo]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Creating Custom Evaluators
|
|
69
|
+
|
|
70
|
+
See [AGENTS.md](../../AGENTS.md) for guidance on creating new evaluators.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Agent Control Evaluators
|
|
2
|
+
|
|
3
|
+
Built-in evaluators for agent-control.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install agent-control-evaluators
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Available Evaluators
|
|
12
|
+
|
|
13
|
+
| Name | Description |
|
|
14
|
+
|------|-------------|
|
|
15
|
+
| `regex` | Regular expression pattern matching |
|
|
16
|
+
| `list` | List-based value matching (allow/deny) |
|
|
17
|
+
| `json` | JSON validation (schema, required fields, types) |
|
|
18
|
+
| `sql` | SQL query validation |
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
Evaluators are automatically discovered via Python entry points:
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from agent_control_evaluators import discover_evaluators, list_evaluators
|
|
26
|
+
|
|
27
|
+
# Load all available evaluators
|
|
28
|
+
discover_evaluators()
|
|
29
|
+
|
|
30
|
+
# See what's available
|
|
31
|
+
print(list_evaluators())
|
|
32
|
+
# {'regex': <class 'RegexEvaluator'>, 'list': ..., 'json': ..., 'sql': ...}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## External Evaluators
|
|
36
|
+
|
|
37
|
+
Additional evaluators are available via separate packages:
|
|
38
|
+
|
|
39
|
+
- `agent-control-evaluator-galileo` - Galileo Luna2 evaluator
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Direct install
|
|
43
|
+
pip install agent-control-evaluator-galileo
|
|
44
|
+
|
|
45
|
+
# Or via convenience extra
|
|
46
|
+
pip install agent-control-evaluators[galileo]
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Creating Custom Evaluators
|
|
50
|
+
|
|
51
|
+
See [AGENTS.md](../../AGENTS.md) for guidance on creating new evaluators.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "agent-control-evaluators"
|
|
3
|
+
version = "5.0.0"
|
|
4
|
+
description = "Builtin evaluators for agent-control"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = { text = "Apache-2.0" }
|
|
8
|
+
authors = [{ name = "Agent Control Team" }]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"agent-control-models",
|
|
11
|
+
"pydantic>=2.12.4",
|
|
12
|
+
"google-re2>=1.1",
|
|
13
|
+
"jsonschema>=4.0.0",
|
|
14
|
+
"sqlglot[rs]>=20.0.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
galileo = ["agent-control-evaluator-galileo>=3.0.0"]
|
|
19
|
+
dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0"]
|
|
20
|
+
|
|
21
|
+
[project.entry-points."agent_control.evaluators"]
|
|
22
|
+
regex = "agent_control_evaluators.regex:RegexEvaluator"
|
|
23
|
+
list = "agent_control_evaluators.list:ListEvaluator"
|
|
24
|
+
json = "agent_control_evaluators.json:JSONEvaluator"
|
|
25
|
+
sql = "agent_control_evaluators.sql:SQLEvaluator"
|
|
26
|
+
|
|
27
|
+
[build-system]
|
|
28
|
+
requires = ["hatchling"]
|
|
29
|
+
build-backend = "hatchling.build"
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.wheel]
|
|
32
|
+
packages = ["src/agent_control_evaluators"]
|
|
33
|
+
|
|
34
|
+
[tool.uv.sources]
|
|
35
|
+
agent-control-models = { workspace = true }
|
|
36
|
+
# For local dev: use local galileo package instead of PyPI
|
|
37
|
+
agent-control-evaluator-galileo = { path = "../extra/galileo", editable = true }
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Agent Control Evaluators.
|
|
2
|
+
|
|
3
|
+
This package contains builtin evaluator implementations for agent-control.
|
|
4
|
+
Built-in evaluators (regex, list, json, sql) are registered automatically on import.
|
|
5
|
+
|
|
6
|
+
Available evaluators:
|
|
7
|
+
Built-in (no namespace):
|
|
8
|
+
- regex: Regular expression matching
|
|
9
|
+
- list: List-based value matching
|
|
10
|
+
- json: JSON validation
|
|
11
|
+
- sql: SQL query validation
|
|
12
|
+
|
|
13
|
+
Naming convention:
|
|
14
|
+
- Built-in: "regex", "list", "json", "sql"
|
|
15
|
+
- External: "provider.name" (e.g., "galileo.luna2")
|
|
16
|
+
- Agent-scoped: "agent:name" (custom code deployed with agent)
|
|
17
|
+
|
|
18
|
+
External evaluators are installed via separate packages (e.g., agent-control-evaluator-galileo).
|
|
19
|
+
Custom evaluators are Evaluator classes deployed with the engine.
|
|
20
|
+
Their schemas are registered via initAgent for validation purposes.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
__version__ = version("agent-control-evaluators")
|
|
27
|
+
except PackageNotFoundError:
|
|
28
|
+
__version__ = "0.0.0.dev"
|
|
29
|
+
|
|
30
|
+
# Core infrastructure - export from _base, _discovery, _factory, and _registry
|
|
31
|
+
from agent_control_evaluators._base import Evaluator, EvaluatorConfig, EvaluatorMetadata
|
|
32
|
+
from agent_control_evaluators._discovery import (
|
|
33
|
+
discover_evaluators,
|
|
34
|
+
ensure_evaluators_discovered,
|
|
35
|
+
list_evaluators,
|
|
36
|
+
reset_evaluator_discovery,
|
|
37
|
+
)
|
|
38
|
+
from agent_control_evaluators._factory import clear_evaluator_cache, get_evaluator_instance
|
|
39
|
+
from agent_control_evaluators._registry import (
|
|
40
|
+
clear_evaluators,
|
|
41
|
+
get_all_evaluators,
|
|
42
|
+
get_evaluator,
|
|
43
|
+
register_evaluator,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Import built-in evaluators to auto-register them
|
|
47
|
+
from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig
|
|
48
|
+
from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig
|
|
49
|
+
from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig
|
|
50
|
+
from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
# Core infrastructure
|
|
54
|
+
"Evaluator",
|
|
55
|
+
"EvaluatorConfig",
|
|
56
|
+
"EvaluatorMetadata",
|
|
57
|
+
"register_evaluator",
|
|
58
|
+
"get_evaluator",
|
|
59
|
+
"get_all_evaluators",
|
|
60
|
+
"clear_evaluators",
|
|
61
|
+
"discover_evaluators",
|
|
62
|
+
"ensure_evaluators_discovered",
|
|
63
|
+
"reset_evaluator_discovery",
|
|
64
|
+
"list_evaluators",
|
|
65
|
+
"get_evaluator_instance",
|
|
66
|
+
"clear_evaluator_cache",
|
|
67
|
+
# Built-in evaluators
|
|
68
|
+
"RegexEvaluator",
|
|
69
|
+
"RegexEvaluatorConfig",
|
|
70
|
+
"ListEvaluator",
|
|
71
|
+
"ListEvaluatorConfig",
|
|
72
|
+
"JSONEvaluator",
|
|
73
|
+
"JSONEvaluatorConfig",
|
|
74
|
+
"SQLEvaluator",
|
|
75
|
+
"SQLEvaluatorConfig",
|
|
76
|
+
]
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Evaluator base classes and metadata."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar
|
|
9
|
+
|
|
10
|
+
from agent_control_models import EvaluatorResult
|
|
11
|
+
from agent_control_models.base import BaseModel
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from typing import Self
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EvaluatorConfig(BaseModel):
    """Common parent for every typed evaluator configuration model.

    It adds no fields of its own; it subclasses the project's BaseModel so
    that all evaluator configs share one base type and can be used as the
    bound of the ``ConfigT`` type variable.

    Example:
        ```python
        from agent_control_evaluators import EvaluatorConfig

        class MyEvaluatorConfig(EvaluatorConfig):
            pattern: str
            threshold: float = 0.5
        ```
    """
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Type variable for an evaluator's config type; bounded to EvaluatorConfig subclasses.
ConfigT = TypeVar("ConfigT", bound=EvaluatorConfig)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class EvaluatorMetadata:
    """Descriptive record attached to an evaluator class.

    Attributes:
        name: Unique evaluator name (e.g., "regex", "galileo.luna2").
        version: Version string of the evaluator.
        description: Short human-readable summary.
        requires_api_key: True when the evaluator needs an API key to run.
        timeout_ms: Default timeout, expressed in milliseconds.
    """

    # Required identification fields.
    name: str
    version: str
    description: str

    # Optional fields; the defaults apply when the evaluator does not override them.
    requires_api_key: bool = False
    timeout_ms: int = 10_000
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Evaluator(ABC, Generic[ConfigT]):  # noqa: UP046 - need Python 3.10 compat
    """Base class for all evaluators (built-in, external, or custom).

    All evaluators follow the same pattern:
        1. Define metadata and config_model as class variables
        2. Implement evaluate() method
        3. Register with @register_evaluator decorator

    IMPORTANT - Instance Caching & Thread Safety:
        Evaluator instances are cached and reused across multiple evaluate() calls
        when they have the same configuration. This means:

        - DO NOT store mutable request-scoped state on `self`
        - The evaluate() method may be called concurrently from multiple requests
        - Any state stored in __init__ should be immutable or thread-safe
        - Use local variables within evaluate() for request-specific state

        Good pattern:
            def __init__(self, config):
                super().__init__(config)
                self._compiled_regex = re.compile(config.pattern)  # OK: immutable

            async def evaluate(self, data):
                result = self._compiled_regex.search(data)  # OK: uses immutable state
                return EvaluatorResult(matched=result is not None, ...)

        Bad pattern:
            def __init__(self, config):
                super().__init__(config)
                self.call_count = 0  # BAD: mutable state shared across requests

            async def evaluate(self, data):
                self.call_count += 1  # BAD: race condition, leaks between requests

    Example:
        ```python
        from agent_control_evaluators import (
            Evaluator,
            EvaluatorConfig,
            EvaluatorMetadata,
            register_evaluator,
        )
        from agent_control_models import EvaluatorResult

        class MyConfig(EvaluatorConfig):
            threshold: float = 0.5

        @register_evaluator
        class MyEvaluator(Evaluator[MyConfig]):
            metadata = EvaluatorMetadata(
                name="my-evaluator",
                version="1.0.0",
                description="My custom evaluator",
            )
            config_model = MyConfig

            async def evaluate(self, data: Any) -> EvaluatorResult:
                return EvaluatorResult(
                    matched=len(str(data)) > self.config.threshold,
                    confidence=1.0,
                    message="Evaluation complete"
                )
        ```
    """

    # Subclasses must assign both of these at class level.
    metadata: ClassVar[EvaluatorMetadata]
    config_model: ClassVar[type[EvaluatorConfig]]

    def __init__(self, config: ConfigT) -> None:
        """Initialize evaluator with validated config.

        Args:
            config: Validated configuration (instance of config_model)
        """
        self.config: ConfigT = config

    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> Self:
        """Create evaluator instance from raw config dict.

        The dict is expanded into keyword arguments for ``config_model``, so
        any validation error raised by that model propagates to the caller.

        Args:
            config_dict: Raw configuration dictionary

        Returns:
            Evaluator instance with validated config
        """
        validated = cls.config_model(**config_dict)
        return cls(validated)  # type: ignore[arg-type]

    @abstractmethod
    async def evaluate(self, data: Any) -> EvaluatorResult:
        """Evaluate data and return result.

        Args:
            data: Data extracted by selector from the payload

        Returns:
            EvaluatorResult with matched status, confidence, and message
        """

    def get_timeout_seconds(self) -> float:
        """Get timeout in seconds from config or metadata default.

        The config's ``timeout_ms`` wins when it is present and not None.
        A missing attribute or an explicit None both fall back to
        ``metadata.timeout_ms``, so an optional config field left unset does
        not crash the millisecond-to-second conversion.

        Returns:
            Timeout expressed in seconds.
        """
        timeout_ms = getattr(self.config, "timeout_ms", None)
        if timeout_ms is None:
            # float(None) would raise TypeError; use the evaluator's default instead.
            timeout_ms = self.metadata.timeout_ms
        return float(timeout_ms) / 1000.0

    @classmethod
    def is_available(cls) -> bool:
        """Check if evaluator dependencies are satisfied.

        Override this method for evaluators with optional dependencies.
        Return False to skip registration during discovery.

        Returns:
            True if evaluator can be used, False otherwise
        """
        return True
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Evaluator discovery via entry points."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
from importlib.metadata import entry_points
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from agent_control_evaluators._registry import (
|
|
11
|
+
get_all_evaluators,
|
|
12
|
+
get_evaluator,
|
|
13
|
+
register_evaluator,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from agent_control_evaluators._base import Evaluator
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# Set to True at the end of discover_evaluators(); set back to False by reset_evaluator_discovery().
_DISCOVERY_COMPLETE = False
|
|
22
|
+
# Held while discover_evaluators() runs discovery and while reset_evaluator_discovery() clears the flag.
_DISCOVERY_LOCK = threading.Lock()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def discover_evaluators() -> int:
    """Load evaluators from entry points and add them to the registry.

    Every entry point in the 'agent_control.evaluators' group is loaded.
    A loaded class is registered only when no evaluator with the same
    metadata name is registered yet and its `is_available()` returns True.
    A failure while handling one entry point is logged as a warning and does
    not stop the remaining entry points from being processed.

    Discovery runs once: later calls return 0 without doing any work until
    the discovery state is reset. The work is serialized by a module-level
    lock.

    Returns:
        Number of evaluators newly registered by this call (0 when discovery
        had already completed).
    """
    global _DISCOVERY_COMPLETE

    # Cheap check first so callers skip the lock once discovery is done.
    if _DISCOVERY_COMPLETE:
        return 0

    with _DISCOVERY_LOCK:
        # Check again now that the lock is held: another thread may have
        # completed discovery while this one was waiting.
        if _DISCOVERY_COMPLETE:
            return 0

        newly_registered = 0

        try:
            for entry in entry_points(group="agent_control.evaluators"):
                try:
                    candidate = entry.load()
                    name = candidate.metadata.name

                    # Names already in the registry are left untouched.
                    if get_evaluator(name) is None:
                        if candidate.is_available():
                            register_evaluator(candidate)
                            logger.debug(f"Registered evaluator: {name}")
                            newly_registered += 1
                        else:
                            logger.debug(f"Evaluator '{name}' not available, skipping")
                except Exception as e:
                    logger.warning(f"Failed to load evaluator '{entry.name}': {e}")
        except Exception as e:
            logger.debug(f"Entry point discovery not available: {e}")

        # Marked complete even if enumeration failed, so it is not retried on every call.
        _DISCOVERY_COMPLETE = True
        logger.debug(f"Evaluator discovery complete: {newly_registered} new evaluators")
        return newly_registered
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def ensure_evaluators_discovered() -> None:
    """Run evaluator discovery unless it has already completed.

    Call this before reading from the evaluator registry.
    """
    if _DISCOVERY_COMPLETE:
        return
    discover_evaluators()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def reset_evaluator_discovery() -> None:
    """Clear the discovery-complete flag so discovery can run again.

    Only the flag is reset, under the discovery lock. Intended mainly for tests.
    """
    global _DISCOVERY_COMPLETE

    with _DISCOVERY_LOCK:
        _DISCOVERY_COMPLETE = False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# =============================================================================
|
|
96
|
+
# Public evaluator API
|
|
97
|
+
# =============================================================================
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def list_evaluators() -> dict[str, type[Evaluator[Any]]]:
    """Return every registered evaluator, running discovery first if needed.

    Returns:
        Mapping of evaluator name to evaluator class.
    """
    ensure_evaluators_discovered()
    registered = get_all_evaluators()
    return registered
|