agent-control-evaluators 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_evaluators/__init__.py +76 -0
- agent_control_evaluators/_base.py +178 -0
- agent_control_evaluators/_discovery.py +109 -0
- agent_control_evaluators/_factory.py +105 -0
- agent_control_evaluators/_registry.py +87 -0
- agent_control_evaluators/json/__init__.py +6 -0
- agent_control_evaluators/json/config.py +237 -0
- agent_control_evaluators/json/evaluator.py +502 -0
- agent_control_evaluators/list/__init__.py +6 -0
- agent_control_evaluators/list/config.py +26 -0
- agent_control_evaluators/list/evaluator.py +132 -0
- agent_control_evaluators/py.typed +0 -0
- agent_control_evaluators/regex/__init__.py +6 -0
- agent_control_evaluators/regex/config.py +23 -0
- agent_control_evaluators/regex/evaluator.py +73 -0
- agent_control_evaluators/sql/__init__.py +6 -0
- agent_control_evaluators/sql/config.py +187 -0
- agent_control_evaluators/sql/evaluator.py +1251 -0
- agent_control_evaluators-5.0.0.dist-info/METADATA +70 -0
- agent_control_evaluators-5.0.0.dist-info/RECORD +22 -0
- agent_control_evaluators-5.0.0.dist-info/WHEEL +4 -0
- agent_control_evaluators-5.0.0.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Agent Control Evaluators.
|
|
2
|
+
|
|
3
|
+
This package contains builtin evaluator implementations for agent-control.
|
|
4
|
+
Built-in evaluators (regex, list, json, sql) are registered automatically on import.
|
|
5
|
+
|
|
6
|
+
Available evaluators:
|
|
7
|
+
Built-in (no namespace):
|
|
8
|
+
- regex: Regular expression matching
|
|
9
|
+
- list: List-based value matching
|
|
10
|
+
- json: JSON validation
|
|
11
|
+
- sql: SQL query validation
|
|
12
|
+
|
|
13
|
+
Naming convention:
|
|
14
|
+
- Built-in: "regex", "list", "json", "sql"
|
|
15
|
+
- External: "provider.name" (e.g., "galileo.luna2")
|
|
16
|
+
- Agent-scoped: "agent:name" (custom code deployed with agent)
|
|
17
|
+
|
|
18
|
+
External evaluators are installed via separate packages (e.g., agent-control-evaluator-galileo).
|
|
19
|
+
Custom evaluators are Evaluator classes deployed with the engine.
|
|
20
|
+
Their schemas are registered via initAgent for validation purposes.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
24
|
+
|
|
25
|
+
# Look up the installed distribution's version from package metadata; when no
# such distribution is installed, fall back to a development placeholder.
_DISTRIBUTION_NAME = "agent-control-evaluators"
try:
    __version__ = version(_DISTRIBUTION_NAME)
except PackageNotFoundError:
    __version__ = "0.0.0.dev"
|
|
29
|
+
|
|
30
|
+
# Core infrastructure - export from _base and _registry
|
|
31
|
+
from agent_control_evaluators._base import Evaluator, EvaluatorConfig, EvaluatorMetadata
|
|
32
|
+
from agent_control_evaluators._discovery import (
|
|
33
|
+
discover_evaluators,
|
|
34
|
+
ensure_evaluators_discovered,
|
|
35
|
+
list_evaluators,
|
|
36
|
+
reset_evaluator_discovery,
|
|
37
|
+
)
|
|
38
|
+
from agent_control_evaluators._factory import clear_evaluator_cache, get_evaluator_instance
|
|
39
|
+
from agent_control_evaluators._registry import (
|
|
40
|
+
clear_evaluators,
|
|
41
|
+
get_all_evaluators,
|
|
42
|
+
get_evaluator,
|
|
43
|
+
register_evaluator,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Import built-in evaluators to auto-register them
|
|
47
|
+
from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig
|
|
48
|
+
from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig
|
|
49
|
+
from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig
|
|
50
|
+
from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
# Core infrastructure
|
|
54
|
+
"Evaluator",
|
|
55
|
+
"EvaluatorConfig",
|
|
56
|
+
"EvaluatorMetadata",
|
|
57
|
+
"register_evaluator",
|
|
58
|
+
"get_evaluator",
|
|
59
|
+
"get_all_evaluators",
|
|
60
|
+
"clear_evaluators",
|
|
61
|
+
"discover_evaluators",
|
|
62
|
+
"ensure_evaluators_discovered",
|
|
63
|
+
"reset_evaluator_discovery",
|
|
64
|
+
"list_evaluators",
|
|
65
|
+
"get_evaluator_instance",
|
|
66
|
+
"clear_evaluator_cache",
|
|
67
|
+
# Built-in evaluators
|
|
68
|
+
"RegexEvaluator",
|
|
69
|
+
"RegexEvaluatorConfig",
|
|
70
|
+
"ListEvaluator",
|
|
71
|
+
"ListEvaluatorConfig",
|
|
72
|
+
"JSONEvaluator",
|
|
73
|
+
"JSONEvaluatorConfig",
|
|
74
|
+
"SQLEvaluator",
|
|
75
|
+
"SQLEvaluatorConfig",
|
|
76
|
+
]
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Evaluator base classes and metadata."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar
|
|
9
|
+
|
|
10
|
+
from agent_control_models import EvaluatorResult
|
|
11
|
+
from agent_control_models.base import BaseModel
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from typing import Self
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EvaluatorConfig(BaseModel):
    """Common parent for every typed evaluator configuration.

    Deriving from the project's BaseModel keeps behavior consistent and lets
    type checkers treat all evaluator configs uniformly. The class adds no
    fields of its own; subclasses declare them.

    Example:
        ```python
        from agent_control_evaluators import EvaluatorConfig

        class MyEvaluatorConfig(EvaluatorConfig):
            pattern: str
            threshold: float = 0.5
        ```
    """
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
ConfigT = TypeVar("ConfigT", bound=EvaluatorConfig)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class EvaluatorMetadata:
    """Descriptive information attached to an evaluator class.

    Attributes:
        name: Unique evaluator name (e.g., "regex", "galileo.luna2").
        version: Version string of the evaluator.
        description: Human-readable summary of what the evaluator does.
        requires_api_key: Whether an API key is needed to use the evaluator.
        timeout_ms: Default timeout, expressed in milliseconds.
    """

    name: str
    version: str
    description: str
    requires_api_key: bool = False
    timeout_ms: int = 10_000
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Evaluator(ABC, Generic[ConfigT]):  # noqa: UP046 - need Python 3.10 compat
    """Base class for all evaluators (built-in, external, or custom).

    All evaluators follow the same pattern:
        1. Define metadata and config_model as class variables
        2. Implement evaluate() method
        3. Register with @register_evaluator decorator

    IMPORTANT - Instance Caching & Thread Safety:
        Evaluator instances are cached and reused across multiple evaluate() calls
        when they have the same configuration. This means:

        - DO NOT store mutable request-scoped state on `self`
        - The evaluate() method may be called concurrently from multiple requests
        - Any state stored in __init__ should be immutable or thread-safe
        - Use local variables within evaluate() for request-specific state

        Good pattern:
            def __init__(self, config):
                super().__init__(config)
                self._compiled_regex = re.compile(config.pattern)  # OK: immutable

            async def evaluate(self, data):
                result = self._compiled_regex.search(data)  # OK: uses immutable state
                return EvaluatorResult(matched=result is not None, ...)

        Bad pattern:
            def __init__(self, config):
                super().__init__(config)
                self.call_count = 0  # BAD: mutable state shared across requests

            async def evaluate(self, data):
                self.call_count += 1  # BAD: race condition, leaks between requests

    Example:
        ```python
        from agent_control_evaluators import (
            Evaluator,
            EvaluatorConfig,
            EvaluatorMetadata,
            register_evaluator,
        )
        from agent_control_models import EvaluatorResult

        class MyConfig(EvaluatorConfig):
            threshold: float = 0.5

        @register_evaluator
        class MyEvaluator(Evaluator[MyConfig]):
            metadata = EvaluatorMetadata(
                name="my-evaluator",
                version="1.0.0",
                description="My custom evaluator",
            )
            config_model = MyConfig

            async def evaluate(self, data: Any) -> EvaluatorResult:
                return EvaluatorResult(
                    matched=len(str(data)) > self.config.threshold,
                    confidence=1.0,
                    message="Evaluation complete"
                )
        ```
    """

    # Subclasses are expected to assign both of these at class level.
    metadata: ClassVar[EvaluatorMetadata]
    config_model: ClassVar[type[EvaluatorConfig]]

    def __init__(self, config: ConfigT) -> None:
        """Initialize evaluator with validated config.

        Args:
            config: Validated configuration (instance of config_model)
        """
        self.config: ConfigT = config

    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> Self:
        """Create evaluator instance from raw config dict.

        Validates config against config_model before creating instance.

        Args:
            config_dict: Raw configuration dictionary; its items are passed as
                keyword arguments to ``config_model``.

        Returns:
            Evaluator instance with validated config
        """
        validated = cls.config_model(**config_dict)
        return cls(validated)  # type: ignore[arg-type]

    @abstractmethod
    async def evaluate(self, data: Any) -> EvaluatorResult:
        """Evaluate data and return result.

        Args:
            data: Data extracted by selector from the payload

        Returns:
            EvaluatorResult with matched status, confidence, and message
        """
        pass

    def get_timeout_seconds(self) -> float:
        """Get timeout in seconds from config or metadata default.

        The config's ``timeout_ms`` attribute wins when it is present and not
        ``None``; otherwise ``metadata.timeout_ms`` is used.

        Returns:
            The selected timeout converted from milliseconds to seconds.
        """
        timeout_ms = getattr(self.config, "timeout_ms", None)
        if timeout_ms is None:
            # getattr's default only covers a *missing* attribute. A config that
            # declares timeout_ms but leaves it as None would otherwise reach
            # float(None) and raise TypeError.
            timeout_ms = self.metadata.timeout_ms
        return float(timeout_ms) / 1000.0

    @classmethod
    def is_available(cls) -> bool:
        """Check if evaluator dependencies are satisfied.

        Override this method for evaluators with optional dependencies.
        Return False to skip registration during discovery.

        Returns:
            True if evaluator can be used, False otherwise
        """
        return True
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Evaluator discovery via entry points."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
from importlib.metadata import entry_points
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
from agent_control_evaluators._registry import (
|
|
11
|
+
get_all_evaluators,
|
|
12
|
+
get_evaluator,
|
|
13
|
+
register_evaluator,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from agent_control_evaluators._base import Evaluator
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
_DISCOVERY_COMPLETE = False
|
|
22
|
+
_DISCOVERY_LOCK = threading.Lock()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load_and_register(ep: Any) -> bool:
    """Load one entry point and register the evaluator class it yields.

    Returns True only when the class was newly registered. Returns False when
    the name is already registered or the class reports itself unavailable.
    Exceptions are not handled here; they propagate to the caller.
    """
    evaluator_class = ep.load()
    name = evaluator_class.metadata.name

    # An evaluator registered earlier under the same name is left in place.
    already_registered = get_evaluator(name) is not None
    if already_registered:
        return False

    # Honor the evaluator's own dependency check before registering it.
    if not evaluator_class.is_available():
        logger.debug(f"Evaluator '{name}' not available, skipping")
        return False

    register_evaluator(evaluator_class)
    logger.debug(f"Registered evaluator: {name}")
    return True


def discover_evaluators() -> int:
    """Find evaluators through entry points and register them.

    Every entry point in the 'agent_control.evaluators' group is loaded
    (built-in and third-party alike). A loaded class is registered only when
    its name is not registered yet and its `is_available()` returns True.

    Discovery runs at most once until the state is reset; later calls return 0
    immediately. The completion flag is re-checked under a lock so concurrent
    callers do not repeat the scan.

    Returns:
        Number of evaluators newly registered by this call.
    """
    global _DISCOVERY_COMPLETE

    # Cheap unlocked check for the common "already done" case.
    if _DISCOVERY_COMPLETE:
        return 0

    with _DISCOVERY_LOCK:
        # Another caller may have finished while this one waited for the lock.
        if _DISCOVERY_COMPLETE:
            return 0

        discovered = 0

        try:
            for ep in entry_points(group="agent_control.evaluators"):
                # A failure in one entry point must not stop the others.
                try:
                    if _load_and_register(ep):
                        discovered += 1
                except Exception as e:
                    logger.warning(f"Failed to load evaluator '{ep.name}': {e}")
        except Exception as e:
            logger.debug(f"Entry point discovery not available: {e}")

        # Marked complete even when the scan itself failed, so it is not retried.
        _DISCOVERY_COMPLETE = True
        logger.debug(f"Evaluator discovery complete: {discovered} new evaluators")
        return discovered
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def ensure_evaluators_discovered() -> None:
    """Run evaluator discovery unless it has already completed.

    Call this before looking up or using evaluators.
    """
    if _DISCOVERY_COMPLETE:
        return
    discover_evaluators()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def reset_evaluator_discovery() -> None:
    """Reset discovery state. Useful for testing.

    Clears the module-level ``_DISCOVERY_COMPLETE`` flag while holding
    ``_DISCOVERY_LOCK``, so the next ``discover_evaluators()`` call scans the
    entry points again. This function does not remove evaluators that are
    already registered.
    """
    global _DISCOVERY_COMPLETE
    with _DISCOVERY_LOCK:
        _DISCOVERY_COMPLETE = False
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# =============================================================================
|
|
96
|
+
# Public evaluator API
|
|
97
|
+
# =============================================================================
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def list_evaluators() -> dict[str, type[Evaluator[Any]]]:
    """Return every registered evaluator, running discovery first if needed.

    Returns:
        Dictionary mapping evaluator names to evaluator classes
    """
    ensure_evaluators_discovered()
    registered = get_all_evaluators()
    return registered
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Evaluator factory with instance caching."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from collections import OrderedDict
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
from agent_control_evaluators._discovery import list_evaluators
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from agent_control_models import EvaluatorSpec
|
|
15
|
+
|
|
16
|
+
from agent_control_evaluators._base import Evaluator
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Configuration
|
|
21
|
+
DEFAULT_CACHE_SIZE = 100
|
|
22
|
+
MIN_CACHE_SIZE = 1 # Minimum to avoid infinite loop in eviction
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _parse_cache_size() -> int:
    """Read the cache size from the EVALUATOR_CACHE_SIZE environment variable.

    Returns:
        The integer value of the variable, or DEFAULT_CACHE_SIZE when the
        variable is unset or cannot be parsed as an integer (a warning is
        logged in the latter case). No range check is applied here.
    """
    raw = os.environ.get("EVALUATOR_CACHE_SIZE")
    if raw is None:
        return DEFAULT_CACHE_SIZE

    try:
        parsed = int(raw)
    except ValueError:
        logger.warning(
            f"Invalid EVALUATOR_CACHE_SIZE '{raw}', using default {DEFAULT_CACHE_SIZE}"
        )
        return DEFAULT_CACHE_SIZE
    else:
        return parsed
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
EVALUATOR_CACHE_SIZE = max(_parse_cache_size(), MIN_CACHE_SIZE)
|
|
40
|
+
|
|
41
|
+
# LRU cache for evaluator instances: cache_key -> Evaluator instance
|
|
42
|
+
_EVALUATOR_CACHE: OrderedDict[str, Evaluator[Any]] = OrderedDict()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _config_hash(config: dict[str, Any]) -> str:
    """Serialize a config dict into a deterministic string usable as a key.

    The result is the JSON text of ``config`` with keys sorted. Values that
    JSON cannot encode natively are converted with ``str``.
    """
    serialized = json.dumps(config, default=str, sort_keys=True)
    return serialized
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _evict_until_room() -> None:
    """Drop the oldest cache entries until one more instance fits."""
    while len(_EVALUATOR_CACHE) >= EVALUATOR_CACHE_SIZE:
        evicted_key, _ = _EVALUATOR_CACHE.popitem(last=False)
        logger.debug(f"Evicted evaluator from cache: {evicted_key}")


def get_evaluator_instance(evaluator_spec: EvaluatorSpec) -> Evaluator[Any]:
    """Return an evaluator instance for a specification, reusing cached ones.

    Instances are kept in an LRU cache keyed by
    ``{evaluator_name}:{config_hash}``, so equal specifications share one
    instance.

    WARNING: Evaluator instances are cached and reused across requests!
    Evaluator implementations MUST be stateless - do not store mutable
    request-scoped state on the evaluator instance. See Evaluator
    docstring for details on safe patterns.

    Args:
        evaluator_spec: The evaluator specification with name and config

    Returns:
        Evaluator instance (cached or new)

    Raises:
        ValueError: If evaluator not found
    """
    cache_key = f"{evaluator_spec.name}:{_config_hash(evaluator_spec.config)}"

    is_cached = cache_key in _EVALUATOR_CACHE
    if is_cached:
        # Refresh recency so this entry is the last candidate for eviction.
        _EVALUATOR_CACHE.move_to_end(cache_key)
        logger.debug(f"Cache hit for evaluator: {evaluator_spec.name}")
        return _EVALUATOR_CACHE[cache_key]

    # Not cached: resolve the class by name, then build a fresh instance.
    evaluators = list_evaluators()
    evaluator_cls = evaluators.get(evaluator_spec.name)
    if evaluator_cls is None:
        raise ValueError(
            f"Evaluator '{evaluator_spec.name}' not found. "
            f"Available evaluators: {', '.join(evaluators.keys())}"
        )

    logger.debug(f"Cache miss, creating evaluator: {evaluator_spec.name}")
    instance = evaluator_cls.from_dict(evaluator_spec.config)

    # Eviction happens only after construction succeeded, so a failing config
    # never costs an existing cache entry.
    _evict_until_room()
    _EVALUATOR_CACHE[cache_key] = instance
    return instance
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def clear_evaluator_cache() -> None:
    """Clear all cached evaluator instances. Useful for testing.

    Empties the module-level ``_EVALUATOR_CACHE`` in place. Nothing else is
    modified by this call.
    """
    _EVALUATOR_CACHE.clear()
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Evaluator registry for registration and lookup."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from agent_control_evaluators._base import Evaluator
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# =============================================================================
|
|
14
|
+
# Evaluator Registry
|
|
15
|
+
# =============================================================================
|
|
16
|
+
|
|
17
|
+
_EVALUATOR_REGISTRY: dict[str, type[Evaluator[Any]]] = {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def register_evaluator(
    evaluator_class: type[Evaluator[Any]],
) -> type[Evaluator[Any]]:
    """Add an evaluator class to the registry under its metadata name.

    Works both as a decorator and as a plain function call. The evaluator's
    is_available() result is honored: a class whose dependencies are not
    available is skipped without an error and returned unchanged.

    Args:
        evaluator_class: Evaluator class to register

    Returns:
        The same evaluator class (for decorator usage)

    Raises:
        ValueError: If evaluator name already registered with different class

    Example:
        ```python
        @register_evaluator
        class MyEvaluator(Evaluator[MyConfig]):
            metadata = EvaluatorMetadata(name="my-evaluator", ...)
            ...
        ```
    """
    name = evaluator_class.metadata.name

    # Unavailable evaluators are never added to the registry.
    if not evaluator_class.is_available():
        logger.debug(f"Evaluator '{name}' not available (is_available=False), skipping")
        return evaluator_class

    if name in _EVALUATOR_REGISTRY:
        if _EVALUATOR_REGISTRY[name] is not evaluator_class:
            raise ValueError(f"Evaluator '{name}' is already registered")
        # Registering the very same class again is allowed (e.g., hot reload).
        return evaluator_class

    _EVALUATOR_REGISTRY[name] = evaluator_class
    logger.debug(f"Registered evaluator: {name} v{evaluator_class.metadata.version}")
    return evaluator_class
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_evaluator(name: str) -> type[Evaluator[Any]] | None:
    """Look up a registered evaluator class by its name.

    Args:
        name: Evaluator name to look up

    Returns:
        Evaluator class if found, None otherwise
    """
    if name in _EVALUATOR_REGISTRY:
        return _EVALUATOR_REGISTRY[name]
    return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_all_evaluators() -> dict[str, type[Evaluator[Any]]]:
    """Return a snapshot of the evaluator registry.

    The result is a new dictionary, so changing it does not alter the
    registry itself.

    Returns:
        Dictionary mapping evaluator names to evaluator classes
    """
    snapshot = _EVALUATOR_REGISTRY.copy()
    return snapshot
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def clear_evaluators() -> None:
    """Clear all registered evaluators. Useful for testing.

    Empties the module-level ``_EVALUATOR_REGISTRY`` in place. Nothing else is
    modified by this call.
    """
    _EVALUATOR_REGISTRY.clear()
|