agent-control-evaluators 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ """Agent Control Evaluators.
2
+
3
+ This package contains builtin evaluator implementations for agent-control.
4
+ Built-in evaluators (regex, list, json, sql) are registered automatically on import.
5
+
6
+ Available evaluators:
7
+ Built-in (no namespace):
8
+ - regex: Regular expression matching
9
+ - list: List-based value matching
10
+ - json: JSON validation
11
+ - sql: SQL query validation
12
+
13
+ Naming convention:
14
+ - Built-in: "regex", "list", "json", "sql"
15
+ - External: "provider.name" (e.g., "galileo.luna2")
16
+ - Agent-scoped: "agent:name" (custom code deployed with agent)
17
+
18
+ External evaluators are installed via separate packages (e.g., agent-control-evaluator-galileo).
19
+ Custom evaluators are Evaluator classes deployed with the engine.
20
+ Their schemas are registered via initAgent for validation purposes.
21
+ """
22
+
23
+ from importlib.metadata import PackageNotFoundError, version
24
+
25
+ try:
26
+ __version__ = version("agent-control-evaluators")
27
+ except PackageNotFoundError:
28
+ __version__ = "0.0.0.dev"
29
+
30
+ # Core infrastructure - export from _base, _discovery, _factory and _registry
31
+ from agent_control_evaluators._base import Evaluator, EvaluatorConfig, EvaluatorMetadata
32
+ from agent_control_evaluators._discovery import (
33
+ discover_evaluators,
34
+ ensure_evaluators_discovered,
35
+ list_evaluators,
36
+ reset_evaluator_discovery,
37
+ )
38
+ from agent_control_evaluators._factory import clear_evaluator_cache, get_evaluator_instance
39
+ from agent_control_evaluators._registry import (
40
+ clear_evaluators,
41
+ get_all_evaluators,
42
+ get_evaluator,
43
+ register_evaluator,
44
+ )
45
+
46
+ # Import built-in evaluators to auto-register them
47
+ from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig
48
+ from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig
49
+ from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig
50
+ from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig
51
+
52
+ __all__ = [
53
+ # Core infrastructure
54
+ "Evaluator",
55
+ "EvaluatorConfig",
56
+ "EvaluatorMetadata",
57
+ "register_evaluator",
58
+ "get_evaluator",
59
+ "get_all_evaluators",
60
+ "clear_evaluators",
61
+ "discover_evaluators",
62
+ "ensure_evaluators_discovered",
63
+ "reset_evaluator_discovery",
64
+ "list_evaluators",
65
+ "get_evaluator_instance",
66
+ "clear_evaluator_cache",
67
+ # Built-in evaluators
68
+ "RegexEvaluator",
69
+ "RegexEvaluatorConfig",
70
+ "ListEvaluator",
71
+ "ListEvaluatorConfig",
72
+ "JSONEvaluator",
73
+ "JSONEvaluatorConfig",
74
+ "SQLEvaluator",
75
+ "SQLEvaluatorConfig",
76
+ ]
@@ -0,0 +1,178 @@
1
+ """Evaluator base classes and metadata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar
9
+
10
+ from agent_control_models import EvaluatorResult
11
+ from agent_control_models.base import BaseModel
12
+
13
+ if TYPE_CHECKING:
14
+ from typing import Self
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class EvaluatorConfig(BaseModel):
    """Common parent for every typed evaluator configuration.

    Subclassing the project's BaseModel gives all evaluator configs the same
    model behavior and a single type that `Evaluator` can be parameterized on.
    The class adds no fields of its own; concrete configs declare theirs.

    Example:
        ```python
        from agent_control_evaluators import EvaluatorConfig

        class MyEvaluatorConfig(EvaluatorConfig):
            pattern: str
            threshold: float = 0.5
        ```
    """
36
+
37
+
38
+ ConfigT = TypeVar("ConfigT", bound=EvaluatorConfig)
39
+
40
+
41
@dataclass
class EvaluatorMetadata:
    """Descriptive, class-level information attached to an evaluator.

    Attributes:
        name: Unique evaluator name (e.g., "regex", "galileo.luna2").
        version: Version string of the evaluator implementation.
        description: Short human-readable summary.
        requires_api_key: True when the evaluator needs an API key.
        timeout_ms: Default timeout, expressed in milliseconds.
    """

    # Required identification fields.
    name: str
    version: str
    description: str
    # Optional fields with their defaults.
    requires_api_key: bool = False
    timeout_ms: int = 10_000
58
+
59
+
60
class Evaluator(ABC, Generic[ConfigT]):  # noqa: UP046 - need Python 3.10 compat
    """Base class for all evaluators (built-in, external, or custom).

    All evaluators follow the same pattern:
        1. Define metadata and config_model as class variables
        2. Implement evaluate() method
        3. Register with @register_evaluator decorator

    IMPORTANT - Instance Caching & Thread Safety:
        Evaluator instances are cached and reused across multiple evaluate() calls
        when they have the same configuration. This means:

        - DO NOT store mutable request-scoped state on `self`
        - The evaluate() method may be called concurrently from multiple requests
        - Any state stored in __init__ should be immutable or thread-safe
        - Use local variables within evaluate() for request-specific state

    Good pattern:
        def __init__(self, config):
            super().__init__(config)
            self._compiled_regex = re.compile(config.pattern)  # OK: immutable

        async def evaluate(self, data):
            result = self._compiled_regex.search(data)  # OK: uses immutable state
            return EvaluatorResult(matched=result is not None, ...)

    Bad pattern:
        def __init__(self, config):
            super().__init__(config)
            self.call_count = 0  # BAD: mutable state shared across requests

        async def evaluate(self, data):
            self.call_count += 1  # BAD: race condition, leaks between requests

    Example:
        ```python
        from agent_control_evaluators import (
            Evaluator,
            EvaluatorConfig,
            EvaluatorMetadata,
            register_evaluator,
        )
        from agent_control_models import EvaluatorResult

        class MyConfig(EvaluatorConfig):
            threshold: float = 0.5

        @register_evaluator
        class MyEvaluator(Evaluator[MyConfig]):
            metadata = EvaluatorMetadata(
                name="my-evaluator",
                version="1.0.0",
                description="My custom evaluator",
            )
            config_model = MyConfig

            async def evaluate(self, data: Any) -> EvaluatorResult:
                return EvaluatorResult(
                    matched=len(str(data)) > self.config.threshold,
                    confidence=1.0,
                    message="Evaluation complete"
                )
        ```
    """

    # Both class variables are declared here and must be assigned by subclasses.
    metadata: ClassVar[EvaluatorMetadata]
    config_model: ClassVar[type[EvaluatorConfig]]

    def __init__(self, config: ConfigT) -> None:
        """Initialize evaluator with validated config.

        Args:
            config: Validated configuration (instance of config_model)
        """
        self.config: ConfigT = config

    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> Self:
        """Create evaluator instance from raw config dict.

        Validates config against config_model before creating instance.

        Args:
            config_dict: Raw configuration dictionary; its items are passed
                to config_model as keyword arguments.

        Returns:
            Evaluator instance with validated config
        """
        validated = cls.config_model(**config_dict)
        return cls(validated)  # type: ignore[arg-type]

    @abstractmethod
    async def evaluate(self, data: Any) -> EvaluatorResult:
        """Evaluate data and return result.

        Args:
            data: Data extracted by selector from the payload

        Returns:
            EvaluatorResult with matched status, confidence, and message
        """
        pass

    def get_timeout_seconds(self) -> float:
        """Get timeout in seconds from config or metadata default.

        The config's ``timeout_ms`` attribute wins when it is present and not
        None; otherwise ``metadata.timeout_ms`` is used.

        Returns:
            The timeout converted from milliseconds to seconds.
        """
        default_ms = self.metadata.timeout_ms
        timeout_ms = getattr(self.config, "timeout_ms", None)
        # getattr's default only covers a *missing* attribute; a config that
        # declares an optional timeout and leaves it as None must also fall
        # back, otherwise float(None) raises TypeError.
        if timeout_ms is None:
            timeout_ms = default_ms
        return float(timeout_ms) / 1000.0

    @classmethod
    def is_available(cls) -> bool:
        """Check if evaluator dependencies are satisfied.

        Override this method for evaluators with optional dependencies.
        Return False to skip registration during discovery.

        Returns:
            True if evaluator can be used, False otherwise
        """
        return True
@@ -0,0 +1,109 @@
1
+ """Evaluator discovery via entry points."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import threading
7
+ from importlib.metadata import entry_points
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ from agent_control_evaluators._registry import (
11
+ get_all_evaluators,
12
+ get_evaluator,
13
+ register_evaluator,
14
+ )
15
+
16
+ if TYPE_CHECKING:
17
+ from agent_control_evaluators._base import Evaluator
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ _DISCOVERY_COMPLETE = False
22
+ _DISCOVERY_LOCK = threading.Lock()
23
+
24
+
25
def discover_evaluators() -> int:
    """Load evaluators advertised through entry points and register them.

    Every entry point in the 'agent_control.evaluators' group is loaded. A
    loaded class is registered unless an evaluator with the same name is
    already in the registry or its `is_available()` returns False.

    The work is done at most once between resets: later calls return 0
    immediately. The completion flag is re-checked under a lock so that
    concurrent callers do not run discovery twice.

    Returns:
        Number of evaluators newly registered by this call
    """
    global _DISCOVERY_COMPLETE

    # Cheap unlocked check for the common "already done" case.
    if _DISCOVERY_COMPLETE:
        return 0

    with _DISCOVERY_LOCK:
        # Another caller may have finished while we waited for the lock.
        if _DISCOVERY_COMPLETE:
            return 0

        discovered = 0

        try:
            for ep in entry_points(group="agent_control.evaluators"):
                # A broken entry point must not prevent the others from loading.
                try:
                    candidate = ep.load()
                    name = candidate.metadata.name

                    if get_evaluator(name) is not None:
                        # Already registered (e.g. by a direct import).
                        continue

                    if candidate.is_available():
                        register_evaluator(candidate)
                        logger.debug(f"Registered evaluator: {name}")
                        discovered += 1
                    else:
                        logger.debug(f"Evaluator '{name}' not available, skipping")
                except Exception as e:
                    logger.warning(f"Failed to load evaluator '{ep.name}': {e}")
        except Exception as e:
            logger.debug(f"Entry point discovery not available: {e}")

        # Mark complete even after failures so discovery is not retried on every call.
        _DISCOVERY_COMPLETE = True
        logger.debug(f"Evaluator discovery complete: {discovered} new evaluators")
        return discovered
80
+
81
+
82
def ensure_evaluators_discovered() -> None:
    """Run evaluator discovery if it has not completed yet.

    Intended to be called before evaluators are looked up or listed.
    """
    if _DISCOVERY_COMPLETE:
        return
    discover_evaluators()
86
+
87
+
88
def reset_evaluator_discovery() -> None:
    """Mark discovery as not yet run so the next call performs it again.

    Only the completion flag is reset; the registry itself is not touched.
    Mainly useful in tests.
    """
    global _DISCOVERY_COMPLETE

    # Take the same lock discover_evaluators() holds while it sets the flag.
    with _DISCOVERY_LOCK:
        _DISCOVERY_COMPLETE = False
93
+
94
+
95
+ # =============================================================================
96
+ # Public evaluator API
97
+ # =============================================================================
98
+
99
+
100
def list_evaluators() -> dict[str, type[Evaluator[Any]]]:
    """Return every registered evaluator, running discovery first if needed.

    Returns:
        Dictionary mapping evaluator names to evaluator classes
    """
    ensure_evaluators_discovered()
    registered = get_all_evaluators()
    return registered
@@ -0,0 +1,105 @@
1
+ """Evaluator factory with instance caching."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ from collections import OrderedDict
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ from agent_control_evaluators._discovery import list_evaluators
12
+
13
+ if TYPE_CHECKING:
14
+ from agent_control_models import EvaluatorSpec
15
+
16
+ from agent_control_evaluators._base import Evaluator
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Configuration
21
+ DEFAULT_CACHE_SIZE = 100
22
+ MIN_CACHE_SIZE = 1  # Minimum so eviction never pops from an empty cache (KeyError)
23
+
24
+
25
def _parse_cache_size() -> int:
    """Read the cache size from the EVALUATOR_CACHE_SIZE environment variable.

    Returns:
        The integer value of the variable, or DEFAULT_CACHE_SIZE when the
        variable is unset or is not a valid integer (a warning is logged in
        the latter case).
    """
    raw = os.environ.get("EVALUATOR_CACHE_SIZE")
    if raw is not None:
        try:
            return int(raw)
        except ValueError:
            logger.warning(
                f"Invalid EVALUATOR_CACHE_SIZE '{raw}', using default {DEFAULT_CACHE_SIZE}"
            )
    # Unset or unparsable: use the built-in default.
    return DEFAULT_CACHE_SIZE
37
+
38
+
39
+ EVALUATOR_CACHE_SIZE = max(_parse_cache_size(), MIN_CACHE_SIZE)
40
+
41
+ # LRU cache for evaluator instances: cache_key -> Evaluator instance
42
+ _EVALUATOR_CACHE: OrderedDict[str, Evaluator[Any]] = OrderedDict()
43
+
44
+
45
def _config_hash(config: dict[str, Any]) -> str:
    """Serialize a config dict into a deterministic string usable as a key.

    Keys are sorted so that dicts with the same items produce the same
    string regardless of insertion order. Values JSON cannot encode are
    converted with str().

    Args:
        config: Raw evaluator configuration

    Returns:
        JSON text representing the config
    """
    serialized = json.dumps(config, sort_keys=True, default=str)
    return serialized
48
+
49
+
50
def get_evaluator_instance(evaluator_spec: EvaluatorSpec) -> Evaluator[Any]:
    """Return an evaluator for the given spec, reusing a cached one if possible.

    Instances are kept in an LRU cache keyed by
    ``{evaluator_name}:{config_hash}``. On a hit the entry is marked most
    recently used; on a miss the evaluator class is looked up, instantiated
    from the spec's config, and stored, evicting the least recently used
    entries while the cache is at capacity.

    WARNING: Because instances are shared across requests, evaluator
    implementations MUST NOT keep mutable request-scoped state on the
    instance.

    Args:
        evaluator_spec: The evaluator specification with name and config

    Returns:
        Evaluator instance (cached or new)

    Raises:
        ValueError: If evaluator not found
    """
    key = f"{evaluator_spec.name}:{_config_hash(evaluator_spec.config)}"

    # Hit path: move_to_end raises KeyError when the key is absent, which
    # doubles as the membership test.
    try:
        _EVALUATOR_CACHE.move_to_end(key)
    except KeyError:
        pass
    else:
        logger.debug(f"Cache hit for evaluator: {evaluator_spec.name}")
        return _EVALUATOR_CACHE[key]

    # Miss path: resolve the class by name.
    evaluators = list_evaluators()
    factory_cls = evaluators.get(evaluator_spec.name)
    if factory_cls is None:
        raise ValueError(
            f"Evaluator '{evaluator_spec.name}' not found. "
            f"Available evaluators: {', '.join(evaluators.keys())}"
        )

    logger.debug(f"Cache miss, creating evaluator: {evaluator_spec.name}")
    created = factory_cls.from_dict(evaluator_spec.config)

    # Make room before inserting; the oldest entries sit at the front.
    while len(_EVALUATOR_CACHE) >= EVALUATOR_CACHE_SIZE:
        evicted_key, _ = _EVALUATOR_CACHE.popitem(last=False)
        logger.debug(f"Evicted evaluator from cache: {evicted_key}")

    _EVALUATOR_CACHE[key] = created
    return created
101
+
102
+
103
def clear_evaluator_cache() -> None:
    """Drop every cached evaluator instance.

    The cache object is emptied in place. Mainly useful in tests.
    """
    _EVALUATOR_CACHE.clear()
@@ -0,0 +1,87 @@
1
+ """Evaluator registry for registration and lookup."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from agent_control_evaluators._base import Evaluator
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # =============================================================================
14
+ # Evaluator Registry
15
+ # =============================================================================
16
+
17
+ _EVALUATOR_REGISTRY: dict[str, type[Evaluator[Any]]] = {}
18
+
19
+
20
def register_evaluator(
    evaluator_class: type[Evaluator[Any]],
) -> type[Evaluator[Any]]:
    """Add an evaluator class to the registry under its metadata name.

    Works both as a class decorator and as a plain function call. A class
    whose is_available() returns False is not registered; it is returned
    unchanged and only a debug message is logged.

    Args:
        evaluator_class: Evaluator class to register

    Returns:
        The same evaluator class (for decorator usage)

    Raises:
        ValueError: If evaluator name already registered with different class

    Example:
        ```python
        @register_evaluator
        class MyEvaluator(Evaluator[MyConfig]):
            metadata = EvaluatorMetadata(name="my-evaluator", ...)
            ...
        ```
    """
    name = evaluator_class.metadata.name

    # Evaluators with unmet dependencies are skipped rather than rejected.
    if not evaluator_class.is_available():
        logger.debug(f"Evaluator '{name}' not available (is_available=False), skipping")
        return evaluator_class

    if name not in _EVALUATOR_REGISTRY:
        _EVALUATOR_REGISTRY[name] = evaluator_class
        logger.debug(f"Registered evaluator: {name} v{evaluator_class.metadata.version}")
    elif _EVALUATOR_REGISTRY[name] is not evaluator_class:
        # Same name, different class: refuse. Re-registering the identical
        # class (e.g., during hot reload) falls through as a no-op.
        raise ValueError(f"Evaluator '{name}' is already registered")

    return evaluator_class
62
+
63
+
64
def get_evaluator(name: str) -> type[Evaluator[Any]] | None:
    """Look up a registered evaluator class by name.

    Args:
        name: Evaluator name to look up

    Returns:
        The registered class, or None when no evaluator has that name
    """
    try:
        return _EVALUATOR_REGISTRY[name]
    except KeyError:
        return None
74
+
75
+
76
def get_all_evaluators() -> dict[str, type[Evaluator[Any]]]:
    """Return a snapshot of the evaluator registry.

    Returns:
        A new dictionary mapping evaluator names to evaluator classes;
        modifying it does not affect the registry.
    """
    return _EVALUATOR_REGISTRY.copy()
83
+
84
+
85
def clear_evaluators() -> None:
    """Remove every evaluator from the registry.

    The registry dict is emptied in place. Mainly useful in tests.
    """
    _EVALUATOR_REGISTRY.clear()
@@ -0,0 +1,6 @@
1
+ """JSON validation evaluator."""
2
+
3
+ from agent_control_evaluators.json.config import JSONEvaluatorConfig
4
+ from agent_control_evaluators.json.evaluator import JSONEvaluator
5
+
6
+ __all__ = ["JSONEvaluator", "JSONEvaluatorConfig"]