adversarial-workflow 0.6.6__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,10 @@ YAML parsing and discovery for custom evaluators.
4
4
  This module handles discovering evaluator definitions from
5
5
  .adversarial/evaluators/*.yml files and parsing them into
6
6
  EvaluatorConfig objects.
7
+
8
+ Supports dual-field model specification (ADV-0015):
9
+ - Legacy: model + api_key_env fields (backwards compatible)
10
+ - New: model_requirement field (resolved via ModelResolver)
7
11
  """
8
12
 
9
13
  from __future__ import annotations
@@ -14,7 +18,7 @@ from pathlib import Path
14
18
 
15
19
  import yaml
16
20
 
17
- from .config import EvaluatorConfig
21
+ from .config import EvaluatorConfig, ModelRequirement
18
22
 
19
23
  logger = logging.getLogger(__name__)
20
24
 
@@ -54,26 +58,39 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
54
58
  raise EvaluatorParseError(f"YAML must be a mapping, got {type(data).__name__}: {yml_file}")
55
59
 
56
60
  # Validate required fields exist
57
- required = [
61
+ # model and api_key_env are only required if model_requirement is not present
62
+ always_required = [
58
63
  "name",
59
64
  "description",
60
- "model",
61
- "api_key_env",
62
65
  "prompt",
63
66
  "output_suffix",
64
67
  ]
65
- missing = [f for f in required if f not in data]
68
+ has_model_requirement = "model_requirement" in data
69
+ if not has_model_requirement:
70
+ # Legacy format: model and api_key_env are required
71
+ always_required.extend(["model", "api_key_env"])
72
+
73
+ missing = [f for f in always_required if f not in data]
66
74
  if missing:
67
75
  raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
68
76
 
69
77
  # Validate required fields are strings (YAML can parse 'yes' as bool, '123' as int)
70
- for field in required:
78
+ for field in always_required:
71
79
  value = data[field]
72
80
  if not isinstance(value, str):
73
81
  raise EvaluatorParseError(
74
82
  f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
75
83
  )
76
84
 
85
+ # Validate model and api_key_env are strings if present (even when optional)
86
+ for field in ["model", "api_key_env"]:
87
+ if field in data and data[field] is not None:
88
+ value = data[field]
89
+ if not isinstance(value, str):
90
+ raise EvaluatorParseError(
91
+ f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
92
+ )
93
+
77
94
  # Validate name format (valid CLI command name)
78
95
  name = data["name"]
79
96
  if not re.match(r"^[a-zA-Z][a-zA-Z0-9_-]*$", name):
@@ -143,6 +160,67 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
143
160
  )
144
161
  data["timeout"] = 600
145
162
 
163
+ # Parse model_requirement if present (ADV-0015)
164
+ model_requirement = None
165
+ if "model_requirement" in data:
166
+ req_data = data["model_requirement"]
167
+
168
+ # Validate model_requirement is a mapping
169
+ if not isinstance(req_data, dict):
170
+ raise EvaluatorParseError(
171
+ f"model_requirement must be a mapping, got {type(req_data).__name__}"
172
+ )
173
+
174
+ # Validate required fields in model_requirement
175
+ if "family" not in req_data:
176
+ raise EvaluatorParseError("model_requirement.family is required")
177
+ if "tier" not in req_data:
178
+ raise EvaluatorParseError("model_requirement.tier is required")
179
+
180
+ # Validate family and tier are strings
181
+ family = req_data["family"]
182
+ tier = req_data["tier"]
183
+ if not isinstance(family, str):
184
+ raise EvaluatorParseError(
185
+ f"model_requirement.family must be a string, got {type(family).__name__}"
186
+ )
187
+ if not isinstance(tier, str):
188
+ raise EvaluatorParseError(
189
+ f"model_requirement.tier must be a string, got {type(tier).__name__}"
190
+ )
191
+
192
+ # Validate optional min_version is string if present
193
+ min_version = req_data.get("min_version", "")
194
+ # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
195
+ if isinstance(min_version, bool):
196
+ raise EvaluatorParseError(
197
+ f"model_requirement.min_version must be a string, got bool: {min_version!r}"
198
+ )
199
+ # Convert integers to strings (YAML parses '0' as int 0)
200
+ if isinstance(min_version, int):
201
+ min_version = str(min_version)
202
+ elif min_version and not isinstance(min_version, str):
203
+ raise EvaluatorParseError(
204
+ f"model_requirement.min_version must be a string, got {type(min_version).__name__}"
205
+ )
206
+
207
+ # Validate optional min_context is integer if present
208
+ min_context = req_data.get("min_context", 0)
209
+ # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
210
+ if isinstance(min_context, bool):
211
+ raise EvaluatorParseError("model_requirement.min_context must be an integer, got bool")
212
+ if min_context and not isinstance(min_context, int):
213
+ raise EvaluatorParseError(
214
+ f"model_requirement.min_context must be an integer, got {type(min_context).__name__}"
215
+ )
216
+
217
+ model_requirement = ModelRequirement(
218
+ family=family,
219
+ tier=tier,
220
+ min_version=min_version,
221
+ min_context=min_context,
222
+ )
223
+
146
224
  # Filter to known fields only (log unknown fields)
147
225
  known_fields = {
148
226
  "name",
@@ -156,17 +234,27 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
156
234
  "aliases",
157
235
  "version",
158
236
  "timeout",
237
+ "model_requirement", # ADV-0015
159
238
  }
160
239
  unknown = set(data.keys()) - known_fields
161
240
  if unknown:
162
241
  logger.warning("Unknown fields in %s: %s", yml_file.name, ", ".join(sorted(unknown)))
163
242
 
164
- # Build filtered data dict
165
- filtered_data = {k: v for k, v in data.items() if k in known_fields}
243
+ # Build filtered data dict (exclude model_requirement as it's handled separately)
244
+ scalar_fields = known_fields - {"model_requirement"}
245
+ filtered_data = {k: v for k, v in data.items() if k in scalar_fields}
246
+
247
+ # Set defaults for optional model/api_key_env when model_requirement is present
248
+ # Also handle explicit null values (YAML parses empty or null as None)
249
+ if "model" not in filtered_data or filtered_data["model"] is None:
250
+ filtered_data["model"] = ""
251
+ if "api_key_env" not in filtered_data or filtered_data["api_key_env"] is None:
252
+ filtered_data["api_key_env"] = ""
166
253
 
167
- # Create config with metadata
254
+ # Create config with metadata and model_requirement
168
255
  config = EvaluatorConfig(
169
256
  **filtered_data,
257
+ model_requirement=model_requirement,
170
258
  source="local",
171
259
  config_file=str(yml_file),
172
260
  )
@@ -0,0 +1,211 @@
1
+ """
2
+ Model resolver for evaluator configurations (ADV-0015: Model Routing Layer - Phase 1).
3
+
4
+ This module provides the ModelResolver class that resolves model requirements
5
+ to actual model IDs using an embedded registry. It supports:
6
+ - model_requirement field (new structured format)
7
+ - model field (legacy string format)
8
+ - Fallback from model_requirement to model on resolution failure
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import warnings
14
+ from typing import TYPE_CHECKING, ClassVar
15
+
16
+ if TYPE_CHECKING:
17
+ from adversarial_workflow.evaluators.config import EvaluatorConfig, ModelRequirement
18
+
19
+
20
+ class ResolutionError(Exception):
21
+ """Raised when model resolution fails."""
22
+
23
+
24
+ class ModelResolver:
25
+ """Resolves model requirements to actual model IDs.
26
+
27
+ Uses an embedded registry (matching adversarial-evaluator-library/providers/registry.yml)
28
+ to map family/tier pairs to concrete model identifiers.
29
+
30
+ Resolution order:
31
+ 1. If model_requirement present: resolve via registry
32
+ 2. If resolution fails AND model present: warn + fallback to legacy
33
+ 3. If resolution fails AND no model: raise ResolutionError
34
+ 4. If no model_requirement AND model present: use legacy directly
35
+ 5. If neither: raise ResolutionError
36
+ """
37
+
38
+ # Default registry - matches adversarial-evaluator-library/providers/registry.yml
39
+ # Updated 2026-02-03 per Library team handoff (ADR-0005)
40
+ DEFAULT_REGISTRY: ClassVar[dict[str, dict[str, dict[str, list[str] | str]]]] = {
41
+ "claude": {
42
+ "opus": {
43
+ "models": ["claude-4-opus-20260115", "claude-opus-4-5-20251101"],
44
+ "prefix": "anthropic/",
45
+ },
46
+ "sonnet": {
47
+ "models": ["claude-4-sonnet-20260115"],
48
+ "prefix": "anthropic/",
49
+ },
50
+ "haiku": {
51
+ "models": ["claude-4-haiku-20260115"],
52
+ "prefix": "anthropic/",
53
+ },
54
+ },
55
+ "gpt": {
56
+ "flagship": {
57
+ "models": ["gpt-4o", "gpt-4o-2024-08-06"],
58
+ "prefix": "",
59
+ },
60
+ "standard": {
61
+ "models": ["gpt-4-turbo", "gpt-4"],
62
+ "prefix": "",
63
+ },
64
+ "mini": {
65
+ "models": ["gpt-4o-mini"],
66
+ "prefix": "",
67
+ },
68
+ },
69
+ "o": {
70
+ "flagship": {
71
+ "models": ["o1", "o1-2024-12-17"],
72
+ "prefix": "",
73
+ },
74
+ "mini": {
75
+ "models": ["o3-mini"],
76
+ "prefix": "",
77
+ },
78
+ },
79
+ "gemini": {
80
+ "pro": {
81
+ "models": ["gemini-2.5-pro"],
82
+ "prefix": "gemini/",
83
+ },
84
+ "flash": {
85
+ "models": ["gemini-2.5-flash"],
86
+ "prefix": "gemini/",
87
+ },
88
+ },
89
+ "mistral": {
90
+ "large": {
91
+ "models": ["mistral-large-latest"],
92
+ "prefix": "mistral/",
93
+ },
94
+ "small": {
95
+ "models": ["mistral-small-latest"],
96
+ "prefix": "mistral/",
97
+ },
98
+ },
99
+ "codestral": {
100
+ "latest": {
101
+ "models": ["codestral-latest"],
102
+ "prefix": "mistral/",
103
+ },
104
+ },
105
+ "llama": {
106
+ "large": {
107
+ "models": ["llama-3.3-70b"],
108
+ "prefix": "", # varies by host
109
+ },
110
+ "medium": {
111
+ "models": ["llama-3.1-8b"],
112
+ "prefix": "",
113
+ },
114
+ },
115
+ }
116
+
117
+ # API key environment variable mapping by family
118
+ API_KEY_MAP: ClassVar[dict[str, str]] = {
119
+ "claude": "ANTHROPIC_API_KEY",
120
+ "gpt": "OPENAI_API_KEY",
121
+ "o": "OPENAI_API_KEY",
122
+ "gemini": "GEMINI_API_KEY",
123
+ "mistral": "MISTRAL_API_KEY",
124
+ "codestral": "MISTRAL_API_KEY",
125
+ "llama": "TOGETHER_API_KEY",
126
+ }
127
+
128
+ def resolve(self, config: EvaluatorConfig) -> tuple[str, str]:
129
+ """Resolve evaluator config to (model_id, api_key_env).
130
+
131
+ Args:
132
+ config: EvaluatorConfig with model and/or model_requirement
133
+
134
+ Returns:
135
+ (model_id, api_key_env) tuple
136
+
137
+ Raises:
138
+ ResolutionError: If resolution fails and no fallback available
139
+ """
140
+ if config.model_requirement:
141
+ try:
142
+ return self._resolve_requirement(config.model_requirement)
143
+ except ResolutionError as e:
144
+ if config.model:
145
+ # Fall back to legacy with warning
146
+ warnings.warn(
147
+ f"model_requirement resolution failed for {config.name}: {e}. "
148
+ f"Falling back to legacy model field: {config.model}",
149
+ UserWarning,
150
+ stacklevel=2,
151
+ )
152
+ return (config.model, config.api_key_env)
153
+ raise
154
+
155
+ # Legacy only
156
+ if config.model:
157
+ return (config.model, config.api_key_env)
158
+
159
+ raise ResolutionError("No model or model_requirement specified")
160
+
161
+ def _resolve_requirement(self, req: ModelRequirement) -> tuple[str, str]:
162
+ """Resolve requirement to model ID using registry.
163
+
164
+ Args:
165
+ req: ModelRequirement with family and tier
166
+
167
+ Returns:
168
+ (model_id, api_key_env) tuple
169
+
170
+ Raises:
171
+ ResolutionError: If family or tier not found in registry
172
+ """
173
+ # TODO(Phase 2): ModelRequirement.min_version and ModelRequirement.min_context
174
+ # are currently parsed but not used for filtering. Phase 1 only performs
175
+ # family/tier matching. Phase 2 will implement filtering by min_version
176
+ # and min_context requirements.
177
+ family = self.DEFAULT_REGISTRY.get(req.family)
178
+ if not family:
179
+ raise ResolutionError(f"Unknown model family: {req.family}")
180
+
181
+ tier_data = family.get(req.tier)
182
+ if not tier_data:
183
+ raise ResolutionError(f"Unknown tier '{req.tier}' for family '{req.family}'")
184
+
185
+ # Return first (latest) model in tier
186
+ models = tier_data.get("models", [])
187
+ if not models:
188
+ raise ResolutionError(f"No models defined for {req.family}/{req.tier}")
189
+ # Registry values are typed list[str] | str ("prefix" is a str); the "models" entry is always a list, so indexing is safe
190
+ model_id = models[0] # type: ignore[index]
191
+
192
+ # Apply provider prefix for LiteLLM compatibility
193
+ prefix = tier_data.get("prefix", "")
194
+ if prefix:
195
+ model_id = f"{prefix}{model_id}"
196
+
197
+ # Determine API key env from family
198
+ api_key_env = self._get_api_key_env(req.family)
199
+
200
+ return (model_id, api_key_env)
201
+
202
+ def _get_api_key_env(self, family: str) -> str:
203
+ """Get default API key environment variable for family.
204
+
205
+ Args:
206
+ family: Model family name
207
+
208
+ Returns:
209
+ Environment variable name for API key
210
+ """
211
+ return self.API_KEY_MAP.get(family, f"{family.upper()}_API_KEY")
@@ -1,4 +1,9 @@
1
- """Generic evaluator runner."""
1
+ """Generic evaluator runner.
2
+
3
+ Supports dual-field model specification (ADV-0015):
4
+ - Legacy: model + api_key_env fields (backwards compatible)
5
+ - New: model_requirement field (resolved via ModelResolver)
6
+ """
2
7
 
3
8
  from __future__ import annotations
4
9
 
@@ -14,6 +19,7 @@ from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
14
19
  from ..utils.config import load_config
15
20
  from ..utils.validation import validate_evaluation_output
16
21
  from .config import EvaluatorConfig
22
+ from .resolver import ModelResolver, ResolutionError
17
23
 
18
24
 
19
25
  def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -43,20 +49,28 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
43
49
  return 1
44
50
  project_config = load_config()
45
51
 
46
- # 3. Check aider available
52
+ # 3. Resolve model (ADV-0015: dual-field support)
53
+ resolver = ModelResolver()
54
+ try:
55
+ resolved_model, resolved_api_key_env = resolver.resolve(config)
56
+ except ResolutionError as e:
57
+ print(f"{RED}Error: {e}{RESET}")
58
+ return 1
59
+
60
+ # 4. Check aider available
47
61
  if not shutil.which("aider"):
48
62
  print(f"{RED}Error: Aider not found{RESET}")
49
63
  _print_aider_help()
50
64
  return 1
51
65
 
52
- # 4. Check API key
53
- api_key = os.environ.get(config.api_key_env)
66
+ # 5. Check API key (using resolved api_key_env)
67
+ api_key = os.environ.get(resolved_api_key_env)
54
68
  if not api_key:
55
- print(f"{RED}Error: {config.api_key_env} not set{RESET}")
56
- print(f" Set in .env or export {config.api_key_env}=your-key")
69
+ print(f"{RED}Error: {resolved_api_key_env} not set{RESET}")
70
+ print(f" Set in .env or export {resolved_api_key_env}=your-key")
57
71
  return 1
58
72
 
59
- # 5. Pre-flight file size check
73
+ # 6. Pre-flight file size check
60
74
  line_count, estimated_tokens = _check_file_size(file_path)
61
75
  if line_count > 500 or estimated_tokens > 20000:
62
76
  _warn_large_file(line_count, estimated_tokens)
@@ -65,11 +79,11 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
65
79
  print("Evaluation cancelled.")
66
80
  return 0
67
81
 
68
- # 6. Determine execution method
82
+ # 7. Determine execution method
69
83
  if config.source == "builtin":
70
84
  return _run_builtin_evaluator(config, file_path, project_config, timeout)
71
85
  else:
72
- return _run_custom_evaluator(config, file_path, project_config, timeout)
86
+ return _run_custom_evaluator(config, file_path, project_config, timeout, resolved_model)
73
87
 
74
88
 
75
89
  def _run_builtin_evaluator(
@@ -99,8 +113,17 @@ def _run_custom_evaluator(
99
113
  file_path: str,
100
114
  project_config: dict,
101
115
  timeout: int,
116
+ resolved_model: str,
102
117
  ) -> int:
103
- """Run a custom evaluator by invoking aider directly."""
118
+ """Run a custom evaluator by invoking aider directly.
119
+
120
+ Args:
121
+ config: Evaluator configuration
122
+ file_path: Path to file to evaluate
123
+ project_config: Project configuration dict
124
+ timeout: Timeout in seconds
125
+ resolved_model: Resolved model ID from ModelResolver
126
+ """
104
127
  # Prepare output path
105
128
  logs_dir = Path(project_config["log_directory"])
106
129
  logs_dir.mkdir(parents=True, exist_ok=True)
@@ -131,13 +154,13 @@ def _run_custom_evaluator(
131
154
  prefix = config.log_prefix or config.name.upper()
132
155
 
133
156
  try:
134
- print(f"{prefix}: Using model {config.model}")
157
+ print(f"{prefix}: Using model {resolved_model}")
135
158
 
136
159
  # Build aider command
137
160
  cmd = [
138
161
  "aider",
139
162
  "--model",
140
- config.model,
163
+ resolved_model,
141
164
  "--yes",
142
165
  "--no-detect-urls",
143
166
  "--no-git",
@@ -168,7 +191,7 @@ def _run_custom_evaluator(
168
191
 
169
192
  **Source**: {file_path}
170
193
  **Evaluator**: {config.name}
171
- **Model**: {config.model}
194
+ **Model**: {resolved_model}
172
195
  **Generated**: {timestamp}
173
196
 
174
197
  ---
@@ -0,0 +1,56 @@
1
+ """Evaluator library client for adversarial-workflow.
2
+
3
+ This module provides functionality to browse, install, and update evaluator
4
+ configurations from the community adversarial-evaluator-library.
5
+
6
+ Philosophy: "Copy, Don't Link"
7
+ - Evaluators are copied to projects, not referenced at runtime
8
+ - Projects remain self-contained and work offline
9
+ - Users can customize their local copies freely
10
+ - Updates are explicit and user-controlled
11
+ """
12
+
13
+ from .cache import DEFAULT_CACHE_DIR, DEFAULT_CACHE_TTL, CacheManager
14
+ from .client import (
15
+ DEFAULT_LIBRARY_URL,
16
+ LibraryClient,
17
+ LibraryClientError,
18
+ NetworkError,
19
+ ParseError,
20
+ )
21
+ from .commands import (
22
+ library_check_updates,
23
+ library_info,
24
+ library_install,
25
+ library_list,
26
+ library_update,
27
+ )
28
+ from .config import LibraryConfig, get_library_config
29
+ from .models import EvaluatorEntry, IndexData, InstalledEvaluatorMeta, UpdateInfo
30
+
31
+ __all__ = [
32
+ # Client
33
+ "LibraryClient",
34
+ "LibraryClientError",
35
+ "NetworkError",
36
+ "ParseError",
37
+ "DEFAULT_LIBRARY_URL",
38
+ # Models
39
+ "EvaluatorEntry",
40
+ "IndexData",
41
+ "InstalledEvaluatorMeta",
42
+ "UpdateInfo",
43
+ # Cache
44
+ "CacheManager",
45
+ "DEFAULT_CACHE_DIR",
46
+ "DEFAULT_CACHE_TTL",
47
+ # Config
48
+ "LibraryConfig",
49
+ "get_library_config",
50
+ # Commands
51
+ "library_list",
52
+ "library_info",
53
+ "library_install",
54
+ "library_check_updates",
55
+ "library_update",
56
+ ]