adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ """
2
+ Model resolver for evaluator configurations (ADV-0015: Model Routing Layer - Phase 1).
3
+
4
+ This module provides the ModelResolver class that resolves model requirements
5
+ to actual model IDs using an embedded registry. It supports:
6
+ - model_requirement field (new structured format)
7
+ - model field (legacy string format)
8
+ - Fallback from model_requirement to model on resolution failure
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import warnings
14
+ from typing import TYPE_CHECKING, ClassVar
15
+
16
+ if TYPE_CHECKING:
17
+ from adversarial_workflow.evaluators.config import EvaluatorConfig, ModelRequirement
18
+
19
+
20
class ResolutionError(Exception):
    """Signals that a model requirement could not be resolved to a model ID."""
22
+
23
+
24
class ModelResolver:
    """Maps evaluator model requirements onto concrete model identifiers.

    The embedded registry mirrors adversarial-evaluator-library/providers/registry.yml
    and translates (family, tier) pairs into model IDs plus the environment
    variable that holds the matching API key.

    Resolution order:
    1. If model_requirement present: resolve via registry
    2. If resolution fails AND model present: warn + fallback to legacy
    3. If resolution fails AND no model: raise ResolutionError
    4. If no model_requirement AND model present: use legacy directly
    5. If neither: raise ResolutionError
    """

    # Default registry - matches adversarial-evaluator-library/providers/registry.yml
    # Updated 2026-02-03 per Library team handoff (ADR-0005)
    DEFAULT_REGISTRY: ClassVar[dict[str, dict[str, dict[str, list[str] | str]]]] = {
        "claude": {
            "opus": {"models": ["claude-4-opus-20260115", "claude-opus-4-5-20251101"], "prefix": "anthropic/"},
            "sonnet": {"models": ["claude-4-sonnet-20260115"], "prefix": "anthropic/"},
            "haiku": {"models": ["claude-4-haiku-20260115"], "prefix": "anthropic/"},
        },
        "gpt": {
            "flagship": {"models": ["gpt-4o", "gpt-4o-2024-08-06"], "prefix": ""},
            "standard": {"models": ["gpt-4-turbo", "gpt-4"], "prefix": ""},
            "mini": {"models": ["gpt-4o-mini"], "prefix": ""},
        },
        "o": {
            "flagship": {"models": ["o1", "o1-2024-12-17"], "prefix": ""},
            "mini": {"models": ["o3-mini"], "prefix": ""},
        },
        "gemini": {
            "pro": {"models": ["gemini-2.5-pro"], "prefix": "gemini/"},
            "flash": {"models": ["gemini-2.5-flash"], "prefix": "gemini/"},
        },
        "mistral": {
            "large": {"models": ["mistral-large-latest"], "prefix": "mistral/"},
            "small": {"models": ["mistral-small-latest"], "prefix": "mistral/"},
        },
        "codestral": {
            "latest": {"models": ["codestral-latest"], "prefix": "mistral/"},
        },
        "llama": {
            "large": {"models": ["llama-3.3-70b"], "prefix": ""},  # prefix varies by host
            "medium": {"models": ["llama-3.1-8b"], "prefix": ""},
        },
    }

    # API key environment variable mapping by family
    API_KEY_MAP: ClassVar[dict[str, str]] = {
        "claude": "ANTHROPIC_API_KEY",
        "gpt": "OPENAI_API_KEY",
        "o": "OPENAI_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "mistral": "MISTRAL_API_KEY",
        "codestral": "MISTRAL_API_KEY",
        "llama": "TOGETHER_API_KEY",
    }

    def resolve(self, config: EvaluatorConfig) -> tuple[str, str]:
        """Resolve an evaluator config to a (model_id, api_key_env) pair.

        Args:
            config: EvaluatorConfig with model and/or model_requirement

        Returns:
            (model_id, api_key_env) tuple

        Raises:
            ResolutionError: If resolution fails and no fallback available
        """
        requirement = config.model_requirement
        if not requirement:
            # Legacy-only path: take the model field verbatim.
            if config.model:
                return (config.model, config.api_key_env)
            raise ResolutionError("No model or model_requirement specified")

        try:
            return self._resolve_requirement(requirement)
        except ResolutionError as exc:
            if not config.model:
                raise
            # Registry lookup failed but a legacy model exists: degrade gracefully.
            warnings.warn(
                f"model_requirement resolution failed for {config.name}: {exc}. "
                f"Falling back to legacy model field: {config.model}",
                UserWarning,
                stacklevel=2,
            )
            return (config.model, config.api_key_env)

    def _resolve_requirement(self, req: ModelRequirement) -> tuple[str, str]:
        """Look up a requirement's family/tier in the embedded registry.

        Args:
            req: ModelRequirement with family and tier

        Returns:
            (model_id, api_key_env) tuple

        Raises:
            ResolutionError: If family or tier not found in registry
        """
        # TODO(Phase 2): ModelRequirement.min_version and ModelRequirement.min_context
        # are currently parsed but not used for filtering. Phase 1 only performs
        # family/tier matching. Phase 2 will implement filtering by min_version
        # and min_context requirements.
        tiers = self.DEFAULT_REGISTRY.get(req.family)
        if not tiers:
            raise ResolutionError(f"Unknown model family: {req.family}")

        entry = tiers.get(req.tier)
        if not entry:
            raise ResolutionError(f"Unknown tier '{req.tier}' for family '{req.family}'")

        candidates = entry.get("models", [])
        if not candidates:
            raise ResolutionError(f"No models defined for {req.family}/{req.tier}")
        # Registry type is list[str] | str for flexibility; actual values are always lists.
        # First entry is the preferred (latest) model for the tier.
        model_id = candidates[0]  # type: ignore[index]

        # LiteLLM routes by provider prefix (e.g. "anthropic/", "gemini/").
        prefix = entry.get("prefix", "")
        if prefix:
            model_id = f"{prefix}{model_id}"

        return (model_id, self._get_api_key_env(req.family))

    def _get_api_key_env(self, family: str) -> str:
        """Return the API key environment variable name for *family*.

        Args:
            family: Model family name

        Returns:
            Environment variable name for API key; unknown families fall back
            to the FAMILY_API_KEY convention.
        """
        return self.API_KEY_MAP.get(family, f"{family.upper()}_API_KEY")
@@ -1,4 +1,9 @@
1
- """Generic evaluator runner."""
1
+ """Generic evaluator runner.
2
+
3
+ Supports dual-field model specification (ADV-0015):
4
+ - Legacy: model + api_key_env fields (backwards compatible)
5
+ - New: model_requirement field (resolved via ModelResolver)
6
+ """
2
7
 
3
8
  from __future__ import annotations
4
9
 
@@ -14,6 +19,7 @@ from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
14
19
  from ..utils.config import load_config
15
20
  from ..utils.validation import validate_evaluation_output
16
21
  from .config import EvaluatorConfig
22
+ from .resolver import ModelResolver, ResolutionError
17
23
 
18
24
 
19
25
  def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -43,20 +49,28 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
43
49
  return 1
44
50
  project_config = load_config()
45
51
 
46
- # 3. Check aider available
52
+ # 3. Resolve model (ADV-0015: dual-field support)
53
+ resolver = ModelResolver()
54
+ try:
55
+ resolved_model, resolved_api_key_env = resolver.resolve(config)
56
+ except ResolutionError as e:
57
+ print(f"{RED}Error: {e}{RESET}")
58
+ return 1
59
+
60
+ # 4. Check aider available
47
61
  if not shutil.which("aider"):
48
62
  print(f"{RED}Error: Aider not found{RESET}")
49
63
  _print_aider_help()
50
64
  return 1
51
65
 
52
- # 4. Check API key
53
- api_key = os.environ.get(config.api_key_env)
66
+ # 5. Check API key (using resolved api_key_env)
67
+ api_key = os.environ.get(resolved_api_key_env)
54
68
  if not api_key:
55
- print(f"{RED}Error: {config.api_key_env} not set{RESET}")
56
- print(f" Set in .env or export {config.api_key_env}=your-key")
69
+ print(f"{RED}Error: {resolved_api_key_env} not set{RESET}")
70
+ print(f" Set in .env or export {resolved_api_key_env}=your-key")
57
71
  return 1
58
72
 
59
- # 5. Pre-flight file size check
73
+ # 6. Pre-flight file size check
60
74
  line_count, estimated_tokens = _check_file_size(file_path)
61
75
  if line_count > 500 or estimated_tokens > 20000:
62
76
  _warn_large_file(line_count, estimated_tokens)
@@ -65,11 +79,11 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
65
79
  print("Evaluation cancelled.")
66
80
  return 0
67
81
 
68
- # 6. Determine execution method
82
+ # 7. Determine execution method
69
83
  if config.source == "builtin":
70
84
  return _run_builtin_evaluator(config, file_path, project_config, timeout)
71
85
  else:
72
- return _run_custom_evaluator(config, file_path, project_config, timeout)
86
+ return _run_custom_evaluator(config, file_path, project_config, timeout, resolved_model)
73
87
 
74
88
 
75
89
  def _run_builtin_evaluator(
@@ -99,8 +113,17 @@ def _run_custom_evaluator(
99
113
  file_path: str,
100
114
  project_config: dict,
101
115
  timeout: int,
116
+ resolved_model: str,
102
117
  ) -> int:
103
- """Run a custom evaluator by invoking aider directly."""
118
+ """Run a custom evaluator by invoking aider directly.
119
+
120
+ Args:
121
+ config: Evaluator configuration
122
+ file_path: Path to file to evaluate
123
+ project_config: Project configuration dict
124
+ timeout: Timeout in seconds
125
+ resolved_model: Resolved model ID from ModelResolver
126
+ """
104
127
  # Prepare output path
105
128
  logs_dir = Path(project_config["log_directory"])
106
129
  logs_dir.mkdir(parents=True, exist_ok=True)
@@ -131,13 +154,13 @@ def _run_custom_evaluator(
131
154
  prefix = config.log_prefix or config.name.upper()
132
155
 
133
156
  try:
134
- print(f"{prefix}: Using model {config.model}")
157
+ print(f"{prefix}: Using model {resolved_model}")
135
158
 
136
159
  # Build aider command
137
160
  cmd = [
138
161
  "aider",
139
162
  "--model",
140
- config.model,
163
+ resolved_model,
141
164
  "--yes",
142
165
  "--no-detect-urls",
143
166
  "--no-git",
@@ -168,7 +191,7 @@ def _run_custom_evaluator(
168
191
 
169
192
  **Source**: {file_path}
170
193
  **Evaluator**: {config.name}
171
- **Model**: {config.model}
194
+ **Model**: {resolved_model}
172
195
  **Generated**: {timestamp}
173
196
 
174
197
  ---
@@ -0,0 +1,56 @@
1
"""Evaluator library client for adversarial-workflow.

This module provides functionality to browse, install, and update evaluator
configurations from the community adversarial-evaluator-library.

Philosophy: "Copy, Don't Link"
- Evaluators are copied to projects, not referenced at runtime
- Projects remain self-contained and work offline
- Users can customize their local copies freely
- Updates are explicit and user-controlled
"""

from .cache import DEFAULT_CACHE_DIR, DEFAULT_CACHE_TTL, CacheManager
from .client import (
    DEFAULT_LIBRARY_URL,
    LibraryClient,
    LibraryClientError,
    NetworkError,
    ParseError,
)
from .commands import (
    library_check_updates,
    library_info,
    library_install,
    library_list,
    library_update,
)
from .config import LibraryConfig, get_library_config
from .models import EvaluatorEntry, IndexData, InstalledEvaluatorMeta, UpdateInfo

# Explicit public API surface for the package; names are grouped by the
# submodule they are re-exported from.
__all__ = [
    # Client
    "LibraryClient",
    "LibraryClientError",
    "NetworkError",
    "ParseError",
    "DEFAULT_LIBRARY_URL",
    # Models
    "EvaluatorEntry",
    "IndexData",
    "InstalledEvaluatorMeta",
    "UpdateInfo",
    # Cache
    "CacheManager",
    "DEFAULT_CACHE_DIR",
    "DEFAULT_CACHE_TTL",
    # Config
    "LibraryConfig",
    "get_library_config",
    # Commands
    "library_list",
    "library_info",
    "library_install",
    "library_check_updates",
    "library_update",
]
@@ -0,0 +1,184 @@
1
+ """Cache management for the evaluator library client."""
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Optional
8
+
9
# Default cache TTL: 1 hour (3600 seconds)
DEFAULT_CACHE_TTL = 3600

# Cache directory
DEFAULT_CACHE_DIR = Path.home() / ".cache" / "adversarial-workflow"


class CacheManager:
    """File-based JSON cache for the library client.

    Entries are stored as ``<sanitized-key>.json`` files under ``cache_dir``;
    freshness is judged from the file's mtime against ``ttl``.
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        ttl: int = DEFAULT_CACHE_TTL,
    ):
        """
        Initialize the cache manager.

        Args:
            cache_dir: Directory to store cache files. Defaults to ~/.cache/adversarial-workflow
            ttl: Time-to-live in seconds. Defaults to 3600 (1 hour).
        """
        self.cache_dir = cache_dir if cache_dir is not None else DEFAULT_CACHE_DIR
        self.ttl = ttl
        self._ensure_cache_dir()

    def _ensure_cache_dir(self) -> None:
        """Create the cache directory if possible; failures disable caching silently."""
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Degrade to a cacheless mode rather than crashing the client.
            pass

    def _entry_path(self, key: str) -> Path:
        """Map a cache key to its on-disk JSON file (filesystem-safe name)."""
        safe = key.replace("/", "_").replace(":", "_")
        return self.cache_dir / f"{safe}.json"

    def _expired(self, cache_path: Path) -> bool:
        """Return True when the entry is missing, unreadable, or older than ttl."""
        try:
            return (time.time() - cache_path.stat().st_mtime) > self.ttl
        except OSError:
            # Missing or unstat-able files count as expired.
            return True

    def _load(self, cache_path: Path) -> Optional[Dict[str, Any]]:
        """Read and parse a cache file; None on any read/parse failure."""
        try:
            return json.loads(cache_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return None

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a fresh value from the cache.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found or expired.
        """
        cache_path = self._entry_path(key)
        if self._expired(cache_path):
            return None
        return self._load(cache_path)

    def get_stale(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a value from the cache even if expired.

        Useful for offline fallback scenarios.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found.
        """
        return self._load(self._entry_path(key))

    def set(self, key: str, value: Dict[str, Any]) -> bool:
        """
        Store a value in the cache.

        Args:
            key: The cache key.
            value: The value to cache.

        Returns:
            True if successfully cached, False otherwise.
        """
        cache_path = self._entry_path(key)
        try:
            self._ensure_cache_dir()
            with open(cache_path, "w", encoding="utf-8") as fh:
                json.dump(value, fh, indent=2)
            return True
        except OSError:
            return False

    def invalidate(self, key: str) -> bool:
        """
        Invalidate a cache entry.

        Args:
            key: The cache key.

        Returns:
            True if successfully invalidated, False otherwise.
        """
        cache_path = self._entry_path(key)
        try:
            if cache_path.exists():
                cache_path.unlink()
            return True
        except OSError:
            return False

    def clear(self) -> int:
        """
        Clear all cache entries.

        Returns:
            The number of entries cleared.
        """
        removed = 0
        try:
            for entry in self.cache_dir.glob("*.json"):
                try:
                    entry.unlink()
                except OSError:
                    continue
                removed += 1
        except OSError:
            # Unreadable cache dir: report whatever was removed so far.
            pass
        return removed

    def get_age(self, key: str) -> Optional[float]:
        """
        Get the age of a cache entry in seconds.

        Args:
            key: The cache key.

        Returns:
            Age in seconds, or None if not found.
        """
        try:
            return time.time() - self._entry_path(key).stat().st_mtime
        except OSError:
            return None