adversarial-workflow 0.6.6-py3-none-any.whl → 0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/cli.py +351 -5
- adversarial_workflow/evaluators/__init__.py +11 -2
- adversarial_workflow/evaluators/config.py +39 -2
- adversarial_workflow/evaluators/discovery.py +97 -9
- adversarial_workflow/evaluators/resolver.py +211 -0
- adversarial_workflow/evaluators/runner.py +36 -13
- adversarial_workflow/library/__init__.py +56 -0
- adversarial_workflow/library/cache.py +184 -0
- adversarial_workflow/library/client.py +224 -0
- adversarial_workflow/library/commands.py +849 -0
- adversarial_workflow/library/config.py +81 -0
- adversarial_workflow/library/models.py +129 -0
- adversarial_workflow/utils/citations.py +643 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/METADATA +160 -3
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/RECORD +20 -12
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/WHEEL +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/top_level.txt +0 -0
adversarial_workflow/library/cache.py (new file)

@@ -0,0 +1,184 @@
"""Cache management for the evaluator library client."""

import json
import os
import time
from pathlib import Path
from typing import Any, Dict, Optional

# Default cache TTL: 1 hour (3600 seconds)
DEFAULT_CACHE_TTL = 3600

# Cache directory
DEFAULT_CACHE_DIR = Path.home() / ".cache" / "adversarial-workflow"


class CacheManager:
    """Manages caching for the library client."""

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        ttl: int = DEFAULT_CACHE_TTL,
    ):
        """
        Initialize the cache manager.

        Args:
            cache_dir: Directory to store cache files. Defaults to ~/.cache/adversarial-workflow
            ttl: Time-to-live in seconds. Defaults to 3600 (1 hour).
        """
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR
        self.ttl = ttl
        self._ensure_cache_dir()

    def _ensure_cache_dir(self) -> None:
        """Ensure the cache directory exists."""
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            # If we can't create the cache dir, we'll operate without caching
            pass

    def _get_cache_path(self, key: str) -> Path:
        """Get the path for a cache entry."""
        # Sanitize key for filesystem
        safe_key = key.replace("/", "_").replace(":", "_")
        return self.cache_dir / f"{safe_key}.json"

    def _is_expired(self, cache_path: Path) -> bool:
        """Check if a cache entry is expired."""
        if not cache_path.exists():
            return True
        try:
            mtime = cache_path.stat().st_mtime
            return (time.time() - mtime) > self.ttl
        except OSError:
            return True

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a value from the cache.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found or expired.
        """
        cache_path = self._get_cache_path(key)

        if not cache_path.exists():
            return None

        if self._is_expired(cache_path):
            return None

        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            return None

    def get_stale(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a value from the cache even if expired.

        Useful for offline fallback scenarios.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found.
        """
        cache_path = self._get_cache_path(key)

        if not cache_path.exists():
            return None

        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            return None

    def set(self, key: str, value: Dict[str, Any]) -> bool:
        """
        Store a value in the cache.

        Args:
            key: The cache key.
            value: The value to cache.

        Returns:
            True if successfully cached, False otherwise.
        """
        cache_path = self._get_cache_path(key)

        try:
            self._ensure_cache_dir()
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(value, f, indent=2)
            return True
        except OSError:
            return False

    def invalidate(self, key: str) -> bool:
        """
        Invalidate a cache entry.

        Args:
            key: The cache key.

        Returns:
            True if successfully invalidated, False otherwise.
        """
        cache_path = self._get_cache_path(key)

        try:
            if cache_path.exists():
                cache_path.unlink()
            return True
        except OSError:
            return False

    def clear(self) -> int:
        """
        Clear all cache entries.

        Returns:
            The number of entries cleared.
        """
        count = 0
        try:
            for cache_file in self.cache_dir.glob("*.json"):
                try:
                    cache_file.unlink()
                    count += 1
                except OSError:
                    pass
        except OSError:
            pass
        return count

    def get_age(self, key: str) -> Optional[float]:
        """
        Get the age of a cache entry in seconds.

        Args:
            key: The cache key.

        Returns:
            Age in seconds, or None if not found.
        """
        cache_path = self._get_cache_path(key)

        if not cache_path.exists():
            return None

        try:
            mtime = cache_path.stat().st_mtime
            return time.time() - mtime
        except OSError:
            return None
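The CacheManager above is self-contained, so its read/write/expiry behavior can be exercised directly. A minimal sketch (the temporary directory, short TTL, and example key/value below are illustrative, not part of the package):

    import tempfile
    import time
    from pathlib import Path

    from adversarial_workflow.library.cache import CacheManager

    # Throwaway directory and a short TTL so expiry is observable quickly.
    cache = CacheManager(cache_dir=Path(tempfile.mkdtemp()), ttl=2)

    cache.set("library-index", {"evaluators": []})
    print(cache.get("library-index"))        # {'evaluators': []}
    print(cache.get_age("library-index"))    # small float: seconds since the write

    time.sleep(3)
    print(cache.get("library-index"))        # None -- entry is older than the TTL
    print(cache.get_stale("library-index"))  # expired payload is still readable
    print(cache.clear())                     # 1 -- one *.json entry removed

Every failure path in the class degrades to None/False instead of raising, so callers such as LibraryClient can treat the cache as strictly best-effort.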
adversarial_workflow/library/client.py (new file)

@@ -0,0 +1,224 @@
"""HTTP client for the evaluator library."""

import json
import urllib.error
import urllib.request
from pathlib import Path
from typing import Optional, Tuple

from .cache import CacheManager
from .config import LibraryConfig, get_library_config
from .models import IndexData

# Library repository URLs (used as fallback defaults)
# Note: DEFAULT_LIBRARY_URL uses 'main' branch; use ADVERSARIAL_LIBRARY_REF env var to override
DEFAULT_LIBRARY_URL_TEMPLATE = (
    "https://raw.githubusercontent.com/movito/adversarial-evaluator-library/{ref}"
)
DEFAULT_LIBRARY_URL = DEFAULT_LIBRARY_URL_TEMPLATE.format(ref="main")
INDEX_PATH = "evaluators/index.json"
EVALUATOR_PATH_TEMPLATE = "evaluators/{provider}/{name}/evaluator.yml"
README_PATH_TEMPLATE = "evaluators/{provider}/{name}/README.md"

# HTTP settings
DEFAULT_TIMEOUT = 10  # seconds


class LibraryClientError(Exception):
    """Base exception for library client errors."""

    pass


class NetworkError(LibraryClientError):
    """Network-related errors."""

    pass


class ParseError(LibraryClientError):
    """Parsing errors for malformed responses."""

    pass


class LibraryClient:
    """Client for fetching evaluators from the community library."""

    def __init__(
        self,
        base_url: Optional[str] = None,
        cache_dir: Optional[Path] = None,
        timeout: int = DEFAULT_TIMEOUT,
        config: Optional[LibraryConfig] = None,
    ):
        """
        Initialize the library client.

        Args:
            base_url: Base URL for the library repository. If not provided, uses config.
            cache_dir: Directory for caching. If not provided, uses config.
            timeout: HTTP timeout in seconds.
            config: Optional LibraryConfig. If not provided, loads from get_library_config().
        """
        # Load config if not provided
        if config is None:
            config = get_library_config()

        # Store ref for potential use (e.g., logging, debugging)
        self.ref = config.ref

        # URL precedence: explicit arg > config.url (if customized) > default template
        if base_url:
            # Explicit base_url argument takes highest precedence
            self.base_url = base_url.rstrip("/")
        elif config.url != DEFAULT_LIBRARY_URL:
            # User has customized the URL (via env var or config file), use it directly
            self.base_url = config.url.rstrip("/")
        else:
            # Use default template with ref for branch switching
            self.base_url = DEFAULT_LIBRARY_URL_TEMPLATE.format(ref=config.ref)
        self.timeout = timeout
        self.cache = CacheManager(
            cache_dir=cache_dir or config.cache_dir,
            ttl=config.cache_ttl,
        )

    def _fetch_url(self, url: str) -> str:
        """
        Fetch content from a URL.

        Args:
            url: The URL to fetch.

        Returns:
            The response content as a string.

        Raises:
            NetworkError: If the request fails.
        """
        try:
            request = urllib.request.Request(
                url,
                headers={"User-Agent": "adversarial-workflow-library-client"},
            )
            with urllib.request.urlopen(request, timeout=self.timeout) as response:
                return response.read().decode("utf-8")
        except urllib.error.HTTPError as e:
            # HTTPError must be caught before URLError (HTTPError subclasses URLError)
            raise NetworkError(f"HTTP error {e.code} fetching {url}: {e.reason}") from e
        except urllib.error.URLError as e:
            raise NetworkError(f"Failed to fetch {url}: {e}") from e
        except TimeoutError as e:
            raise NetworkError(f"Timeout fetching {url}") from e
        except OSError as e:
            raise NetworkError(f"Network error fetching {url}: {e}") from e

    def fetch_index(self, no_cache: bool = False) -> Tuple[IndexData, bool]:
        """
        Fetch the library index.

        Args:
            no_cache: If True, bypass the cache and fetch fresh data.

        Returns:
            Tuple of (IndexData, from_cache) where from_cache indicates if
            the data came from cache.

        Raises:
            NetworkError: If the request fails and no cache is available.
            ParseError: If the response cannot be parsed.
        """
        cache_key = "library-index"

        # Try cache first (unless no_cache is set)
        if not no_cache:
            cached_data = self.cache.get(cache_key)
            if cached_data:
                try:
                    return IndexData.from_dict(cached_data), True
                except (KeyError, TypeError):
                    # Cache data is invalid, will try to fetch fresh
                    pass

        # Fetch fresh data
        url = f"{self.base_url}/{INDEX_PATH}"
        try:
            content = self._fetch_url(url)
            data = json.loads(content)
        except NetworkError:
            # Try stale cache as fallback
            stale_data = self.cache.get_stale(cache_key)
            if stale_data:
                try:
                    return IndexData.from_dict(stale_data), True
                except (KeyError, TypeError):
                    pass
            raise
        except json.JSONDecodeError as e:
            raise ParseError(f"Invalid JSON in index: {e}") from e

        # Validate and parse
        try:
            index_data = IndexData.from_dict(data)
        except (KeyError, TypeError) as e:
            raise ParseError(f"Invalid index structure: {e}") from e

        # Update cache
        self.cache.set(cache_key, data)

        return index_data, False

    def fetch_evaluator(self, provider: str, name: str) -> str:
        """
        Fetch an evaluator configuration.

        Args:
            provider: The provider name (e.g., 'google', 'openai').
            name: The evaluator name (e.g., 'gemini-flash').

        Returns:
            The raw YAML content of the evaluator configuration.

        Raises:
            NetworkError: If the request fails.
        """
        path = EVALUATOR_PATH_TEMPLATE.format(provider=provider, name=name)
        url = f"{self.base_url}/{path}"
        return self._fetch_url(url)

    def fetch_readme(self, provider: str, name: str) -> Optional[str]:
        """
        Fetch an evaluator's README.md for extended info.

        Args:
            provider: The provider name (e.g., 'google', 'openai').
            name: The evaluator name (e.g., 'gemini-flash').

        Returns:
            The README content as a string, or None if not found.
        """
        path = README_PATH_TEMPLATE.format(provider=provider, name=name)
        url = f"{self.base_url}/{path}"
        try:
            return self._fetch_url(url)
        except NetworkError:
            return None

    def get_cache_age(self) -> Optional[float]:
        """
        Get the age of the cached index in seconds.

        Returns:
            Age in seconds, or None if not cached.
        """
        return self.cache.get_age("library-index")

    def clear_cache(self) -> int:
        """
        Clear all cached data.

        Returns:
            Number of cache entries cleared.
        """
        return self.cache.clear()
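Combined with the cache module, LibraryClient gives a cache-first fetch path with a stale-cache fallback when the network is unavailable. A minimal usage sketch, assuming the package is installed and the default library URL is reachable (the provider/name values are the illustrative ones from the docstrings):

    from adversarial_workflow.library.client import LibraryClient, NetworkError

    client = LibraryClient()  # URL, ref, cache dir, and TTL come from get_library_config()

    try:
        # Cache-first; falls back to a stale cache entry if the fetch fails,
        # and raises NetworkError only when offline with nothing cached.
        index, from_cache = client.fetch_index()
        print(from_cache, client.get_cache_age())

        # Raw evaluator YAML, plus the optional README (None if missing).
        yaml_text = client.fetch_evaluator("google", "gemini-flash")
        readme = client.fetch_readme("google", "gemini-flash")
    except NetworkError as exc:
        print(f"library unreachable: {exc}")

Pointing the client at a fork or another branch is a matter of passing base_url explicitly, or customizing the URL/ref through the library config (for example the ADVERSARIAL_LIBRARY_REF environment variable noted in the module comments), per the precedence rules in __init__.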