adversarial-workflow 0.7.0-py3-none-any.whl → 0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/cli.py +193 -5
- adversarial_workflow/evaluators/__init__.py +11 -2
- adversarial_workflow/evaluators/config.py +39 -2
- adversarial_workflow/evaluators/discovery.py +97 -9
- adversarial_workflow/evaluators/resolver.py +211 -0
- adversarial_workflow/evaluators/runner.py +36 -13
- adversarial_workflow/library/__init__.py +56 -0
- adversarial_workflow/library/cache.py +184 -0
- adversarial_workflow/library/client.py +224 -0
- adversarial_workflow/library/commands.py +849 -0
- adversarial_workflow/library/config.py +81 -0
- adversarial_workflow/library/models.py +129 -0
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/METADATA +158 -3
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/RECORD +19 -12
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/WHEEL +0 -0
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.7.0.dist-info → adversarial_workflow-0.9.0.dist-info}/top_level.txt +0 -0
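Version 0.9.0 introduces an evaluator library subsystem (the new adversarial_workflow/library/ package) with commands for listing, inspecting, installing, and updating evaluators, plus supporting cache, client, config, and model modules. As a rough orientation before the full diff of commands.py below, here is a minimal sketch of driving the new command functions from Python. The import path and signatures are taken from the diff itself; the "google/gemini-flash" spec is only the placeholder used in the package's own help text, and real behavior depends on the library index being reachable.

    from adversarial_workflow.library.commands import (
        library_check_updates,
        library_install,
        library_list,
    )

    # Each command prints its own output and returns a shell-style exit code.
    code = library_list(verbose=True)                              # 0 on success, 1 on error
    code = library_install(["google/gemini-flash"], dry_run=True)  # preview only, writes nothing
    code = library_check_updates()                                 # compare installed vs. index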
adversarial_workflow/library/commands.py (new file)
@@ -0,0 +1,849 @@
"""CLI commands for the evaluator library."""
|
|
2
|
+
|
|
3
|
+
import difflib
|
|
4
|
+
import sys
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from .client import LibraryClient, LibraryClientError, NetworkError, ParseError
|
|
12
|
+
from .models import IndexData, InstalledEvaluatorMeta, UpdateInfo
|
|
13
|
+
|
|
14
|
+
# ANSI color codes (matching cli.py)
|
|
15
|
+
RESET = "\033[0m"
|
|
16
|
+
BOLD = "\033[1m"
|
|
17
|
+
GREEN = "\033[92m"
|
|
18
|
+
YELLOW = "\033[93m"
|
|
19
|
+
RED = "\033[91m"
|
|
20
|
+
CYAN = "\033[96m"
|
|
21
|
+
GRAY = "\033[90m"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_evaluators_dir() -> Path:
|
|
25
|
+
"""Get the evaluators directory for the current project."""
|
|
26
|
+
return Path.cwd() / ".adversarial" / "evaluators"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def format_table(
|
|
30
|
+
headers: List[str], rows: List[List[str]], widths: Optional[List[int]] = None
|
|
31
|
+
) -> str:
|
|
32
|
+
"""
|
|
33
|
+
Format data as a simple table.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
headers: Column headers.
|
|
37
|
+
rows: List of row data (each row is a list of strings).
|
|
38
|
+
widths: Optional column widths. If None, auto-calculated.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
Formatted table string.
|
|
42
|
+
"""
|
|
43
|
+
if not widths:
|
|
44
|
+
# Calculate widths from data
|
|
45
|
+
widths = [len(h) for h in headers]
|
|
46
|
+
for row in rows:
|
|
47
|
+
for i, cell in enumerate(row):
|
|
48
|
+
if i < len(widths):
|
|
49
|
+
widths[i] = max(widths[i], len(str(cell)))
|
|
50
|
+
|
|
51
|
+
# Format header
|
|
52
|
+
header_line = " ".join(h.ljust(w) for h, w in zip(headers, widths))
|
|
53
|
+
lines = [header_line]
|
|
54
|
+
|
|
55
|
+
# Format rows
|
|
56
|
+
for row in rows:
|
|
57
|
+
row_line = " ".join(str(c).ljust(w) for c, w in zip(row, widths))
|
|
58
|
+
lines.append(row_line)
|
|
59
|
+
|
|
60
|
+
return "\n".join(lines)


def generate_provenance_header(provider: str, name: str, version: str) -> str:
    """Generate the provenance header for installed evaluators."""
    timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
    return f"""# Installed from adversarial-evaluator-library
# Source: {provider}/{name}
# Version: {version}
# Installed: {timestamp}
#
# To check for updates: adversarial library check-updates
# To update: adversarial library update {name}
#
# Feel free to edit this file - it's yours now!

_meta:
  source: adversarial-evaluator-library
  source_path: {provider}/{name}
  version: "{version}"
  installed: "{timestamp}"

"""


def scan_installed_evaluators() -> List[InstalledEvaluatorMeta]:
    """
    Scan the evaluators directory for installed library evaluators.

    Returns:
        List of metadata for installed evaluators with _meta blocks.
    """
    evaluators_dir = get_evaluators_dir()
    if not evaluators_dir.exists():
        return []

    installed = []
    for yaml_file in evaluators_dir.glob("*.yml"):
        try:
            with open(yaml_file, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)

            if data and "_meta" in data:
                meta = InstalledEvaluatorMeta.from_dict(data["_meta"])
                if meta and meta.source == "adversarial-evaluator-library":
                    meta.file_path = str(yaml_file)  # Track file path for updates
                    installed.append(meta)
        except (yaml.YAMLError, OSError):
            # Skip files that can't be parsed
            continue

    return installed
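# Example (illustrative, not part of commands.py): scan_installed_evaluators()
# only picks up files whose _meta block matches the provenance header written
# by generate_provenance_header(), e.g. a file
# .adversarial/evaluators/google-gemini-flash.yml containing:
#
#     _meta:
#       source: adversarial-evaluator-library
#       source_path: google/gemini-flash
#       version: "1.2.0"
#       installed: "2025-01-01T00:00:00Z"
#
# (the version and timestamp values above are made up for illustration)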


def library_list(
    provider: Optional[str] = None,
    category: Optional[str] = None,
    verbose: bool = False,
    no_cache: bool = False,
) -> int:
    """
    List available evaluators from the library.

    Args:
        provider: Filter by provider name.
        category: Filter by category name.
        verbose: Show detailed information.
        no_cache: Bypass cache and fetch fresh data.

    Returns:
        Exit code (0 for success, 1 for error).
    """
    client = LibraryClient()

    try:
        index, from_cache = client.fetch_index(no_cache=no_cache)
    except NetworkError as e:
        print(f"{RED}Error: Network unavailable{RESET}")
        print(f" {e}")
        print()
        print("Check your internet connection and try again.")
        print(f"Or use {CYAN}--no-cache{RESET} to force a fresh fetch.")
        return 1
    except ParseError as e:
        print(f"{RED}Error: Could not parse library index{RESET}")
        print(f" {e}")
        return 1

    # Filter evaluators
    evaluators = index.evaluators

    if provider:
        evaluators = [e for e in evaluators if e.provider == provider]
        if not evaluators:
            print(f"{YELLOW}No evaluators found for provider: {provider}{RESET}")
            print()
            print("Available providers:")
            providers = sorted(set(e.provider for e in index.evaluators))
            for p in providers:
                print(f" - {p}")
            return 1

    if category:
        evaluators = [e for e in evaluators if e.category == category]
        if not evaluators:
            print(f"{YELLOW}No evaluators found for category: {category}{RESET}")
            print()
            print("Available categories:")
            for cat_name, cat_desc in sorted(index.categories.items()):
                print(f" - {cat_name}: {cat_desc}")
            return 1

    # Print header
    cache_note = f" {GRAY}(cached){RESET}" if from_cache else ""
    print()
    print(
        f"{BOLD}Available evaluators from adversarial-evaluator-library (v{index.version}){RESET}{cache_note}"
    )
    print()

    if verbose:
        # Detailed view
        for e in evaluators:
            print(f"{CYAN}{e.provider}/{e.name}{RESET}")
            print(f" Model: {e.model}")
            print(f" Category: {e.category}")
            print(f" Description: {e.description}")
            print()
    else:
        # Table view
        headers = ["PROVIDER", "NAME", "CATEGORY", "DESCRIPTION"]
        rows = []
        for e in evaluators:
            # Truncate description if too long
            desc = e.description
            if len(desc) > 40:
                desc = desc[:37] + "..."
            rows.append([e.provider, e.name, e.category, desc])

        print(format_table(headers, rows))
        print()

    # Summary
    count = len(evaluators)
    total = len(index.evaluators)
    if provider or category:
        print(f"{count} evaluators shown (of {total} total).")
    else:
        print(f"{count} evaluators available.")
    print()
    print(f"Use '{CYAN}adversarial library install <provider>/<name>{RESET}' to install.")

    return 0
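# Example (illustrative, not part of commands.py): library_list() appears to
# back the `adversarial library list` subcommand (the CLI wiring lives in
# cli.py, which is not shown in this section). Called directly, with an
# illustrative provider filter:
#
#     if library_list(provider="google", verbose=True) != 0:
#         print("no matching evaluators, or the index could not be fetched")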


def library_info(evaluator_spec: str) -> int:
    """
    Show detailed information about a library evaluator.

    Args:
        evaluator_spec: Evaluator in 'provider/name' format.

    Returns:
        Exit code (0 for success, 1 for error).
    """
    client = LibraryClient()

    # Parse spec
    parts = evaluator_spec.split("/")
    if len(parts) != 2:
        print(f"{RED}Error: Invalid format. Use provider/name (e.g., google/gemini-flash){RESET}")
        return 1

    provider, name = parts

    # Fetch index
    try:
        index, _ = client.fetch_index()
    except NetworkError as e:
        print(f"{RED}Error: Network unavailable{RESET}")
        print(f" {e}")
        return 1
    except ParseError as e:
        print(f"{RED}Error: Could not parse library index{RESET}")
        print(f" {e}")
        return 1

    # Get evaluator entry
    entry = index.get_evaluator(provider, name)
    if not entry:
        print(f"{RED}Error: Evaluator not found: {evaluator_spec}{RESET}")
        print()
        print("Use 'adversarial library list' to see available evaluators.")
        return 1

    # Display basic info from index
    print()
    print(f"{BOLD}{provider}/{name}{RESET}")
    print()
    print(f"Version: {index.version}")
    print(f"Provider: {provider}")
    print(f"Model: {entry.model}")
    print(f"Category: {entry.category}")
    print()
    print("Description:")
    print(f" {entry.description}")
    print()

    # Try to fetch extended info from README
    readme = client.fetch_readme(provider, name)
    if readme:
        _display_extended_info(readme)
    else:
        print(f"{GRAY}Extended info unavailable (README.md not found).{RESET}")
        print()

    # Installation hint
    print(f"Install: {CYAN}adversarial library install {evaluator_spec}{RESET}")
    print()

    return 0


def _display_extended_info(readme: str) -> None:
    """
    Parse and display extended info from README.md.

    Extracts changelog, cost estimates, and API key info from README.
    """
    lines = readme.split("\n")

    # Look for API Key section
    api_key = None
    for line in lines:
        if "api_key" in line.lower() or "API key" in line:
            # Try to extract env var name
            import re

            match = re.search(r"([A-Z_]+_API_KEY)", line)
            if match:
                api_key = match.group(1)
                break

    if api_key:
        print(f"API Key: {api_key}")
        print()

    # Look for Changelog section
    in_changelog = False
    changelog_lines = []
    for line in lines:
        if line.strip().lower().startswith("## changelog") or line.strip().lower().startswith(
            "# changelog"
        ):
            in_changelog = True
            continue
        if in_changelog:
            if line.strip().startswith("##") or line.strip().startswith("# "):
                break
            if line.strip():
                changelog_lines.append(line.strip())
                if len(changelog_lines) >= 5:  # Limit changelog entries
                    break

    if changelog_lines:
        print("Changelog:")
        for cl in changelog_lines[:5]:
            print(f" {cl}")
        print()

    # Look for Cost section
    in_cost = False
    cost_lines = []
    for line in lines:
        if "cost" in line.lower() and line.strip().startswith("#"):
            in_cost = True
            continue
        if in_cost:
            if line.strip().startswith("#"):
                break
            if line.strip():
                cost_lines.append(line.strip())
                if len(cost_lines) >= 3:
                    break

    if cost_lines:
        print("Estimated Cost:")
        for cl in cost_lines[:3]:
            print(f" {cl}")
        print()


def library_install(
    evaluator_specs: List[str],
    force: bool = False,
    skip_validation: bool = False,
    dry_run: bool = False,
    category: Optional[str] = None,
    yes: bool = False,
) -> int:
    """
    Install evaluators from the library.

    Args:
        evaluator_specs: List of evaluator specs in 'provider/name' format.
        force: Overwrite existing files.
        skip_validation: Skip schema validation.
        dry_run: Preview without making changes.
        category: Install all evaluators in this category.
        yes: Skip confirmation prompts (required for non-TTY).

    Returns:
        Exit code (0 for success, 1 for error).
    """
    client = LibraryClient()

    # Non-TTY detection: require --yes for non-interactive mode (unless dry-run)
    if not yes and not dry_run and not sys.stdin.isatty():
        print(f"{RED}Error: Use --yes for non-interactive mode{RESET}")
        return 1

    # Fetch index first
    try:
        index, _ = client.fetch_index()
    except NetworkError as e:
        print(f"{RED}Error: Network unavailable{RESET}")
        print(f" {e}")
        return 1
    except ParseError as e:
        print(f"{RED}Error: Could not parse library index{RESET}")
        print(f" {e}")
        return 1

    # Handle --category flag: get all evaluators in that category
    if category:
        matching = index.filter_by_category(category)
        if not matching:
            print(f"{RED}Error: No evaluators found in category '{category}'{RESET}")
            print()
            print("Available categories:")
            for cat_name, cat_desc in sorted(index.categories.items()):
                print(f" - {cat_name}: {cat_desc}")
            return 1

        print(f"Installing all evaluators in category '{category}':")
        print()
        for e in matching:
            print(f" - {e.provider}/{e.name} (v{index.version})")
        print()

        # Skip confirmation for --yes or --dry-run (dry-run makes no changes)
        if not yes and not dry_run:
            response = input("Proceed? [y/N]: ").strip().lower()
            if response not in ("y", "yes"):
                print("Cancelled.")
                return 0

        evaluator_specs = [f"{e.provider}/{e.name}" for e in matching]

    # Require at least one evaluator spec
    if not evaluator_specs:
        print(f"{RED}Error: No evaluators specified{RESET}")
        print()
        print("Usage:")
        print(" adversarial library install <provider>/<name> [<provider>/<name> ...]")
        print(" adversarial library install --category <category-name>")
        return 1

    evaluators_dir = get_evaluators_dir()
    if not dry_run:
        evaluators_dir.mkdir(parents=True, exist_ok=True)

    success_count = 0
    for spec in evaluator_specs:
        # Parse spec (provider/name or provider/name@version - version ignored for now)
        if "@" in spec:
            spec = spec.split("@")[0]  # Strip version for now

        parts = spec.split("/")
        if len(parts) != 2:
            print(f"{RED}Error: Invalid evaluator spec: {spec}{RESET}")
            print(" Expected format: provider/name (e.g., google/gemini-flash)")
            continue

        provider, name = parts

        # Check if evaluator exists in index
        entry = index.get_evaluator(provider, name)
        if not entry:
            print(f"{RED}Error: Evaluator not found: {spec}{RESET}")
            print(" Use 'adversarial library list' to see available evaluators.")
            continue

        # Check if file already exists (use provider-name format to avoid collisions)
        dest_path = evaluators_dir / f"{provider}-{name}.yml"

        if dry_run:
            # Dry-run mode: show preview without making changes
            print(f"Dry run: Would install {CYAN}{spec}{RESET} (v{index.version})")
            print()
            print(f" Target: {dest_path}")
            if dest_path.exists():
                print(f" Status: {YELLOW}File exists (would overwrite with --force){RESET}")
            else:
                print(f" Status: {GREEN}New file (clean install){RESET}")
            print()

            # Fetch and preview evaluator config
            preview_success = False
            try:
                yaml_content = client.fetch_evaluator(provider, name)
                yaml_content_clean = yaml_content.lstrip()
                if yaml_content_clean.startswith("---"):
                    yaml_content_clean = yaml_content_clean[3:].lstrip("\n")

                print(" Evaluator config preview:")
                print(" " + "─" * 25)
                preview_lines = yaml_content_clean.split("\n")[:10]
                for line in preview_lines:
                    print(f" {line}")
                if len(yaml_content_clean.split("\n")) > 10:
                    print(" ...")
                print()
                preview_success = True
            except NetworkError as e:
                print(f" {RED}Error: Could not fetch preview{RESET}")
                print(f" {e}")
                print()

            print(f"{YELLOW}No changes made (dry run).{RESET}")
            print()
            if preview_success:
                success_count += 1
            continue

        if dest_path.exists() and not force:
            print(f"{YELLOW}Skipping: {provider}-{name}.yml already exists{RESET}")
            print(f" Use {CYAN}--force{RESET} to overwrite.")
            continue

        # Fetch evaluator config
        print(f"Installing {CYAN}{spec}{RESET}...")
        try:
            yaml_content = client.fetch_evaluator(provider, name)
        except NetworkError as e:
            print(f" {RED}Error: Failed to fetch evaluator{RESET}")
            print(f" {e}")
            continue

        # Strip leading YAML document separator to prevent multi-document issues
        yaml_content_clean = yaml_content.lstrip()
        if yaml_content_clean.startswith("---"):
            # Remove the document separator and any following newline
            yaml_content_clean = yaml_content_clean[3:].lstrip("\n")

        # Validate YAML
        try:
            parsed = yaml.safe_load(yaml_content_clean)
            if not parsed:
                raise ValueError("Empty YAML content")
        except (yaml.YAMLError, ValueError) as e:
            print(f" {RED}Error: Invalid YAML in evaluator{RESET}")
            print(f" {e}")
            if skip_validation:
                print(f" {YELLOW}Warning: --skip-validation is set, continuing anyway{RESET}")
            else:
                continue

        # Add provenance header
        full_content = (
            generate_provenance_header(provider, name, index.version) + yaml_content_clean
        )

        # Write file
        try:
            with open(dest_path, "w", encoding="utf-8") as f:
                f.write(full_content)
            print(f" {GREEN}Installed: {dest_path}{RESET}")
            success_count += 1
        except OSError as e:
            print(f" {RED}Error: Could not write file{RESET}")
            print(f" {e}")
            continue

    # Summary
    print()
    if dry_run:
        if success_count == 0 and len(evaluator_specs) > 0:
            print(f"{RED}Dry run failed: No evaluators could be previewed.{RESET}")
            return 1
        print(
            f"{CYAN}Dry run complete. {success_count} evaluator(s) previewed successfully.{RESET}"
        )
        return 0
    elif success_count == len(evaluator_specs):
        print(f"{GREEN}All {success_count} evaluator(s) installed successfully.{RESET}")
    elif success_count > 0:
        print(f"{YELLOW}{success_count} of {len(evaluator_specs)} evaluator(s) installed.{RESET}")
    else:
        print(f"{RED}No evaluators installed.{RESET}")
        return 1

    if not dry_run:
        print()
        print("Next steps:")
        print(f" 1. Configure API keys if needed (check {CYAN}.env{RESET})")
        print(f" 2. Run: {CYAN}adversarial <evaluator-name> <task-file>{RESET}")

    return 0
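# Example (illustrative, not part of commands.py): a dry-run preview followed
# by a forced install, using the placeholder spec from the package's own help
# text. Both calls return 0 on success and 1 if nothing could be installed
# (or previewed):
#
#     library_install(["google/gemini-flash"], dry_run=True)  # preview, writes nothing
#     library_install(["google/gemini-flash"], force=True)    # overwrite an existing .yml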


def library_check_updates(name: Optional[str] = None, no_cache: bool = False) -> int:
    """
    Check for available updates to installed evaluators.

    Args:
        name: Specific evaluator name to check (optional).
        no_cache: Bypass cache and fetch fresh data.

    Returns:
        Exit code (0 for success, 1 for error).
    """
    client = LibraryClient()

    # Scan installed evaluators
    installed = scan_installed_evaluators()
    if not installed:
        print(f"{YELLOW}No library-installed evaluators found.{RESET}")
        print()
        print("Install evaluators with: adversarial library install <provider>/<name>")
        return 0

    # Filter by name if specified
    if name:
        installed = [m for m in installed if m.name == name]
        if not installed:
            print(f"{YELLOW}Evaluator '{name}' not found or not from library.{RESET}")
            return 1

    # Fetch index
    print("Checking for evaluator updates...")
    print()

    try:
        index, from_cache = client.fetch_index(no_cache=no_cache)
    except NetworkError as e:
        print(f"{RED}Error: Network unavailable{RESET}")
        print(f" {e}")
        return 1
    except ParseError as e:
        print(f"{RED}Error: Could not parse library index{RESET}")
        print(f" {e}")
        return 1

    # Compare versions
    updates: List[UpdateInfo] = []
    for meta in installed:
        entry = index.get_evaluator(meta.provider, meta.name)
        if entry:
            is_outdated = meta.version != index.version
            updates.append(
                UpdateInfo(
                    name=meta.name,
                    installed_version=meta.version,
                    available_version=index.version,
                    is_outdated=is_outdated,
                )
            )
        else:
            # Evaluator no longer in index
            updates.append(
                UpdateInfo(
                    name=meta.name,
                    installed_version=meta.version,
                    available_version="-",
                    is_outdated=False,
                    is_local_only=True,
                )
            )

    # Print table
    headers = ["EVALUATOR", "INSTALLED", "AVAILABLE", "STATUS"]
    rows = []
    for u in updates:
        status = u.status
        if u.is_outdated:
            status = f"{YELLOW}{status}{RESET}"
        elif u.is_local_only:
            status = f"{GRAY}{status}{RESET}"
        else:
            status = f"{GREEN}{status}{RESET}"
        rows.append([u.name, u.installed_version, u.available_version, status])

    print(format_table(headers, rows))
    print()

    # Summary
    outdated_count = sum(1 for u in updates if u.is_outdated)
    if outdated_count > 0:
        print(f"{YELLOW}{outdated_count} update(s) available.{RESET}")
        print()
        print(f"Run '{CYAN}adversarial library update <name>{RESET}' to update.")
        print(f"Or '{CYAN}adversarial library update --all{RESET}' to update all.")
    else:
        print(f"{GREEN}All evaluators are up to date.{RESET}")

    return 0


def library_update(
    name: Optional[str] = None,
    all_evaluators: bool = False,
    yes: bool = False,
    diff_only: bool = False,
    no_cache: bool = False,
    dry_run: bool = False,
) -> int:
    """
    Update installed evaluators to newer versions.

    Args:
        name: Specific evaluator name to update.
        all_evaluators: Update all outdated evaluators.
        yes: Skip confirmation prompts.
        diff_only: Show diff without applying changes (same as dry_run).
        no_cache: Bypass cache.
        dry_run: Preview without making changes (same as diff_only).

    Returns:
        Exit code (0 for success, 1 for error).
    """
    # Combine dry_run and diff_only (they do the same thing)
    preview_only = dry_run or diff_only

    client = LibraryClient()

    # Non-TTY detection: require --yes for non-interactive mode (unless preview mode)
    if not yes and not preview_only and not sys.stdin.isatty():
        print(f"{RED}Error: Use --yes for non-interactive mode{RESET}")
        return 1

    if not name and not all_evaluators:
        print(f"{RED}Error: Specify an evaluator name or use --all{RESET}")
        print()
        print("Usage:")
        print(" adversarial library update <name>")
        print(" adversarial library update --all")
        return 1

    # Scan installed evaluators
    installed = scan_installed_evaluators()
    if not installed:
        print(f"{YELLOW}No library-installed evaluators found.{RESET}")
        return 0

    # Fetch index
    try:
        index, _ = client.fetch_index(no_cache=no_cache)
    except NetworkError as e:
        print(f"{RED}Error: Network unavailable{RESET}")
        print(f" {e}")
        return 1
    except ParseError as e:
        print(f"{RED}Error: Could not parse library index{RESET}")
        print(f" {e}")
        return 1

    # Find evaluators to update
    to_update = []
    for meta in installed:
        if name and meta.name != name:
            continue

        entry = index.get_evaluator(meta.provider, meta.name)
        if not entry:
            if name:
                print(f"{YELLOW}Evaluator '{name}' not found in library.{RESET}")
            continue

        if meta.version != index.version:
            to_update.append((meta, entry))
        elif name:
            print(f"{GREEN}Evaluator '{name}' is already up to date (v{meta.version}).{RESET}")
            return 0

    if not to_update:
        if all_evaluators:
            print(f"{GREEN}All evaluators are up to date.{RESET}")
        return 0

    evaluators_dir = get_evaluators_dir()
    updated_count = 0

    for meta, entry in to_update:
        print()
        print(f"Updating {CYAN}{meta.name}{RESET} ({meta.version} → {index.version})...")

        # Fetch new content
        try:
            new_yaml = client.fetch_evaluator(entry.provider, entry.name)
        except NetworkError as e:
            print(f" {RED}Error: Failed to fetch evaluator{RESET}")
            print(f" {e}")
            continue

        # Read current content using tracked file path
        if meta.file_path:
            current_path = Path(meta.file_path)
        else:
            # Fallback: check both old and new naming conventions
            new_path = evaluators_dir / f"{meta.provider}-{meta.name}.yml"
            old_path = evaluators_dir / f"{meta.name}.yml"
            current_path = new_path if new_path.exists() else old_path
        try:
            with open(current_path, "r", encoding="utf-8") as f:
                current_content = f.read()
        except OSError as e:
            print(f" {RED}Error: Could not read current file{RESET}")
            print(f" {e}")
            continue

        # Generate new content with updated provenance
        # Strip leading YAML document separator to prevent multi-document issues
        new_yaml_clean = new_yaml.lstrip()
        if new_yaml_clean.startswith("---"):
            new_yaml_clean = new_yaml_clean[3:].lstrip("\n")
        new_content = (
            generate_provenance_header(entry.provider, entry.name, index.version) + new_yaml_clean
        )

        # Show diff
        print()
        diff = list(
            difflib.unified_diff(
                current_content.splitlines(keepends=True),
                new_content.splitlines(keepends=True),
                fromfile=f"{meta.name}.yml (current)",
                tofile=f"{meta.name}.yml (new)",
            )
        )

        if not diff:
            print(f" {GRAY}No changes in evaluator content.{RESET}")
            continue

        print(" Changes:")
        for line in diff[:50]:  # Limit diff output
            if line.startswith("+") and not line.startswith("+++"):
                print(f" {GREEN}{line.rstrip()}{RESET}")
            elif line.startswith("-") and not line.startswith("---"):
                print(f" {RED}{line.rstrip()}{RESET}")
            else:
                print(f" {line.rstrip()}")

        if len(diff) > 50:
            print(f" {GRAY}... ({len(diff) - 50} more lines){RESET}")

        if preview_only:
            print()
            print(f" {GRAY}(dry run mode, no changes applied){RESET}")
            continue

        # Confirm update
        if not yes:
            print()
            response = input(f" Apply update? [y/N]: ").strip().lower()
            if response not in ("y", "yes"):
                print(f" {GRAY}Skipped.{RESET}")
                continue

        # Apply update
        try:
            with open(current_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print(f" {GREEN}Updated!{RESET}")
            updated_count += 1
        except OSError as e:
            print(f" {RED}Error: Could not write file{RESET}")
            print(f" {e}")

    # Summary
    print()
    if preview_only:
        print(f"Dry run complete. Use without {CYAN}--dry-run{RESET} to apply changes.")
    elif updated_count > 0:
        print(f"{GREEN}{updated_count} evaluator(s) updated.{RESET}")
    else:
        print(f"{YELLOW}No evaluators were updated.{RESET}")

    return 0