adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -12,7 +12,7 @@ Usage:
12
12
  adversarial validate "pytest"
13
13
  """
14
14
 
15
- __version__ = "0.7.0"
15
+ __version__ = "0.9.0"
16
16
  __author__ = "Fredrik Matheson"
17
17
  __license__ = "MIT"
18
18
 
@@ -30,7 +30,7 @@ from typing import Dict, List, Optional, Tuple
30
30
  import yaml
31
31
  from dotenv import dotenv_values, load_dotenv
32
32
 
33
- __version__ = "0.7.0"
33
+ __version__ = "0.9.0"
34
34
 
35
35
  # ANSI color codes for better output
36
36
  RESET = "\033[0m"
@@ -2944,6 +2944,7 @@ def main():
2944
2944
 
2945
2945
  from adversarial_workflow.evaluators import (
2946
2946
  BUILTIN_EVALUATORS,
2947
+ discover_local_evaluators,
2947
2948
  get_all_evaluators,
2948
2949
  run_evaluator,
2949
2950
  )
@@ -2959,6 +2960,7 @@ def main():
2959
2960
  "health",
2960
2961
  "quickstart",
2961
2962
  "agent",
2963
+ "library",
2962
2964
  "split",
2963
2965
  "validate",
2964
2966
  "review",
@@ -2982,6 +2984,8 @@ Examples:
2982
2984
  adversarial validate "npm test" # Validate with tests
2983
2985
  adversarial split large-task.md # Split large files
2984
2986
  adversarial check-citations doc.md # Verify URLs in document
2987
+ adversarial library list # Browse available evaluators
2988
+ adversarial library install google/gemini-flash # Install evaluator
2985
2989
 
2986
2990
  For more information: https://github.com/movito/adversarial-workflow
2987
2991
  """,
@@ -3028,6 +3032,98 @@ For more information: https://github.com/movito/adversarial-workflow
3028
3032
  "--path", default=".", help="Project path (default: current directory)"
3029
3033
  )
3030
3034
 
3035
+ # library command (with subcommands)
3036
+ library_parser = subparsers.add_parser(
3037
+ "library", help="Browse and install evaluators from the community library"
3038
+ )
3039
+ library_subparsers = library_parser.add_subparsers(
3040
+ dest="library_subcommand", help="Library subcommand"
3041
+ )
3042
+
3043
+ # library list subcommand
3044
+ library_list_parser = library_subparsers.add_parser(
3045
+ "list", help="List available evaluators from the library"
3046
+ )
3047
+ library_list_parser.add_argument(
3048
+ "--provider", "-p", help="Filter by provider (e.g., google, openai)"
3049
+ )
3050
+ library_list_parser.add_argument(
3051
+ "--category", "-c", help="Filter by category (e.g., quick-check, deep-reasoning)"
3052
+ )
3053
+ library_list_parser.add_argument(
3054
+ "--verbose", "-v", action="store_true", help="Show detailed information"
3055
+ )
3056
+ library_list_parser.add_argument(
3057
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3058
+ )
3059
+
3060
+ # library info subcommand
3061
+ library_info_parser = library_subparsers.add_parser(
3062
+ "info", help="Show detailed information about an evaluator"
3063
+ )
3064
+ library_info_parser.add_argument(
3065
+ "evaluator_spec", help="Evaluator to show info for (format: provider/name)"
3066
+ )
3067
+
3068
+ # library install subcommand
3069
+ library_install_parser = library_subparsers.add_parser(
3070
+ "install", help="Install evaluator(s) from the library"
3071
+ )
3072
+ library_install_parser.add_argument(
3073
+ "evaluators", nargs="*", help="Evaluator(s) to install (format: provider/name)"
3074
+ )
3075
+ library_install_parser.add_argument(
3076
+ "--force", "-f", action="store_true", help="Overwrite existing files"
3077
+ )
3078
+ library_install_parser.add_argument(
3079
+ "--skip-validation", action="store_true", help="Skip YAML validation (advanced)"
3080
+ )
3081
+ library_install_parser.add_argument(
3082
+ "--dry-run", action="store_true", help="Preview without making changes"
3083
+ )
3084
+ library_install_parser.add_argument("--category", help="Install all evaluators in a category")
3085
+ library_install_parser.add_argument(
3086
+ "--yes", "-y", action="store_true", help="Skip confirmation prompts (required for CI/CD)"
3087
+ )
3088
+
3089
+ # library check-updates subcommand
3090
+ library_check_parser = library_subparsers.add_parser(
3091
+ "check-updates", help="Check for updates to installed evaluators"
3092
+ )
3093
+ library_check_parser.add_argument(
3094
+ "name", nargs="?", help="Specific evaluator to check (optional)"
3095
+ )
3096
+ library_check_parser.add_argument(
3097
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3098
+ )
3099
+
3100
+ # library update subcommand
3101
+ library_update_parser = library_subparsers.add_parser(
3102
+ "update", help="Update installed evaluator(s) to newer versions"
3103
+ )
3104
+ library_update_parser.add_argument("name", nargs="?", help="Evaluator name to update")
3105
+ library_update_parser.add_argument(
3106
+ "--all",
3107
+ "-a",
3108
+ action="store_true",
3109
+ dest="all_evaluators",
3110
+ help="Update all outdated evaluators",
3111
+ )
3112
+ library_update_parser.add_argument(
3113
+ "--yes", "-y", action="store_true", help="Skip confirmation prompts"
3114
+ )
3115
+ library_update_parser.add_argument(
3116
+ "--diff-only", action="store_true", help="Show diff without applying changes"
3117
+ )
3118
+ library_update_parser.add_argument(
3119
+ "--dry-run",
3120
+ action="store_true",
3121
+ help="Preview without making changes (same as --diff-only)",
3122
+ )
3123
+ library_update_parser.add_argument(
3124
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3125
+ )
3126
+
3031
3127
  # review command (static - reviews git changes, no file argument)
3032
3128
  subparsers.add_parser("review", help="Run Phase 3: Code review")
3033
3129
 
@@ -3149,6 +3245,15 @@ For more information: https://github.com/movito/adversarial-workflow
3149
3245
  action="store_true",
3150
3246
  help="Verify URLs in document before evaluation",
3151
3247
  )
3248
+ # Add --evaluator flag for the "evaluate" command only
3249
+ # This allows selecting a library-installed evaluator
3250
+ if config.name == "evaluate":
3251
+ eval_parser.add_argument(
3252
+ "--evaluator",
3253
+ "-e",
3254
+ metavar="NAME",
3255
+ help="Use a specific evaluator from .adversarial/evaluators/",
3256
+ )
3152
3257
  # Store config for later execution
3153
3258
  eval_parser.set_defaults(evaluator_config=config)
3154
3259
 
@@ -3160,15 +3265,45 @@ For more information: https://github.com/movito/adversarial-workflow
3160
3265
 
3161
3266
  # Check for evaluator command first (has evaluator_config attribute)
3162
3267
  if hasattr(args, "evaluator_config"):
3268
+ # Default to the command's evaluator config
3269
+ config_to_use = args.evaluator_config
3270
+
3271
+ # Check if --evaluator flag was specified (only on evaluate command)
3272
+ evaluator_override = getattr(args, "evaluator", None)
3273
+ if evaluator_override:
3274
+ local_evaluators = discover_local_evaluators()
3275
+
3276
+ if not local_evaluators:
3277
+ print(f"{RED}Error: No evaluators installed.{RESET}")
3278
+ print("Install evaluators with: adversarial library install <name>")
3279
+ return 1
3280
+
3281
+ if evaluator_override not in local_evaluators:
3282
+ print(f"{RED}Error: Evaluator '{evaluator_override}' not found.{RESET}")
3283
+ print()
3284
+ print("Available evaluators:")
3285
+ # Show unique evaluators (avoid duplicates from aliases)
3286
+ seen = set()
3287
+ for _, cfg in sorted(local_evaluators.items()):
3288
+ if id(cfg) not in seen:
3289
+ print(f" {cfg.name}")
3290
+ if cfg.aliases:
3291
+ print(f" aliases: {', '.join(cfg.aliases)}")
3292
+ seen.add(id(cfg))
3293
+ return 1
3294
+
3295
+ config_to_use = local_evaluators[evaluator_override]
3296
+ print(f"Using evaluator: {config_to_use.name}")
3297
+
3163
3298
  # Determine timeout: CLI flag > YAML config > default (180s)
3164
3299
  if args.timeout is not None:
3165
3300
  timeout = args.timeout
3166
3301
  source = "CLI override"
3167
- elif args.evaluator_config.timeout != 180:
3168
- timeout = args.evaluator_config.timeout
3302
+ elif config_to_use.timeout != 180:
3303
+ timeout = config_to_use.timeout
3169
3304
  source = "evaluator config"
3170
3305
  else:
3171
- timeout = args.evaluator_config.timeout # 180 (default)
3306
+ timeout = config_to_use.timeout # 180 (default)
3172
3307
  source = "default"
3173
3308
 
3174
3309
  # Validate CLI timeout (consistent with YAML validation)
@@ -3195,7 +3330,7 @@ For more information: https://github.com/movito/adversarial-workflow
3195
3330
  print()
3196
3331
 
3197
3332
  return run_evaluator(
3198
- args.evaluator_config,
3333
+ config_to_use,
3199
3334
  args.file,
3200
3335
  timeout=timeout,
3201
3336
  )
@@ -3220,6 +3355,59 @@ For more information: https://github.com/movito/adversarial-workflow
3220
3355
  print(f"{RED}Error: agent command requires a subcommand{RESET}")
3221
3356
  print("Usage: adversarial agent onboard")
3222
3357
  return 1
3358
+ elif args.command == "library":
3359
+ from adversarial_workflow.library import (
3360
+ library_check_updates,
3361
+ library_info,
3362
+ library_install,
3363
+ library_list,
3364
+ library_update,
3365
+ )
3366
+
3367
+ if args.library_subcommand == "list":
3368
+ return library_list(
3369
+ provider=args.provider,
3370
+ category=args.category,
3371
+ verbose=args.verbose,
3372
+ no_cache=args.no_cache,
3373
+ )
3374
+ elif args.library_subcommand == "info":
3375
+ return library_info(
3376
+ evaluator_spec=args.evaluator_spec,
3377
+ )
3378
+ elif args.library_subcommand == "install":
3379
+ return library_install(
3380
+ evaluator_specs=args.evaluators,
3381
+ force=args.force,
3382
+ skip_validation=args.skip_validation,
3383
+ dry_run=args.dry_run,
3384
+ category=args.category,
3385
+ yes=args.yes,
3386
+ )
3387
+ elif args.library_subcommand == "check-updates":
3388
+ return library_check_updates(
3389
+ name=args.name,
3390
+ no_cache=args.no_cache,
3391
+ )
3392
+ elif args.library_subcommand == "update":
3393
+ return library_update(
3394
+ name=args.name,
3395
+ all_evaluators=args.all_evaluators,
3396
+ yes=args.yes,
3397
+ diff_only=args.diff_only,
3398
+ no_cache=args.no_cache,
3399
+ dry_run=args.dry_run,
3400
+ )
3401
+ else:
3402
+ # No subcommand provided
3403
+ print(f"{RED}Error: library command requires a subcommand{RESET}")
3404
+ print("Usage:")
3405
+ print(" adversarial library list")
3406
+ print(" adversarial library info <provider>/<name>")
3407
+ print(" adversarial library install <provider>/<name>")
3408
+ print(" adversarial library check-updates")
3409
+ print(" adversarial library update <name>")
3410
+ return 1
3223
3411
  elif args.command == "review":
3224
3412
  return review()
3225
3413
  elif args.command == "validate":
@@ -1,12 +1,18 @@
1
- """Evaluators module for adversarial-workflow plugin architecture."""
1
+ """Evaluators module for adversarial-workflow plugin architecture.
2
+
3
+ Supports dual-field model specification (ADV-0015):
4
+ - Legacy: model + api_key_env fields (backwards compatible)
5
+ - New: model_requirement field (resolved via ModelResolver)
6
+ """
2
7
 
3
8
  from .builtins import BUILTIN_EVALUATORS
4
- from .config import EvaluatorConfig
9
+ from .config import EvaluatorConfig, ModelRequirement
5
10
  from .discovery import (
6
11
  EvaluatorParseError,
7
12
  discover_local_evaluators,
8
13
  parse_evaluator_yaml,
9
14
  )
15
+ from .resolver import ModelResolver, ResolutionError
10
16
  from .runner import run_evaluator
11
17
 
12
18
 
@@ -38,6 +44,9 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
38
44
  __all__ = [
39
45
  "EvaluatorConfig",
40
46
  "EvaluatorParseError",
47
+ "ModelRequirement",
48
+ "ModelResolver",
49
+ "ResolutionError",
41
50
  "run_evaluator",
42
51
  "get_all_evaluators",
43
52
  "discover_local_evaluators",
@@ -1,5 +1,9 @@
1
1
  """
2
2
  EvaluatorConfig dataclass for evaluator definitions.
3
+
4
+ Supports dual-field model specification (ADV-0015):
5
+ - Legacy: model + api_key_env fields (backwards compatible)
6
+ - New: model_requirement field (structured capability requirements)
3
7
  """
4
8
 
5
9
  from __future__ import annotations
@@ -7,6 +11,27 @@ from __future__ import annotations
7
11
  from dataclasses import dataclass, field
8
12
 
9
13
 
14
+ @dataclass
15
+ class ModelRequirement:
16
+ """Model capability requirements (from library).
17
+
18
+ This dataclass represents structured model requirements that can be
19
+ resolved to actual model IDs via the ModelResolver. It separates
20
+ WHAT capability is needed from HOW to access it.
21
+
22
+ Attributes:
23
+ family: Model family (e.g., "claude", "gpt", "o", "gemini", "mistral", "codestral", "llama")
24
+ tier: Performance tier (e.g., "opus", "sonnet", "haiku", "flagship", "mini", "latest")
25
+ min_version: Optional minimum model generation (e.g., "4" for Claude 4+)
26
+ min_context: Optional minimum context window in tokens (e.g., 128000)
27
+ """
28
+
29
+ family: str
30
+ tier: str
31
+ min_version: str = ""
32
+ min_context: int = 0
33
+
34
+
10
35
  @dataclass
11
36
  class EvaluatorConfig:
12
37
  """Configuration for an evaluator (built-in or custom).
@@ -15,11 +40,18 @@ class EvaluatorConfig:
15
40
  whether built-in (evaluate, proofread, review) or custom
16
41
  (defined in .adversarial/evaluators/*.yml).
17
42
 
43
+ Supports dual-field model specification (ADV-0015):
44
+ - Legacy: model + api_key_env fields (always backwards compatible)
45
+ - New: model_requirement field (resolved via ModelResolver)
46
+
47
+ When both are present, model_requirement takes precedence. If resolution
48
+ fails, falls back to legacy model field with a warning.
49
+
18
50
  Attributes:
19
51
  name: Command name (e.g., "evaluate", "athena")
20
52
  description: Help text shown in CLI
21
- model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro")
22
- api_key_env: Environment variable name for API key
53
+ model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro") - legacy field
54
+ api_key_env: Environment variable name for API key - legacy field
23
55
  prompt: The evaluation prompt template
24
56
  output_suffix: Log file suffix (e.g., "PLAN-EVALUATION")
25
57
  log_prefix: CLI output prefix (e.g., "ATHENA")
@@ -27,6 +59,7 @@ class EvaluatorConfig:
27
59
  aliases: Alternative command names
28
60
  version: Evaluator version
29
61
  timeout: Timeout in seconds (default: 180, max: 600)
62
+ model_requirement: Structured model requirement (resolved via ModelResolver)
30
63
  source: "builtin" or "local" (set internally)
31
64
  config_file: Path to YAML file if local (set internally)
32
65
  """
@@ -46,6 +79,10 @@ class EvaluatorConfig:
46
79
  version: str = "1.0.0"
47
80
  timeout: int = 180 # Timeout in seconds (default: 180, max: 600)
48
81
 
82
+ # NEW: Structured model requirement (Phase 1 - ADV-0015)
83
+ # When present, resolved via ModelResolver to actual model ID
84
+ model_requirement: ModelRequirement | None = None
85
+
49
86
  # Metadata (set internally during discovery, not from YAML)
50
87
  source: str = "builtin"
51
88
  config_file: str | None = None
@@ -4,6 +4,10 @@ YAML parsing and discovery for custom evaluators.
4
4
  This module handles discovering evaluator definitions from
5
5
  .adversarial/evaluators/*.yml files and parsing them into
6
6
  EvaluatorConfig objects.
7
+
8
+ Supports dual-field model specification (ADV-0015):
9
+ - Legacy: model + api_key_env fields (backwards compatible)
10
+ - New: model_requirement field (resolved via ModelResolver)
7
11
  """
8
12
 
9
13
  from __future__ import annotations
@@ -14,7 +18,7 @@ from pathlib import Path
14
18
 
15
19
  import yaml
16
20
 
17
- from .config import EvaluatorConfig
21
+ from .config import EvaluatorConfig, ModelRequirement
18
22
 
19
23
  logger = logging.getLogger(__name__)
20
24
 
@@ -54,26 +58,39 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
54
58
  raise EvaluatorParseError(f"YAML must be a mapping, got {type(data).__name__}: {yml_file}")
55
59
 
56
60
  # Validate required fields exist
57
- required = [
61
+ # model and api_key_env are only required if model_requirement is not present
62
+ always_required = [
58
63
  "name",
59
64
  "description",
60
- "model",
61
- "api_key_env",
62
65
  "prompt",
63
66
  "output_suffix",
64
67
  ]
65
- missing = [f for f in required if f not in data]
68
+ has_model_requirement = "model_requirement" in data
69
+ if not has_model_requirement:
70
+ # Legacy format: model and api_key_env are required
71
+ always_required.extend(["model", "api_key_env"])
72
+
73
+ missing = [f for f in always_required if f not in data]
66
74
  if missing:
67
75
  raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
68
76
 
69
77
  # Validate required fields are strings (YAML can parse 'yes' as bool, '123' as int)
70
- for field in required:
78
+ for field in always_required:
71
79
  value = data[field]
72
80
  if not isinstance(value, str):
73
81
  raise EvaluatorParseError(
74
82
  f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
75
83
  )
76
84
 
85
+ # Validate model and api_key_env are strings if present (even when optional)
86
+ for field in ["model", "api_key_env"]:
87
+ if field in data and data[field] is not None:
88
+ value = data[field]
89
+ if not isinstance(value, str):
90
+ raise EvaluatorParseError(
91
+ f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
92
+ )
93
+
77
94
  # Validate name format (valid CLI command name)
78
95
  name = data["name"]
79
96
  if not re.match(r"^[a-zA-Z][a-zA-Z0-9_-]*$", name):
@@ -143,6 +160,67 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
143
160
  )
144
161
  data["timeout"] = 600
145
162
 
163
+ # Parse model_requirement if present (ADV-0015)
164
+ model_requirement = None
165
+ if "model_requirement" in data:
166
+ req_data = data["model_requirement"]
167
+
168
+ # Validate model_requirement is a mapping
169
+ if not isinstance(req_data, dict):
170
+ raise EvaluatorParseError(
171
+ f"model_requirement must be a mapping, got {type(req_data).__name__}"
172
+ )
173
+
174
+ # Validate required fields in model_requirement
175
+ if "family" not in req_data:
176
+ raise EvaluatorParseError("model_requirement.family is required")
177
+ if "tier" not in req_data:
178
+ raise EvaluatorParseError("model_requirement.tier is required")
179
+
180
+ # Validate family and tier are strings
181
+ family = req_data["family"]
182
+ tier = req_data["tier"]
183
+ if not isinstance(family, str):
184
+ raise EvaluatorParseError(
185
+ f"model_requirement.family must be a string, got {type(family).__name__}"
186
+ )
187
+ if not isinstance(tier, str):
188
+ raise EvaluatorParseError(
189
+ f"model_requirement.tier must be a string, got {type(tier).__name__}"
190
+ )
191
+
192
+ # Validate optional min_version is string if present
193
+ min_version = req_data.get("min_version", "")
194
+ # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
195
+ if isinstance(min_version, bool):
196
+ raise EvaluatorParseError(
197
+ f"model_requirement.min_version must be a string, got bool: {min_version!r}"
198
+ )
199
+ # Convert integers to strings (YAML parses '0' as int 0)
200
+ if isinstance(min_version, int):
201
+ min_version = str(min_version)
202
+ elif min_version and not isinstance(min_version, str):
203
+ raise EvaluatorParseError(
204
+ f"model_requirement.min_version must be a string, got {type(min_version).__name__}"
205
+ )
206
+
207
+ # Validate optional min_context is integer if present
208
+ min_context = req_data.get("min_context", 0)
209
+ # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
210
+ if isinstance(min_context, bool):
211
+ raise EvaluatorParseError("model_requirement.min_context must be an integer, got bool")
212
+ if min_context and not isinstance(min_context, int):
213
+ raise EvaluatorParseError(
214
+ f"model_requirement.min_context must be an integer, got {type(min_context).__name__}"
215
+ )
216
+
217
+ model_requirement = ModelRequirement(
218
+ family=family,
219
+ tier=tier,
220
+ min_version=min_version,
221
+ min_context=min_context,
222
+ )
223
+
146
224
  # Filter to known fields only (log unknown fields)
147
225
  known_fields = {
148
226
  "name",
@@ -156,17 +234,27 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
156
234
  "aliases",
157
235
  "version",
158
236
  "timeout",
237
+ "model_requirement", # ADV-0015
159
238
  }
160
239
  unknown = set(data.keys()) - known_fields
161
240
  if unknown:
162
241
  logger.warning("Unknown fields in %s: %s", yml_file.name, ", ".join(sorted(unknown)))
163
242
 
164
- # Build filtered data dict
165
- filtered_data = {k: v for k, v in data.items() if k in known_fields}
243
+ # Build filtered data dict (exclude model_requirement as it's handled separately)
244
+ scalar_fields = known_fields - {"model_requirement"}
245
+ filtered_data = {k: v for k, v in data.items() if k in scalar_fields}
246
+
247
+ # Set defaults for optional model/api_key_env when model_requirement is present
248
+ # Also handle explicit null values (YAML parses empty or null as None)
249
+ if "model" not in filtered_data or filtered_data["model"] is None:
250
+ filtered_data["model"] = ""
251
+ if "api_key_env" not in filtered_data or filtered_data["api_key_env"] is None:
252
+ filtered_data["api_key_env"] = ""
166
253
 
167
- # Create config with metadata
254
+ # Create config with metadata and model_requirement
168
255
  config = EvaluatorConfig(
169
256
  **filtered_data,
257
+ model_requirement=model_requirement,
170
258
  source="local",
171
259
  config_file=str(yml_file),
172
260
  )