PyPI - adversarial-workflow - Versions diffs - 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

adversarial_workflow/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ Usage:
     adversarial validate "pytest"
 """
-__version__ = "0.7.0"
+__version__ = "0.9.0"
 __author__ = "Fredrik Matheson"
 __license__ = "MIT"

adversarial_workflow/cli.py CHANGED Viewed

@@ -30,7 +30,7 @@ from typing import Dict, List, Optional, Tuple
 import yaml
 from dotenv import dotenv_values, load_dotenv
-__version__ = "0.7.0"
+__version__ = "0.9.0"
 # ANSI color codes for better output
 RESET = "\033[0m"
@@ -2944,6 +2944,7 @@ def main():
     from adversarial_workflow.evaluators import (
         BUILTIN_EVALUATORS,
+        discover_local_evaluators,
         get_all_evaluators,
         run_evaluator,
     )
@@ -2959,6 +2960,7 @@ def main():
         "health",
         "quickstart",
         "agent",
+        "library",
         "split",
         "validate",
         "review",
@@ -2982,6 +2984,8 @@ Examples:
   adversarial validate "npm test"       # Validate with tests
   adversarial split large-task.md       # Split large files
   adversarial check-citations doc.md    # Verify URLs in document
+  adversarial library list              # Browse available evaluators
+  adversarial library install google/gemini-flash  # Install evaluator
 For more information: https://github.com/movito/adversarial-workflow
         """,
@@ -3028,6 +3032,98 @@ For more information: https://github.com/movito/adversarial-workflow
         "--path", default=".", help="Project path (default: current directory)"
     )
+    # library command (with subcommands)
+    library_parser = subparsers.add_parser(
+        "library", help="Browse and install evaluators from the community library"
+    )
+    library_subparsers = library_parser.add_subparsers(
+        dest="library_subcommand", help="Library subcommand"
+    )
+    # library list subcommand
+    library_list_parser = library_subparsers.add_parser(
+        "list", help="List available evaluators from the library"
+    )
+    library_list_parser.add_argument(
+        "--provider", "-p", help="Filter by provider (e.g., google, openai)"
+    )
+    library_list_parser.add_argument(
+        "--category", "-c", help="Filter by category (e.g., quick-check, deep-reasoning)"
+    )
+    library_list_parser.add_argument(
+        "--verbose", "-v", action="store_true", help="Show detailed information"
+    )
+    library_list_parser.add_argument(
+        "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
+    )
+    # library info subcommand
+    library_info_parser = library_subparsers.add_parser(
+        "info", help="Show detailed information about an evaluator"
+    )
+    library_info_parser.add_argument(
+        "evaluator_spec", help="Evaluator to show info for (format: provider/name)"
+    )
+    # library install subcommand
+    library_install_parser = library_subparsers.add_parser(
+        "install", help="Install evaluator(s) from the library"
+    )
+    library_install_parser.add_argument(
+        "evaluators", nargs="*", help="Evaluator(s) to install (format: provider/name)"
+    )
+    library_install_parser.add_argument(
+        "--force", "-f", action="store_true", help="Overwrite existing files"
+    )
+    library_install_parser.add_argument(
+        "--skip-validation", action="store_true", help="Skip YAML validation (advanced)"
+    )
+    library_install_parser.add_argument(
+        "--dry-run", action="store_true", help="Preview without making changes"
+    )
+    library_install_parser.add_argument("--category", help="Install all evaluators in a category")
+    library_install_parser.add_argument(
+        "--yes", "-y", action="store_true", help="Skip confirmation prompts (required for CI/CD)"
+    )
+    # library check-updates subcommand
+    library_check_parser = library_subparsers.add_parser(
+        "check-updates", help="Check for updates to installed evaluators"
+    )
+    library_check_parser.add_argument(
+        "name", nargs="?", help="Specific evaluator to check (optional)"
+    )
+    library_check_parser.add_argument(
+        "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
+    )
+    # library update subcommand
+    library_update_parser = library_subparsers.add_parser(
+        "update", help="Update installed evaluator(s) to newer versions"
+    )
+    library_update_parser.add_argument("name", nargs="?", help="Evaluator name to update")
+    library_update_parser.add_argument(
+        "--all",
+        "-a",
+        action="store_true",
+        dest="all_evaluators",
+        help="Update all outdated evaluators",
+    )
+    library_update_parser.add_argument(
+        "--yes", "-y", action="store_true", help="Skip confirmation prompts"
+    )
+    library_update_parser.add_argument(
+        "--diff-only", action="store_true", help="Show diff without applying changes"
+    )
+    library_update_parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Preview without making changes (same as --diff-only)",
+    )
+    library_update_parser.add_argument(
+        "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
+    )
     # review command (static - reviews git changes, no file argument)
     subparsers.add_parser("review", help="Run Phase 3: Code review")
@@ -3149,6 +3245,15 @@ For more information: https://github.com/movito/adversarial-workflow
             action="store_true",
             help="Verify URLs in document before evaluation",
         )
+        # Add --evaluator flag for the "evaluate" command only
+        # This allows selecting a library-installed evaluator
+        if config.name == "evaluate":
+            eval_parser.add_argument(
+                "--evaluator",
+                "-e",
+                metavar="NAME",
+                help="Use a specific evaluator from .adversarial/evaluators/",
+            )
         # Store config for later execution
         eval_parser.set_defaults(evaluator_config=config)
@@ -3160,15 +3265,45 @@ For more information: https://github.com/movito/adversarial-workflow
     # Check for evaluator command first (has evaluator_config attribute)
     if hasattr(args, "evaluator_config"):
+        # Default to the command's evaluator config
+        config_to_use = args.evaluator_config
+        # Check if --evaluator flag was specified (only on evaluate command)
+        evaluator_override = getattr(args, "evaluator", None)
+        if evaluator_override:
+            local_evaluators = discover_local_evaluators()
+            if not local_evaluators:
+                print(f"{RED}Error: No evaluators installed.{RESET}")
+                print("Install evaluators with: adversarial library install <name>")
+                return 1
+            if evaluator_override not in local_evaluators:
+                print(f"{RED}Error: Evaluator '{evaluator_override}' not found.{RESET}")
+                print()
+                print("Available evaluators:")
+                # Show unique evaluators (avoid duplicates from aliases)
+                seen = set()
+                for _, cfg in sorted(local_evaluators.items()):
+                    if id(cfg) not in seen:
+                        print(f"  {cfg.name}")
+                        if cfg.aliases:
+                            print(f"    aliases: {', '.join(cfg.aliases)}")
+                        seen.add(id(cfg))
+                return 1
+            config_to_use = local_evaluators[evaluator_override]
+            print(f"Using evaluator: {config_to_use.name}")
         # Determine timeout: CLI flag > YAML config > default (180s)
         if args.timeout is not None:
             timeout = args.timeout
             source = "CLI override"
-        elif args.evaluator_config.timeout != 180:
-            timeout = args.evaluator_config.timeout
+        elif config_to_use.timeout != 180:
+            timeout = config_to_use.timeout
             source = "evaluator config"
         else:
-            timeout = args.evaluator_config.timeout  # 180 (default)
+            timeout = config_to_use.timeout  # 180 (default)
             source = "default"
         # Validate CLI timeout (consistent with YAML validation)
@@ -3195,7 +3330,7 @@ For more information: https://github.com/movito/adversarial-workflow
             print()
         return run_evaluator(
-            args.evaluator_config,
+            config_to_use,
             args.file,
             timeout=timeout,
         )
@@ -3220,6 +3355,59 @@ For more information: https://github.com/movito/adversarial-workflow
             print(f"{RED}Error: agent command requires a subcommand{RESET}")
             print("Usage: adversarial agent onboard")
             return 1
+    elif args.command == "library":
+        from adversarial_workflow.library import (
+            library_check_updates,
+            library_info,
+            library_install,
+            library_list,
+            library_update,
+        )
+        if args.library_subcommand == "list":
+            return library_list(
+                provider=args.provider,
+                category=args.category,
+                verbose=args.verbose,
+                no_cache=args.no_cache,
+            )
+        elif args.library_subcommand == "info":
+            return library_info(
+                evaluator_spec=args.evaluator_spec,
+            )
+        elif args.library_subcommand == "install":
+            return library_install(
+                evaluator_specs=args.evaluators,
+                force=args.force,
+                skip_validation=args.skip_validation,
+                dry_run=args.dry_run,
+                category=args.category,
+                yes=args.yes,
+            )
+        elif args.library_subcommand == "check-updates":
+            return library_check_updates(
+                name=args.name,
+                no_cache=args.no_cache,
+            )
+        elif args.library_subcommand == "update":
+            return library_update(
+                name=args.name,
+                all_evaluators=args.all_evaluators,
+                yes=args.yes,
+                diff_only=args.diff_only,
+                no_cache=args.no_cache,
+                dry_run=args.dry_run,
+            )
+        else:
+            # No subcommand provided
+            print(f"{RED}Error: library command requires a subcommand{RESET}")
+            print("Usage:")
+            print("  adversarial library list")
+            print("  adversarial library info <provider>/<name>")
+            print("  adversarial library install <provider>/<name>")
+            print("  adversarial library check-updates")
+            print("  adversarial library update <name>")
+            return 1
     elif args.command == "review":
         return review()
     elif args.command == "validate":

adversarial_workflow/evaluators/__init__.py CHANGED Viewed

@@ -1,12 +1,18 @@
-"""Evaluators module for adversarial-workflow plugin architecture."""
+"""Evaluators module for adversarial-workflow plugin architecture.
+Supports dual-field model specification (ADV-0015):
+- Legacy: model + api_key_env fields (backwards compatible)
+- New: model_requirement field (resolved via ModelResolver)
+"""
 from .builtins import BUILTIN_EVALUATORS
-from .config import EvaluatorConfig
+from .config import EvaluatorConfig, ModelRequirement
 from .discovery import (
     EvaluatorParseError,
     discover_local_evaluators,
     parse_evaluator_yaml,
 )
+from .resolver import ModelResolver, ResolutionError
 from .runner import run_evaluator
@@ -38,6 +44,9 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
 __all__ = [
     "EvaluatorConfig",
     "EvaluatorParseError",
+    "ModelRequirement",
+    "ModelResolver",
+    "ResolutionError",
     "run_evaluator",
     "get_all_evaluators",
     "discover_local_evaluators",

adversarial_workflow/evaluators/config.py CHANGED Viewed

@@ -1,5 +1,9 @@
 """
 EvaluatorConfig dataclass for evaluator definitions.
+Supports dual-field model specification (ADV-0015):
+- Legacy: model + api_key_env fields (backwards compatible)
+- New: model_requirement field (structured capability requirements)
 """
 from __future__ import annotations
@@ -7,6 +11,27 @@ from __future__ import annotations
 from dataclasses import dataclass, field
+@dataclass
+class ModelRequirement:
+    """Model capability requirements (from library).
+    This dataclass represents structured model requirements that can be
+    resolved to actual model IDs via the ModelResolver. It separates
+    WHAT capability is needed from HOW to access it.
+    Attributes:
+        family: Model family (e.g., "claude", "gpt", "o", "gemini", "mistral", "codestral", "llama")
+        tier: Performance tier (e.g., "opus", "sonnet", "haiku", "flagship", "mini", "latest")
+        min_version: Optional minimum model generation (e.g., "4" for Claude 4+)
+        min_context: Optional minimum context window in tokens (e.g., 128000)
+    """
+    family: str
+    tier: str
+    min_version: str = ""
+    min_context: int = 0
 @dataclass
 class EvaluatorConfig:
     """Configuration for an evaluator (built-in or custom).
@@ -15,11 +40,18 @@ class EvaluatorConfig:
     whether built-in (evaluate, proofread, review) or custom
     (defined in .adversarial/evaluators/*.yml).
+    Supports dual-field model specification (ADV-0015):
+    - Legacy: model + api_key_env fields (always backwards compatible)
+    - New: model_requirement field (resolved via ModelResolver)
+    When both are present, model_requirement takes precedence. If resolution
+    fails, falls back to legacy model field with a warning.
     Attributes:
         name: Command name (e.g., "evaluate", "athena")
         description: Help text shown in CLI
-        model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro")
-        api_key_env: Environment variable name for API key
+        model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro") - legacy field
+        api_key_env: Environment variable name for API key - legacy field
         prompt: The evaluation prompt template
         output_suffix: Log file suffix (e.g., "PLAN-EVALUATION")
         log_prefix: CLI output prefix (e.g., "ATHENA")
@@ -27,6 +59,7 @@ class EvaluatorConfig:
         aliases: Alternative command names
         version: Evaluator version
         timeout: Timeout in seconds (default: 180, max: 600)
+        model_requirement: Structured model requirement (resolved via ModelResolver)
         source: "builtin" or "local" (set internally)
         config_file: Path to YAML file if local (set internally)
     """
@@ -46,6 +79,10 @@ class EvaluatorConfig:
     version: str = "1.0.0"
     timeout: int = 180  # Timeout in seconds (default: 180, max: 600)
+    # NEW: Structured model requirement (Phase 1 - ADV-0015)
+    # When present, resolved via ModelResolver to actual model ID
+    model_requirement: ModelRequirement | None = None
     # Metadata (set internally during discovery, not from YAML)
     source: str = "builtin"
     config_file: str | None = None

adversarial_workflow/evaluators/discovery.py CHANGED Viewed

@@ -4,6 +4,10 @@ YAML parsing and discovery for custom evaluators.
 This module handles discovering evaluator definitions from
 .adversarial/evaluators/*.yml files and parsing them into
 EvaluatorConfig objects.
+Supports dual-field model specification (ADV-0015):
+- Legacy: model + api_key_env fields (backwards compatible)
+- New: model_requirement field (resolved via ModelResolver)
 """
 from __future__ import annotations
@@ -14,7 +18,7 @@ from pathlib import Path
 import yaml
-from .config import EvaluatorConfig
+from .config import EvaluatorConfig, ModelRequirement
 logger = logging.getLogger(__name__)
@@ -54,26 +58,39 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
         raise EvaluatorParseError(f"YAML must be a mapping, got {type(data).__name__}: {yml_file}")
     # Validate required fields exist
-    required = [
+    # model and api_key_env are only required if model_requirement is not present
+    always_required = [
         "name",
         "description",
-        "model",
-        "api_key_env",
         "prompt",
         "output_suffix",
     ]
-    missing = [f for f in required if f not in data]
+    has_model_requirement = "model_requirement" in data
+    if not has_model_requirement:
+        # Legacy format: model and api_key_env are required
+        always_required.extend(["model", "api_key_env"])
+    missing = [f for f in always_required if f not in data]
     if missing:
         raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
     # Validate required fields are strings (YAML can parse 'yes' as bool, '123' as int)
-    for field in required:
+    for field in always_required:
         value = data[field]
         if not isinstance(value, str):
             raise EvaluatorParseError(
                 f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
             )
+    # Validate model and api_key_env are strings if present (even when optional)
+    for field in ["model", "api_key_env"]:
+        if field in data and data[field] is not None:
+            value = data[field]
+            if not isinstance(value, str):
+                raise EvaluatorParseError(
+                    f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
+                )
     # Validate name format (valid CLI command name)
     name = data["name"]
     if not re.match(r"^[a-zA-Z][a-zA-Z0-9_-]*$", name):
@@ -143,6 +160,67 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
             )
             data["timeout"] = 600
+    # Parse model_requirement if present (ADV-0015)
+    model_requirement = None
+    if "model_requirement" in data:
+        req_data = data["model_requirement"]
+        # Validate model_requirement is a mapping
+        if not isinstance(req_data, dict):
+            raise EvaluatorParseError(
+                f"model_requirement must be a mapping, got {type(req_data).__name__}"
+            )
+        # Validate required fields in model_requirement
+        if "family" not in req_data:
+            raise EvaluatorParseError("model_requirement.family is required")
+        if "tier" not in req_data:
+            raise EvaluatorParseError("model_requirement.tier is required")
+        # Validate family and tier are strings
+        family = req_data["family"]
+        tier = req_data["tier"]
+        if not isinstance(family, str):
+            raise EvaluatorParseError(
+                f"model_requirement.family must be a string, got {type(family).__name__}"
+            )
+        if not isinstance(tier, str):
+            raise EvaluatorParseError(
+                f"model_requirement.tier must be a string, got {type(tier).__name__}"
+            )
+        # Validate optional min_version is string if present
+        min_version = req_data.get("min_version", "")
+        # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
+        if isinstance(min_version, bool):
+            raise EvaluatorParseError(
+                f"model_requirement.min_version must be a string, got bool: {min_version!r}"
+            )
+        # Convert integers to strings (YAML parses '0' as int 0)
+        if isinstance(min_version, int):
+            min_version = str(min_version)
+        elif min_version and not isinstance(min_version, str):
+            raise EvaluatorParseError(
+                f"model_requirement.min_version must be a string, got {type(min_version).__name__}"
+            )
+        # Validate optional min_context is integer if present
+        min_context = req_data.get("min_context", 0)
+        # Reject booleans explicitly (YAML parses 'yes'/'no'/'true'/'false' as bool)
+        if isinstance(min_context, bool):
+            raise EvaluatorParseError("model_requirement.min_context must be an integer, got bool")
+        if min_context and not isinstance(min_context, int):
+            raise EvaluatorParseError(
+                f"model_requirement.min_context must be an integer, got {type(min_context).__name__}"
+            )
+        model_requirement = ModelRequirement(
+            family=family,
+            tier=tier,
+            min_version=min_version,
+            min_context=min_context,
+        )
     # Filter to known fields only (log unknown fields)
     known_fields = {
         "name",
@@ -156,17 +234,27 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
         "aliases",
         "version",
         "timeout",
+        "model_requirement",  # ADV-0015
     }
     unknown = set(data.keys()) - known_fields
     if unknown:
         logger.warning("Unknown fields in %s: %s", yml_file.name, ", ".join(sorted(unknown)))
-    # Build filtered data dict
-    filtered_data = {k: v for k, v in data.items() if k in known_fields}
+    # Build filtered data dict (exclude model_requirement as it's handled separately)
+    scalar_fields = known_fields - {"model_requirement"}
+    filtered_data = {k: v for k, v in data.items() if k in scalar_fields}
+    # Set defaults for optional model/api_key_env when model_requirement is present
+    # Also handle explicit null values (YAML parses empty or null as None)
+    if "model" not in filtered_data or filtered_data["model"] is None:
+        filtered_data["model"] = ""
+    if "api_key_env" not in filtered_data or filtered_data["api_key_env"] is None:
+        filtered_data["api_key_env"] = ""
-    # Create config with metadata
+    # Create config with metadata and model_requirement
     config = EvaluatorConfig(
         **filtered_data,
+        model_requirement=model_requirement,
         source="local",
         config_file=str(yml_file),
     )

adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl