adversarial-workflow 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ """
2
+ Model resolver for evaluator configurations (ADV-0015: Model Routing Layer - Phase 1).
3
+
4
+ This module provides the ModelResolver class that resolves model requirements
5
+ to actual model IDs using an embedded registry. It supports:
6
+ - model_requirement field (new structured format)
7
+ - model field (legacy string format)
8
+ - Fallback from model_requirement to model on resolution failure
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import warnings
14
+ from typing import TYPE_CHECKING, ClassVar
15
+
16
+ if TYPE_CHECKING:
17
+ from adversarial_workflow.evaluators.config import EvaluatorConfig, ModelRequirement
18
+
19
+
20
class ResolutionError(Exception):
    """Signals that a model requirement could not be resolved to a model ID."""
22
+
23
+
24
class ModelResolver:
    """Maps evaluator model requirements onto concrete model identifiers.

    The embedded registry mirrors adversarial-evaluator-library/providers/registry.yml
    and translates (family, tier) pairs into model IDs plus the environment
    variable that holds the matching API key.

    Resolution order:
    1. If model_requirement present: resolve via registry
    2. If resolution fails AND model present: warn + fallback to legacy
    3. If resolution fails AND no model: raise ResolutionError
    4. If no model_requirement AND model present: use legacy directly
    5. If neither: raise ResolutionError
    """

    # Default registry - matches adversarial-evaluator-library/providers/registry.yml
    # Updated 2026-02-03 per Library team handoff (ADR-0005)
    DEFAULT_REGISTRY: ClassVar[dict[str, dict[str, dict[str, list[str] | str]]]] = {
        "claude": {
            "opus": {"models": ["claude-4-opus-20260115", "claude-opus-4-5-20251101"], "prefix": "anthropic/"},
            "sonnet": {"models": ["claude-4-sonnet-20260115"], "prefix": "anthropic/"},
            "haiku": {"models": ["claude-4-haiku-20260115"], "prefix": "anthropic/"},
        },
        "gpt": {
            "flagship": {"models": ["gpt-4o", "gpt-4o-2024-08-06"], "prefix": ""},
            "standard": {"models": ["gpt-4-turbo", "gpt-4"], "prefix": ""},
            "mini": {"models": ["gpt-4o-mini"], "prefix": ""},
        },
        "o": {
            "flagship": {"models": ["o1", "o1-2024-12-17"], "prefix": ""},
            "mini": {"models": ["o3-mini"], "prefix": ""},
        },
        "gemini": {
            "pro": {"models": ["gemini-2.5-pro"], "prefix": "gemini/"},
            "flash": {"models": ["gemini-2.5-flash"], "prefix": "gemini/"},
        },
        "mistral": {
            "large": {"models": ["mistral-large-latest"], "prefix": "mistral/"},
            "small": {"models": ["mistral-small-latest"], "prefix": "mistral/"},
        },
        "codestral": {
            "latest": {"models": ["codestral-latest"], "prefix": "mistral/"},
        },
        "llama": {
            "large": {"models": ["llama-3.3-70b"], "prefix": ""},  # prefix varies by host
            "medium": {"models": ["llama-3.1-8b"], "prefix": ""},
        },
    }

    # API key environment variable mapping by family
    API_KEY_MAP: ClassVar[dict[str, str]] = {
        "claude": "ANTHROPIC_API_KEY",
        "gpt": "OPENAI_API_KEY",
        "o": "OPENAI_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "mistral": "MISTRAL_API_KEY",
        "codestral": "MISTRAL_API_KEY",
        "llama": "TOGETHER_API_KEY",
    }

    def resolve(self, config: EvaluatorConfig) -> tuple[str, str]:
        """Resolve an evaluator config to a (model_id, api_key_env) pair.

        Args:
            config: EvaluatorConfig with model and/or model_requirement

        Returns:
            (model_id, api_key_env) tuple

        Raises:
            ResolutionError: If resolution fails and no fallback available
        """
        requirement = config.model_requirement
        if not requirement:
            # Legacy-only path: take the model field verbatim.
            if config.model:
                return (config.model, config.api_key_env)
            raise ResolutionError("No model or model_requirement specified")

        try:
            return self._resolve_requirement(requirement)
        except ResolutionError as exc:
            if not config.model:
                raise
            # Registry lookup failed but a legacy model exists: degrade gracefully.
            warnings.warn(
                f"model_requirement resolution failed for {config.name}: {exc}. "
                f"Falling back to legacy model field: {config.model}",
                UserWarning,
                stacklevel=2,
            )
            return (config.model, config.api_key_env)

    def _resolve_requirement(self, req: ModelRequirement) -> tuple[str, str]:
        """Look up a requirement's family/tier in the embedded registry.

        Args:
            req: ModelRequirement with family and tier

        Returns:
            (model_id, api_key_env) tuple

        Raises:
            ResolutionError: If family or tier not found in registry
        """
        # TODO(Phase 2): ModelRequirement.min_version and ModelRequirement.min_context
        # are currently parsed but not used for filtering. Phase 1 only performs
        # family/tier matching. Phase 2 will implement filtering by min_version
        # and min_context requirements.
        tiers = self.DEFAULT_REGISTRY.get(req.family)
        if not tiers:
            raise ResolutionError(f"Unknown model family: {req.family}")

        entry = tiers.get(req.tier)
        if not entry:
            raise ResolutionError(f"Unknown tier '{req.tier}' for family '{req.family}'")

        candidates = entry.get("models", [])
        if not candidates:
            raise ResolutionError(f"No models defined for {req.family}/{req.tier}")
        # Registry type is list[str] | str for flexibility; actual values are always lists.
        # First entry is the preferred (latest) model for the tier.
        model_id = candidates[0]  # type: ignore[index]

        # LiteLLM routes by provider prefix (e.g. "anthropic/", "gemini/").
        prefix = entry.get("prefix", "")
        if prefix:
            model_id = f"{prefix}{model_id}"

        return (model_id, self._get_api_key_env(req.family))

    def _get_api_key_env(self, family: str) -> str:
        """Return the API key environment variable name for *family*.

        Args:
            family: Model family name

        Returns:
            Environment variable name for API key; unknown families fall back
            to the FAMILY_API_KEY convention.
        """
        return self.API_KEY_MAP.get(family, f"{family.upper()}_API_KEY")
@@ -1,4 +1,9 @@
1
- """Generic evaluator runner."""
1
+ """Generic evaluator runner.
2
+
3
+ Supports dual-field model specification (ADV-0015):
4
+ - Legacy: model + api_key_env fields (backwards compatible)
5
+ - New: model_requirement field (resolved via ModelResolver)
6
+ """
2
7
 
3
8
  from __future__ import annotations
4
9
 
@@ -14,6 +19,7 @@ from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
14
19
  from ..utils.config import load_config
15
20
  from ..utils.validation import validate_evaluation_output
16
21
  from .config import EvaluatorConfig
22
+ from .resolver import ModelResolver, ResolutionError
17
23
 
18
24
 
19
25
  def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -43,20 +49,28 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
43
49
  return 1
44
50
  project_config = load_config()
45
51
 
46
- # 3. Check aider available
52
+ # 3. Resolve model (ADV-0015: dual-field support)
53
+ resolver = ModelResolver()
54
+ try:
55
+ resolved_model, resolved_api_key_env = resolver.resolve(config)
56
+ except ResolutionError as e:
57
+ print(f"{RED}Error: {e}{RESET}")
58
+ return 1
59
+
60
+ # 4. Check aider available
47
61
  if not shutil.which("aider"):
48
62
  print(f"{RED}Error: Aider not found{RESET}")
49
63
  _print_aider_help()
50
64
  return 1
51
65
 
52
- # 4. Check API key
53
- api_key = os.environ.get(config.api_key_env)
66
+ # 5. Check API key (using resolved api_key_env)
67
+ api_key = os.environ.get(resolved_api_key_env)
54
68
  if not api_key:
55
- print(f"{RED}Error: {config.api_key_env} not set{RESET}")
56
- print(f" Set in .env or export {config.api_key_env}=your-key")
69
+ print(f"{RED}Error: {resolved_api_key_env} not set{RESET}")
70
+ print(f" Set in .env or export {resolved_api_key_env}=your-key")
57
71
  return 1
58
72
 
59
- # 5. Pre-flight file size check
73
+ # 6. Pre-flight file size check
60
74
  line_count, estimated_tokens = _check_file_size(file_path)
61
75
  if line_count > 500 or estimated_tokens > 20000:
62
76
  _warn_large_file(line_count, estimated_tokens)
@@ -65,11 +79,11 @@ def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -
65
79
  print("Evaluation cancelled.")
66
80
  return 0
67
81
 
68
- # 6. Determine execution method
82
+ # 7. Determine execution method
69
83
  if config.source == "builtin":
70
84
  return _run_builtin_evaluator(config, file_path, project_config, timeout)
71
85
  else:
72
- return _run_custom_evaluator(config, file_path, project_config, timeout)
86
+ return _run_custom_evaluator(config, file_path, project_config, timeout, resolved_model)
73
87
 
74
88
 
75
89
  def _run_builtin_evaluator(
@@ -99,8 +113,17 @@ def _run_custom_evaluator(
99
113
  file_path: str,
100
114
  project_config: dict,
101
115
  timeout: int,
116
+ resolved_model: str,
102
117
  ) -> int:
103
- """Run a custom evaluator by invoking aider directly."""
118
+ """Run a custom evaluator by invoking aider directly.
119
+
120
+ Args:
121
+ config: Evaluator configuration
122
+ file_path: Path to file to evaluate
123
+ project_config: Project configuration dict
124
+ timeout: Timeout in seconds
125
+ resolved_model: Resolved model ID from ModelResolver
126
+ """
104
127
  # Prepare output path
105
128
  logs_dir = Path(project_config["log_directory"])
106
129
  logs_dir.mkdir(parents=True, exist_ok=True)
@@ -131,13 +154,13 @@ def _run_custom_evaluator(
131
154
  prefix = config.log_prefix or config.name.upper()
132
155
 
133
156
  try:
134
- print(f"{prefix}: Using model {config.model}")
157
+ print(f"{prefix}: Using model {resolved_model}")
135
158
 
136
159
  # Build aider command
137
160
  cmd = [
138
161
  "aider",
139
162
  "--model",
140
- config.model,
163
+ resolved_model,
141
164
  "--yes",
142
165
  "--no-detect-urls",
143
166
  "--no-git",
@@ -168,7 +191,7 @@ def _run_custom_evaluator(
168
191
 
169
192
  **Source**: {file_path}
170
193
  **Evaluator**: {config.name}
171
- **Model**: {config.model}
194
+ **Model**: {resolved_model}
172
195
  **Generated**: {timestamp}
173
196
 
174
197
  ---
@@ -0,0 +1,56 @@
1
"""Evaluator library client for adversarial-workflow.

This module provides functionality to browse, install, and update evaluator
configurations from the community adversarial-evaluator-library.

Philosophy: "Copy, Don't Link"
- Evaluators are copied to projects, not referenced at runtime
- Projects remain self-contained and work offline
- Users can customize their local copies freely
- Updates are explicit and user-controlled
"""

from .cache import DEFAULT_CACHE_DIR, DEFAULT_CACHE_TTL, CacheManager
from .client import (
    DEFAULT_LIBRARY_URL,
    LibraryClient,
    LibraryClientError,
    NetworkError,
    ParseError,
)
from .commands import (
    library_check_updates,
    library_info,
    library_install,
    library_list,
    library_update,
)
from .config import LibraryConfig, get_library_config
from .models import EvaluatorEntry, IndexData, InstalledEvaluatorMeta, UpdateInfo

# Explicit public API surface for the package; names are grouped by the
# submodule they are re-exported from.
__all__ = [
    # Client
    "LibraryClient",
    "LibraryClientError",
    "NetworkError",
    "ParseError",
    "DEFAULT_LIBRARY_URL",
    # Models
    "EvaluatorEntry",
    "IndexData",
    "InstalledEvaluatorMeta",
    "UpdateInfo",
    # Cache
    "CacheManager",
    "DEFAULT_CACHE_DIR",
    "DEFAULT_CACHE_TTL",
    # Config
    "LibraryConfig",
    "get_library_config",
    # Commands
    "library_list",
    "library_info",
    "library_install",
    "library_check_updates",
    "library_update",
]
@@ -0,0 +1,184 @@
1
+ """Cache management for the evaluator library client."""
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Optional
8
+
9
# Default cache TTL: 1 hour (3600 seconds)
DEFAULT_CACHE_TTL = 3600

# Cache directory
DEFAULT_CACHE_DIR = Path.home() / ".cache" / "adversarial-workflow"


class CacheManager:
    """File-based JSON cache for the library client.

    Entries are stored as ``<sanitized-key>.json`` files under ``cache_dir``;
    freshness is judged from the file's mtime against ``ttl``.
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        ttl: int = DEFAULT_CACHE_TTL,
    ):
        """
        Initialize the cache manager.

        Args:
            cache_dir: Directory to store cache files. Defaults to ~/.cache/adversarial-workflow
            ttl: Time-to-live in seconds. Defaults to 3600 (1 hour).
        """
        self.cache_dir = cache_dir if cache_dir is not None else DEFAULT_CACHE_DIR
        self.ttl = ttl
        self._ensure_cache_dir()

    def _ensure_cache_dir(self) -> None:
        """Create the cache directory if possible; failures disable caching silently."""
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Degrade to a cacheless mode rather than crashing the client.
            pass

    def _entry_path(self, key: str) -> Path:
        """Map a cache key to its on-disk JSON file (filesystem-safe name)."""
        safe = key.replace("/", "_").replace(":", "_")
        return self.cache_dir / f"{safe}.json"

    def _expired(self, cache_path: Path) -> bool:
        """Return True when the entry is missing, unreadable, or older than ttl."""
        try:
            return (time.time() - cache_path.stat().st_mtime) > self.ttl
        except OSError:
            # Missing or unstat-able files count as expired.
            return True

    def _load(self, cache_path: Path) -> Optional[Dict[str, Any]]:
        """Read and parse a cache file; None on any read/parse failure."""
        try:
            return json.loads(cache_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return None

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a fresh value from the cache.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found or expired.
        """
        cache_path = self._entry_path(key)
        if self._expired(cache_path):
            return None
        return self._load(cache_path)

    def get_stale(self, key: str) -> Optional[Dict[str, Any]]:
        """
        Get a value from the cache even if expired.

        Useful for offline fallback scenarios.

        Args:
            key: The cache key.

        Returns:
            The cached value, or None if not found.
        """
        return self._load(self._entry_path(key))

    def set(self, key: str, value: Dict[str, Any]) -> bool:
        """
        Store a value in the cache.

        Args:
            key: The cache key.
            value: The value to cache.

        Returns:
            True if successfully cached, False otherwise.
        """
        cache_path = self._entry_path(key)
        try:
            self._ensure_cache_dir()
            with open(cache_path, "w", encoding="utf-8") as fh:
                json.dump(value, fh, indent=2)
            return True
        except OSError:
            return False

    def invalidate(self, key: str) -> bool:
        """
        Invalidate a cache entry.

        Args:
            key: The cache key.

        Returns:
            True if successfully invalidated, False otherwise.
        """
        cache_path = self._entry_path(key)
        try:
            if cache_path.exists():
                cache_path.unlink()
            return True
        except OSError:
            return False

    def clear(self) -> int:
        """
        Clear all cache entries.

        Returns:
            The number of entries cleared.
        """
        removed = 0
        try:
            for entry in self.cache_dir.glob("*.json"):
                try:
                    entry.unlink()
                except OSError:
                    continue
                removed += 1
        except OSError:
            # Unreadable cache dir: report whatever was removed so far.
            pass
        return removed

    def get_age(self, key: str) -> Optional[float]:
        """
        Get the age of a cache entry in seconds.

        Args:
            key: The cache key.

        Returns:
            Age in seconds, or None if not found.
        """
        try:
            return time.time() - self._entry_path(key).stat().st_mtime
        except OSError:
            return None