adversarial-workflow 0.6.1__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/PKG-INFO +25 -3
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/README.md +24 -2
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__init__.py +1 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py +1 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py +129 -65
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py +3 -2
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py +2 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py +39 -4
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py +16 -8
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/file_splitter.py +218 -184
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/validation.py +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO +25 -3
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt +2 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/pyproject.toml +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli.py +24 -69
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py +154 -200
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_config.py +55 -44
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_env_loading.py +51 -89
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluate.py +188 -129
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_discovery.py +206 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_runner.py +18 -5
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_file_splitter.py +106 -103
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_list_evaluators.py +28 -45
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_python_version.py +17 -16
- adversarial_workflow-0.6.3/tests/test_scripts_project.py +120 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_split_command.py +45 -37
- adversarial_workflow-0.6.3/tests/test_timeout_integration.py +406 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_utils_validation.py +26 -10
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/LICENSE +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/builtins.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.env.example.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/README.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/config.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/example-task.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/__init__.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/colors.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/config.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/entry_points.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/requires.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/top_level.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.cfg +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_config.py +0 -0
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: adversarial-workflow
-Version: 0.6.1
+Version: 0.6.3
 Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
 Author: Fredrik Matheson
 License: MIT
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
 - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
 - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

-## What's New in v0.6.
+## What's New in v0.6.3

-
+### Upgrade
+
+```bash
+pip install --upgrade adversarial-workflow
+```
+
+### v0.6.3 - Configurable Timeouts
+
+- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+- **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+### v0.6.2 - .env Loading & Stability
+
+- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+### v0.6.0 - Plugin Architecture
+
+🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

 ```bash
 # Create a custom evaluator
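A minimal usage sketch of the upgrade and the new CLI timeout override described in the notes above; the task file name, and the assumption that the built-in `evaluate` command accepts the new flag, are illustrative rather than confirmed by this diff:

```bash
# Upgrade to 0.6.3
pip install --upgrade adversarial-workflow

# Override any YAML-configured timeout for a single run (values above 600s are clamped)
adversarial evaluate my-task.md --timeout 400
# The CLI now logs the chosen value and its source, e.g.:
#   Using timeout: 400s (CLI override)
```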
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
 | `aliases` | No | Alternative command names |
 | `log_prefix` | No | CLI output prefix |
 | `fallback_model` | No | Fallback model if primary fails |
+| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
 | `version` | No | Evaluator version (default: 1.0.0) |

 ### Listing Available Evaluators
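A hedged sketch of a local evaluator YAML that combines the documented required fields with the new `timeout` field; the file path, model identifier, API key name, prompt, and aliases are illustrative assumptions, not values shipped with the package:

```yaml
# .adversarial/evaluators/mistral-review.yml  (illustrative path and values)
name: mistral-review
description: Slow-model review pass that needs a longer timeout
model: mistral/mistral-large-latest    # assumed model identifier
api_key_env: MISTRAL_API_KEY           # assumed environment variable name
prompt: |
  Review the attached file and report concrete issues.
output_suffix: MISTRAL-REVIEW
timeout: 300        # seconds; default 180, values above 600 are clamped
aliases:
  - mreview
```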
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/README.md
RENAMED
@@ -20,9 +20,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
 - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
 - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

-## What's New in v0.6.
+## What's New in v0.6.3

-
+### Upgrade
+
+```bash
+pip install --upgrade adversarial-workflow
+```
+
+### v0.6.3 - Configurable Timeouts
+
+- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+- **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+### v0.6.2 - .env Loading & Stability
+
+- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+### v0.6.0 - Plugin Architecture
+
+🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

 ```bash
 # Create a custom evaluator
@@ -424,6 +445,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
 | `aliases` | No | Alternative command names |
 | `log_prefix` | No | CLI output prefix |
 | `fallback_model` | No | Fallback model if primary fails |
+| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
 | `version` | No | Evaluator version (default: 1.0.0) |

 ### Listing Available Evaluators
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py
RENAMED
@@ -27,9 +27,9 @@ from pathlib import Path
 from typing import Dict, List, Optional, Tuple

 import yaml
-from dotenv import
+from dotenv import dotenv_values, load_dotenv

-__version__ = "0.6.
+__version__ = "0.6.2"

 # ANSI color codes for better output
 RESET = "\033[0m"
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
 f"{GREEN}✅ Setup Complete!{RESET}",
 [
 "Created:",
-
-
-
+(
+" ✓ .env (with your API keys - added to .gitignore)"
+if (anthropic_key or openai_key)
+else " ⚠️ .env (skipped - no API keys provided)"
+),
 " ✓ .adversarial/config.yml",
 " ✓ .adversarial/scripts/ (3 workflow scripts)",
 " ✓ .aider.conf.yml (aider configuration)",
 "",
-
-
-
+(
+"Your configuration:"
+if (anthropic_key or openai_key)
+else "Configuration (no API keys yet):"
+),
 f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
 f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
 f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
@@ -806,15 +810,14 @@ def check() -> int:

 if env_file.exists():
 try:
-#
-load_dotenv(env_file)
-# Use dotenv_values() to count variables directly from file
-# This gives accurate count regardless of what was already in environment
+# Count variables by reading file directly (works even if already loaded)
 env_vars = dotenv_values(env_file)
+var_count = len([k for k, v in env_vars.items() if v is not None])
+
+# Still load to ensure environment is set
+load_dotenv(env_file)
 env_loaded = True
-good_checks.append(
-f".env file found ({len(env_vars)} variables configured)"
-)
+good_checks.append(f".env file found and loaded ({var_count} variables)")
 except (FileNotFoundError, PermissionError) as e:
 # File access errors
 issues.append(
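Assembled from the hunk above for readability, the new counting approach in `check()` boils down to the following sketch (the `.env` path is illustrative):

```python
from dotenv import dotenv_values, load_dotenv

# Count variables by reading the file directly; this stays accurate even if
# the same variables were already exported into the process environment.
env_vars = dotenv_values(".env")
var_count = len([k for k, v in env_vars.items() if v is not None])

# Still load the file so the rest of the CLI run sees the variables.
load_dotenv(".env")
print(f".env file found and loaded ({var_count} variables)")
```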
@@ -2097,10 +2100,6 @@ def evaluate(task_file: str) -> int:
 return 0


-
-
-
-
 def review() -> int:
 """Run Phase 3: Code review."""

@@ -2289,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
 )
 return None
 else:
-print(
+print(
+f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
+)
 return None

 elif template_type == "custom" and url:
@@ -2739,54 +2740,61 @@ def agent_onboard(project_path: str = ".") -> int:
 return 0


-def split(
+def split(
+task_file: str,
+strategy: str = "sections",
+max_lines: int = 500,
+dry_run: bool = False,
+):
 """Split large task files into smaller evaluable chunks.
-
+
 Args:
 task_file: Path to the task file to split
 strategy: Split strategy ('sections', 'phases', or 'manual')
 max_lines: Maximum lines per split (default: 500)
 dry_run: Preview splits without creating files
-
+
 Returns:
 Exit code (0 for success, 1 for error)
 """
 from .utils.file_splitter import (
-analyze_task_file,
-
-split_by_phases,
-
+analyze_task_file,
+generate_split_files,
+split_by_phases,
+split_by_sections,
 )
-
+
 try:
 print_box("File Splitting Utility", CYAN)
-
+
 # Validate file exists
 if not os.path.exists(task_file):
 print(f"{RED}Error: File not found: {task_file}{RESET}")
 return 1
-
+
 # Analyze file
 print(f"📄 Analyzing task file: {task_file}")
 analysis = analyze_task_file(task_file)
-
-lines = analysis[
-tokens = analysis[
+
+lines = analysis["total_lines"]
+tokens = analysis["estimated_tokens"]
 print(f" Lines: {lines}")
 print(f" Estimated tokens: ~{tokens:,}")
-
+
 # Check if splitting is recommended
 if lines <= max_lines:
-print(
+print(
+f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
+)
 print("No splitting needed.")
 return 0
-
+
 print(f"{YELLOW}⚠️ File exceeds recommended limit ({max_lines} lines){RESET}")
-
+
 # Read file content for splitting
-with open(task_file,
+with open(task_file, "r", encoding="utf-8") as f:
 content = f.read()
-
+
 # Apply split strategy
 if strategy == "sections":
 splits = split_by_sections(content, max_lines=max_lines)
@@ -2795,42 +2803,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
 splits = split_by_phases(content)
 print(f"\n💡 Suggested splits (by phases):")
 else:
-print(
+print(
+f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
+)
 return 1
-
+
 # Display split preview
 for i, split in enumerate(splits, 1):
 filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
 print(f" - {filename} ({split['line_count']} lines)")
-
+
 # Dry run mode
 if dry_run:
 print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
 return 0
-
+
 # Prompt user for confirmation
 create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
-
-if create_files.lower() in [
+
+if create_files.lower() in ["y", "yes"]:
 # Create output directory
 output_dir = os.path.join(os.path.dirname(task_file), "splits")
-
+
 # Generate split files
 created_files = generate_split_files(task_file, splits, output_dir)
-
+
 print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
 for file_path in created_files:
 print(f" {file_path}")
-
+
 print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
 for file_path in created_files:
 rel_path = os.path.relpath(file_path)
 print(f" adversarial evaluate {rel_path}")
 else:
 print("Cancelled - no files created.")
-
+
 return 0
-
+
 except Exception as e:
 print(f"{RED}Error during file splitting: {e}{RESET}")
 return 1
@@ -2876,6 +2886,7 @@ def list_evaluators() -> int:

 return 0

+
 def main():
 """Main CLI entry point."""
 import logging
@@ -2888,10 +2899,20 @@ def main():
 except Exception as e:
 print(f"Warning: Could not load .env file: {e}", file=sys.stderr)

+# Load .env file before any commands run
+# Use explicit path to ensure we find .env in current working directory
+# (load_dotenv() without args can fail to find .env in some contexts)
+env_file = Path.cwd() / ".env"
+if env_file.exists():
+try:
+load_dotenv(env_file)
+except (OSError, UnicodeDecodeError) as e:
+print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
+
 from adversarial_workflow.evaluators import (
+BUILTIN_EVALUATORS,
 get_all_evaluators,
 run_evaluator,
-BUILTIN_EVALUATORS,
 )

 logger = logging.getLogger(__name__)
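For context, a minimal `.env` sketch of the kind this startup loading picks up; `GEMINI_API_KEY` comes from the release notes above, while the other key names are assumed conventional names rather than values confirmed by this diff:

```bash
# .env in the project root; loaded by the CLI at startup (v0.6.2+)
OPENAI_API_KEY=sk-...          # assumed conventional name
ANTHROPIC_API_KEY=sk-ant-...   # assumed conventional name
GEMINI_API_KEY=...             # referenced via api_key_env in a custom evaluator
```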
@@ -2899,8 +2920,16 @@ def main():
 # Commands that cannot be overridden by evaluators
 # Note: 'review' is special - it reviews git changes without a file argument
 STATIC_COMMANDS = {
-"init",
-"
+"init",
+"check",
+"doctor",
+"health",
+"quickstart",
+"agent",
+"split",
+"validate",
+"review",
+"list-evaluators",
 }

 parser = argparse.ArgumentParser(
@@ -2989,16 +3018,21 @@ For more information: https://github.com/movito/adversarial-workflow
 )
 split_parser.add_argument("task_file", help="Task file to split")
 split_parser.add_argument(
-"--strategy",
-
+"--strategy",
+"-s",
+choices=["sections", "phases"],
+default="sections",
+help="Split strategy: 'sections' (default) or 'phases'",
 )
 split_parser.add_argument(
-"--max-lines",
-
+"--max-lines",
+"-m",
+type=int,
+default=500,
+help="Maximum lines per split (default: 500)",
 )
 split_parser.add_argument(
-"--dry-run", action="store_true",
-help="Preview splits without creating files"
+"--dry-run", action="store_true", help="Preview splits without creating files"
 )

 # list-evaluators command
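A hedged example of the `split` command with the options defined above, including the `-s`/`-m` short flags; the task file name is illustrative:

```bash
# Preview how a large task file would be split, without writing anything
adversarial split plan.md --strategy phases --max-lines 400 --dry-run

# Same idea with the short flags and the default strategy
adversarial split plan.md -s sections -m 500
```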
@@ -3019,7 +3053,12 @@ For more information: https://github.com/movito/adversarial-workflow
 for name, config in evaluators.items():
 # Skip if name conflicts with static command
 if name in STATIC_COMMANDS:
-
+# Only warn for user-defined evaluators, not built-ins
+# Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
+if getattr(config, "source", None) != "builtin":
+logger.warning(
+"Evaluator '%s' conflicts with CLI command; skipping", name
+)
 # Mark as registered to prevent alias re-registration attempts
 registered_configs.add(id(config))
 continue
@@ -3046,10 +3085,11 @@ For more information: https://github.com/movito/adversarial-workflow
 )
 eval_parser.add_argument("file", help="File to evaluate")
 eval_parser.add_argument(
-"--timeout",
+"--timeout",
+"-t",
 type=int,
-default=
-help="Timeout in seconds (default: 180)"
+default=None,
+help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
 )
 # Store config for later execution
 eval_parser.set_defaults(evaluator_config=config)
@@ -3062,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow

 # Check for evaluator command first (has evaluator_config attribute)
 if hasattr(args, "evaluator_config"):
+# Determine timeout: CLI flag > YAML config > default (180s)
+if args.timeout is not None:
+timeout = args.timeout
+source = "CLI override"
+elif args.evaluator_config.timeout != 180:
+timeout = args.evaluator_config.timeout
+source = "evaluator config"
+else:
+timeout = args.evaluator_config.timeout  # 180 (default)
+source = "default"
+
+# Validate CLI timeout (consistent with YAML validation)
+if timeout <= 0:
+print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
+return 1
+if timeout > 600:
+print(
+f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
+)
+timeout = 600
+
+# Log actual timeout and source
+print(f"Using timeout: {timeout}s ({source})")
+
 return run_evaluator(
 args.evaluator_config,
 args.file,
-timeout=
+timeout=timeout,
 )

 # Execute static commands
@@ -3097,7 +3161,7 @@ For more information: https://github.com/movito/adversarial-workflow
 args.task_file,
 strategy=args.strategy,
 max_lines=args.max_lines,
-dry_run=args.dry_run
+dry_run=args.dry_run,
 )
 elif args.command == "list-evaluators":
 return list_evaluators()
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py
RENAMED
@@ -1,13 +1,13 @@
 """Evaluators module for adversarial-workflow plugin architecture."""

+from .builtins import BUILTIN_EVALUATORS
 from .config import EvaluatorConfig
 from .discovery import (
+EvaluatorParseError,
 discover_local_evaluators,
 parse_evaluator_yaml,
-EvaluatorParseError,
 )
 from .runner import run_evaluator
-from .builtins import BUILTIN_EVALUATORS


 def get_all_evaluators() -> dict[str, EvaluatorConfig]:
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
 Aliases from local evaluators are also included in the returned dictionary.
 """
 import logging
+
 logger = logging.getLogger(__name__)

 evaluators: dict[str, EvaluatorConfig] = {}
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py
RENAMED
@@ -26,6 +26,7 @@ class EvaluatorConfig:
 fallback_model: Fallback model if primary fails
 aliases: Alternative command names
 version: Evaluator version
+timeout: Timeout in seconds (default: 180, max: 600)
 source: "builtin" or "local" (set internally)
 config_file: Path to YAML file if local (set internally)
 """
@@ -43,6 +44,7 @@ class EvaluatorConfig:
 fallback_model: str | None = None
 aliases: list[str] = field(default_factory=list)
 version: str = "1.0.0"
+timeout: int = 180  # Timeout in seconds (default: 180, max: 600)

 # Metadata (set internally during discovery, not from YAML)
 source: str = "builtin"
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py
RENAMED
@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 try:
 content = yml_file.read_text(encoding="utf-8")
 except UnicodeDecodeError as e:
-raise EvaluatorParseError(
-f"File encoding error (not UTF-8): {yml_file}"
-) from e
+raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e

 # Parse YAML
 data = yaml.safe_load(content)
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 )

 # Validate required fields exist
-required = [
+required = [
+"name",
+"description",
+"model",
+"api_key_env",
+"prompt",
+"output_suffix",
+]
 missing = [f for f in required if f not in data]
 if missing:
 raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
@@ -117,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
 )

+# Validate timeout if present
+if "timeout" in data:
+timeout = data["timeout"]
+# Handle null/empty values
+if timeout is None or timeout == "":
+raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
+# Check for bool before int (bool is subclass of int in Python)
+# YAML parses 'yes'/'true' as True, 'no'/'false' as False
+if isinstance(timeout, bool):
+raise EvaluatorParseError(
+f"Field 'timeout' must be an integer, got bool: {timeout!r}"
+)
+if not isinstance(timeout, int):
+raise EvaluatorParseError(
+f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
+)
+# timeout=0 is invalid (does not disable timeout - use a large value instead)
+if timeout <= 0:
+raise EvaluatorParseError(
+f"Field 'timeout' must be positive (> 0), got {timeout}"
+)
+if timeout > 600:
+logger.warning(
+"Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
+timeout,
+yml_file.name,
+)
+data["timeout"] = 600
+
 # Filter to known fields only (log unknown fields)
 known_fields = {
 "name",
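Summarizing the validation added above, a sketch of how different `timeout` values in an evaluator YAML are treated; only the uncommented line is active, the commented alternatives show the behavior of `parse_evaluator_yaml` for each case:

```yaml
timeout: 300     # accepted as-is
#timeout: 900    # accepted but clamped to 600 with a logged warning
#timeout: 0      # EvaluatorParseError: must be positive (> 0)
#timeout: "30"   # EvaluatorParseError: must be an integer, not a string
#timeout: yes    # EvaluatorParseError: YAML booleans are rejected before the int check
```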
@@ -129,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 "fallback_model",
 "aliases",
 "version",
+"timeout",
 }
 unknown = set(data.keys()) - known_fields
 if unknown:
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py
RENAMED
@@ -10,10 +10,10 @@ import tempfile
 from datetime import datetime, timezone
 from pathlib import Path

-from .
-from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
+from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
 from ..utils.config import load_config
 from ..utils.validation import validate_evaluation_output
+from .config import EvaluatorConfig


 def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
 """

 # Create temp file for prompt
-with tempfile.NamedTemporaryFile(mode=
+with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
 f.write(full_prompt)
 prompt_file = f.name

@@ -136,12 +136,15 @@ def _run_custom_evaluator(
 # Build aider command
 cmd = [
 "aider",
-"--model",
+"--model",
+config.model,
 "--yes",
 "--no-git",
 "--no-auto-commits",
-"--message-file",
-
+"--message-file",
+prompt_file,
+"--read",
+file_path,
 ]

 result = subprocess.run(
@@ -224,7 +227,10 @@ def _execute_script(

 # Validate output
 file_basename = Path(file_path).stem
-log_file =
+log_file = (
+Path(project_config["log_directory"])
+/ f"{file_basename}-{config.output_suffix}.md"
+)

 is_valid, verdict, message = validate_evaluation_output(str(log_file))

@@ -235,7 +241,9 @@ def _execute_script(
 return _report_verdict(verdict, log_file, config)


-def _report_verdict(
+def _report_verdict(
+verdict: str | None, log_file: Path, config: EvaluatorConfig
+) -> int:
 """Report the evaluation verdict to terminal."""
 print()
 if verdict == "APPROVED":