adversarial-workflow 0.6.0__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/PKG-INFO +34 -4
  2. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/README.md +33 -3
  3. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/__init__.py +1 -1
  4. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/cli.py +111 -58
  5. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/__init__.py +3 -2
  6. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/discovery.py +9 -4
  7. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/runner.py +16 -8
  8. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/file_splitter.py +218 -184
  9. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/validation.py +3 -1
  10. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/PKG-INFO +34 -4
  11. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/SOURCES.txt +2 -0
  12. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/pyproject.toml +3 -1
  13. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_cli.py +24 -69
  14. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_cli_dynamic_commands.py +99 -200
  15. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_config.py +49 -44
  16. adversarial_workflow-0.6.2/tests/test_env_loading.py +176 -0
  17. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_evaluate.py +177 -129
  18. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_evaluator_discovery.py +3 -1
  19. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_evaluator_runner.py +18 -5
  20. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_file_splitter.py +105 -103
  21. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_list_evaluators.py +24 -45
  22. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_python_version.py +16 -16
  23. adversarial_workflow-0.6.2/tests/test_scripts_project.py +120 -0
  24. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_split_command.py +45 -37
  25. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_utils_validation.py +26 -10
  26. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/LICENSE +0 -0
  27. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/__main__.py +0 -0
  28. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/builtins.py +0 -0
  29. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/config.py +0 -0
  30. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
  31. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/.env.example.template +0 -0
  32. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/README.template +0 -0
  33. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
  34. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
  35. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
  36. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
  37. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
  38. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/config.yml.template +0 -0
  39. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
  40. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/example-task.md.template +0 -0
  41. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
  42. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
  43. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
  44. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/__init__.py +0 -0
  45. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/colors.py +0 -0
  46. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/config.py +0 -0
  47. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
  48. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/entry_points.txt +0 -0
  49. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/requires.txt +0 -0
  50. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/top_level.txt +0 -0
  51. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/setup.cfg +0 -0
  52. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/setup.py +0 -0
  53. {adversarial_workflow-0.6.0 → adversarial_workflow-0.6.2}/tests/test_evaluator_config.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: adversarial-workflow
- Version: 0.6.0
+ Version: 0.6.2
  Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
  Author: Fredrik Matheson
  License: MIT
@@ -35,6 +35,10 @@ Dynamic: license-file

  # Adversarial Workflow

+ [![PyPI version](https://badge.fury.io/py/adversarial-workflow.svg)](https://pypi.org/project/adversarial-workflow/)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
  **A multi-stage AI code review system that makes your code better**

  Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to implement but not delivering) through adversarial verification using independent review stages. A battle-tested workflow from the [thematic-cuts](https://github.com/movito/thematic-cuts) project that achieved 96.9% test pass rate improvement.
@@ -51,6 +55,31 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

+ ## What's New in v0.6.0
+
+ 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+
+ ```bash
+ # Create a custom evaluator
+ mkdir -p .adversarial/evaluators
+ cat > .adversarial/evaluators/athena.yml << 'EOF'
+ name: athena
+ description: Knowledge evaluation using Gemini 2.5 Pro
+ model: gemini-2.5-pro
+ api_key_env: GEMINI_API_KEY
+ prompt: |
+ You are Athena, a knowledge evaluation specialist...
+ EOF
+
+ # Use it immediately
+ adversarial athena docs/research-plan.md
+
+ # List all available evaluators
+ adversarial list-evaluators
+ ```
+
+ See [Custom Evaluators](#custom-evaluators) for full documentation, or check the [CHANGELOG](CHANGELOG.md) for complete release history.
+
  ## Prerequisites

  Before installing, ensure you have:
@@ -856,12 +885,13 @@ From the [thematic-cuts](https://github.com/movito/thematic-cuts) project:

  ## Documentation

- - **Interaction Patterns**: How Author-Reviewer collaboration works
+ - **[Custom Evaluators Guide](docs/CUSTOM_EVALUATORS.md)**: Create project-specific evaluators
+ - **[Integration Guide](docs/INTEGRATION-GUIDE.md)**: Detailed integration strategies
+ - **[CHANGELOG](CHANGELOG.md)**: Release history and version notes
+ - **Interaction Patterns**: How Author-Evaluator collaboration works
  - **Token Optimization**: Detailed Aider configuration guide
  - **Workflow Phases**: Step-by-step guide for each phase
  - **Troubleshooting**: Common issues and solutions
- - **Examples**: Real integration scenarios
- - **Terminology**: Official standards for Author/Reviewer concepts

  See `docs/` directory for comprehensive guides.

@@ -1,5 +1,9 @@
  # Adversarial Workflow

+ [![PyPI version](https://badge.fury.io/py/adversarial-workflow.svg)](https://pypi.org/project/adversarial-workflow/)
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
  **A multi-stage AI code review system that makes your code better**

  Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to implement but not delivering) through adversarial verification using independent review stages. A battle-tested workflow from the [thematic-cuts](https://github.com/movito/thematic-cuts) project that achieved 96.9% test pass rate improvement.
@@ -16,6 +20,31 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

+ ## What's New in v0.6.0
+
+ 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+
+ ```bash
+ # Create a custom evaluator
+ mkdir -p .adversarial/evaluators
+ cat > .adversarial/evaluators/athena.yml << 'EOF'
+ name: athena
+ description: Knowledge evaluation using Gemini 2.5 Pro
+ model: gemini-2.5-pro
+ api_key_env: GEMINI_API_KEY
+ prompt: |
+ You are Athena, a knowledge evaluation specialist...
+ EOF
+
+ # Use it immediately
+ adversarial athena docs/research-plan.md
+
+ # List all available evaluators
+ adversarial list-evaluators
+ ```
+
+ See [Custom Evaluators](#custom-evaluators) for full documentation, or check the [CHANGELOG](CHANGELOG.md) for complete release history.
+
  ## Prerequisites

  Before installing, ensure you have:
@@ -821,12 +850,13 @@ From the [thematic-cuts](https://github.com/movito/thematic-cuts) project:

  ## Documentation

- - **Interaction Patterns**: How Author-Reviewer collaboration works
+ - **[Custom Evaluators Guide](docs/CUSTOM_EVALUATORS.md)**: Create project-specific evaluators
+ - **[Integration Guide](docs/INTEGRATION-GUIDE.md)**: Detailed integration strategies
+ - **[CHANGELOG](CHANGELOG.md)**: Release history and version notes
+ - **Interaction Patterns**: How Author-Evaluator collaboration works
  - **Token Optimization**: Detailed Aider configuration guide
  - **Workflow Phases**: Step-by-step guide for each phase
  - **Troubleshooting**: Common issues and solutions
- - **Examples**: Real integration scenarios
- - **Terminology**: Official standards for Author/Reviewer concepts

  See `docs/` directory for comprehensive guides.

@@ -12,7 +12,7 @@ Usage:
  adversarial validate "pytest"
  """

- __version__ = "0.6.0"
+ __version__ = "0.6.2"
  __author__ = "Fredrik Matheson"
  __license__ = "MIT"

@@ -27,9 +27,9 @@ from pathlib import Path
  from typing import Dict, List, Optional, Tuple

  import yaml
- from dotenv import load_dotenv
+ from dotenv import dotenv_values, load_dotenv

- __version__ = "0.6.0"
+ __version__ = "0.6.2"

  # ANSI color codes for better output
  RESET = "\033[0m"
@@ -800,26 +800,36 @@ def check() -> int:
  issues: List[Dict] = []
  good_checks: List[str] = []

- # Check for .env file first (before loading environment variables)
+ # Check for .env file (note: already loaded by main() at startup)
  env_file = Path(".env")
  env_loaded = False
- env_keys_before = set(os.environ.keys())

  if env_file.exists():
  try:
+ # Count variables by reading file directly (works even if already loaded)
+ env_vars = dotenv_values(env_file)
+ var_count = len([k for k, v in env_vars.items() if v is not None])
+
+ # Still load to ensure environment is set
  load_dotenv(env_file)
- env_keys_after = set(os.environ.keys())
- new_keys = env_keys_after - env_keys_before
  env_loaded = True
- good_checks.append(
- f".env file found and loaded ({len(new_keys)} variables)"
+ good_checks.append(f".env file found and loaded ({var_count} variables)")
+ except (FileNotFoundError, PermissionError) as e:
+ # File access errors
+ issues.append(
+ {
+ "severity": "WARNING",
+ "message": f".env file found but could not be read: {e}",
+ "fix": "Check .env file permissions",
+ }
  )
- except Exception as e:
+ except (OSError, ValueError) as e:
+ # Covers UnicodeDecodeError (ValueError subclass) and other OS errors
  issues.append(
  {
  "severity": "WARNING",
- "message": f".env file found but could not be loaded: {e}",
- "fix": "Check .env file format and permissions",
+ "message": f".env file found but could not be parsed: {e}",
+ "fix": "Check .env file encoding (should be UTF-8)",
  }
  )
  else:
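
The counting change in `check()` above matters because the `.env` file is now loaded by `main()` at startup, so diffing `os.environ` before and after a second `load_dotenv()` call would report zero new variables. A minimal standalone sketch of the idea, with an invented `.env` (not the package's own code):

```python
# Sketch: why dotenv_values() counts .env entries correctly even when the file
# has already been loaded into os.environ (the old before/after-keys diff would
# report 0 in that case). Standalone example with an invented .env file.
import os
import tempfile
from pathlib import Path

from dotenv import dotenv_values, load_dotenv

with tempfile.TemporaryDirectory() as tmp:
    env_file = Path(tmp) / ".env"
    env_file.write_text("API_KEY=abc123\nMODEL=gemini-2.5-pro\n")

    load_dotenv(env_file)  # simulate main() loading .env at startup

    # Old approach: diff os.environ around a second load -> nothing new appears.
    before = set(os.environ)
    load_dotenv(env_file)
    print(len(set(os.environ) - before))  # 0

    # New approach: parse the file itself, independent of os.environ state.
    values = dotenv_values(env_file)
    print(len([k for k, v in values.items() if v is not None]))  # 2
```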
@@ -2086,10 +2096,6 @@ def evaluate(task_file: str) -> int:
  return 0


-
-
-
-
  def review() -> int:
  """Run Phase 3: Code review."""

@@ -2728,54 +2734,61 @@ def agent_onboard(project_path: str = ".") -> int:
  return 0


- def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_run: bool = False):
+ def split(
+ task_file: str,
+ strategy: str = "sections",
+ max_lines: int = 500,
+ dry_run: bool = False,
+ ):
  """Split large task files into smaller evaluable chunks.
-
+
  Args:
  task_file: Path to the task file to split
  strategy: Split strategy ('sections', 'phases', or 'manual')
  max_lines: Maximum lines per split (default: 500)
  dry_run: Preview splits without creating files
-
+
  Returns:
  Exit code (0 for success, 1 for error)
  """
  from .utils.file_splitter import (
- analyze_task_file,
- split_by_sections,
- split_by_phases,
- generate_split_files
+ analyze_task_file,
+ generate_split_files,
+ split_by_phases,
+ split_by_sections,
  )
-
+
  try:
  print_box("File Splitting Utility", CYAN)
-
+
  # Validate file exists
  if not os.path.exists(task_file):
  print(f"{RED}Error: File not found: {task_file}{RESET}")
  return 1
-
+
  # Analyze file
  print(f"📄 Analyzing task file: {task_file}")
  analysis = analyze_task_file(task_file)
-
- lines = analysis['total_lines']
- tokens = analysis['estimated_tokens']
+
+ lines = analysis["total_lines"]
+ tokens = analysis["estimated_tokens"]
  print(f" Lines: {lines}")
  print(f" Estimated tokens: ~{tokens:,}")
-
+
  # Check if splitting is recommended
  if lines <= max_lines:
- print(f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}")
+ print(
+ f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
+ )
  print("No splitting needed.")
  return 0
-
+
  print(f"{YELLOW}⚠️ File exceeds recommended limit ({max_lines} lines){RESET}")
-
+
  # Read file content for splitting
- with open(task_file, 'r', encoding='utf-8') as f:
+ with open(task_file, "r", encoding="utf-8") as f:
  content = f.read()
-
+
  # Apply split strategy
  if strategy == "sections":
  splits = split_by_sections(content, max_lines=max_lines)
@@ -2784,42 +2797,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
  splits = split_by_phases(content)
  print(f"\n💡 Suggested splits (by phases):")
  else:
- print(f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}")
+ print(
+ f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
+ )
  return 1
-
+
  # Display split preview
  for i, split in enumerate(splits, 1):
  filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
  print(f" - {filename} ({split['line_count']} lines)")
-
+
  # Dry run mode
  if dry_run:
  print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
  return 0
-
+
  # Prompt user for confirmation
  create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
-
- if create_files.lower() in ['y', 'yes']:
+
+ if create_files.lower() in ["y", "yes"]:
  # Create output directory
  output_dir = os.path.join(os.path.dirname(task_file), "splits")
-
+
  # Generate split files
  created_files = generate_split_files(task_file, splits, output_dir)
-
+
  print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
  for file_path in created_files:
  print(f" {file_path}")
-
+
  print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
  for file_path in created_files:
  rel_path = os.path.relpath(file_path)
  print(f" adversarial evaluate {rel_path}")
  else:
  print("Cancelled - no files created.")
-
+
  return 0
-
+
  except Exception as e:
  print(f"{RED}Error during file splitting: {e}{RESET}")
  return 1
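
The helpers imported by `split()` can also be driven directly. A rough sketch under the signatures and dictionary keys visible in the hunks above; the task file name is hypothetical and any other keys the helpers return are not shown here:

```python
# Sketch: driving the file_splitter helpers the way the split command does.
# Signatures and dict keys are taken from the diff above; the task file name
# is hypothetical.
import os

from adversarial_workflow.utils.file_splitter import (
    analyze_task_file,
    generate_split_files,
    split_by_sections,
)

task_file = "TASK-2025-001.md"  # hypothetical task file

if os.path.exists(task_file):
    analysis = analyze_task_file(task_file)
    print(analysis["total_lines"], analysis["estimated_tokens"])

    if analysis["total_lines"] > 500:
        with open(task_file, encoding="utf-8") as f:
            content = f.read()
        splits = split_by_sections(content, max_lines=500)
        for part in splits:
            print(part["line_count"])
        # Writes the parts into a splits/ directory and returns the created paths.
        created = generate_split_files(task_file, splits, "splits")
        print(created)
```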
@@ -2865,14 +2880,33 @@ def list_evaluators() -> int:

  return 0

+
  def main():
  """Main CLI entry point."""
  import logging
+ import sys
+
+ # Load .env file before any commands run
+ # Wrapped in try/except so CLI remains usable even with malformed .env
+ try:
+ load_dotenv()
+ except Exception as e:
+ print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
+
+ # Load .env file before any commands run
+ # Use explicit path to ensure we find .env in current working directory
+ # (load_dotenv() without args can fail to find .env in some contexts)
+ env_file = Path.cwd() / ".env"
+ if env_file.exists():
+ try:
+ load_dotenv(env_file)
+ except (OSError, UnicodeDecodeError) as e:
+ print(f"Warning: Could not load .env file: {e}", file=sys.stderr)

  from adversarial_workflow.evaluators import (
+ BUILTIN_EVALUATORS,
  get_all_evaluators,
  run_evaluator,
- BUILTIN_EVALUATORS,
  )

  logger = logging.getLogger(__name__)
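
The second added block passes an explicit path because `load_dotenv()` with no arguments searches for `.env` relative to the calling code rather than the user's working directory, which can miss the project's `.env` when the CLI runs as an installed entry point. A minimal standalone sketch of the distinction (not the package's code):

```python
# Sketch: default .env search vs. an explicit path with python-dotenv.
# Standalone example; no project-specific paths are assumed.
from pathlib import Path

from dotenv import find_dotenv, load_dotenv

# Default: find_dotenv() walks up from the calling code's location, which for
# an installed console script may not be the directory you invoked it from.
print(find_dotenv() or "<no .env found by default search>")

# Explicit: always target .env in the current working directory,
# mirroring what main() does in the hunk above.
env_file = Path.cwd() / ".env"
if env_file.exists():
    load_dotenv(env_file)
```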
@@ -2880,8 +2914,16 @@ def main():
  # Commands that cannot be overridden by evaluators
  # Note: 'review' is special - it reviews git changes without a file argument
  STATIC_COMMANDS = {
- "init", "check", "doctor", "health", "quickstart",
- "agent", "split", "validate", "review", "list-evaluators"
+ "init",
+ "check",
+ "doctor",
+ "health",
+ "quickstart",
+ "agent",
+ "split",
+ "validate",
+ "review",
+ "list-evaluators",
  }

  parser = argparse.ArgumentParser(
@@ -2970,16 +3012,21 @@ For more information: https://github.com/movito/adversarial-workflow
  )
  split_parser.add_argument("task_file", help="Task file to split")
  split_parser.add_argument(
- "--strategy", "-s", choices=["sections", "phases"], default="sections",
- help="Split strategy: 'sections' (default) or 'phases'"
+ "--strategy",
+ "-s",
+ choices=["sections", "phases"],
+ default="sections",
+ help="Split strategy: 'sections' (default) or 'phases'",
  )
  split_parser.add_argument(
- "--max-lines", "-m", type=int, default=500,
- help="Maximum lines per split (default: 500)"
+ "--max-lines",
+ "-m",
+ type=int,
+ default=500,
+ help="Maximum lines per split (default: 500)",
  )
  split_parser.add_argument(
- "--dry-run", action="store_true",
- help="Preview splits without creating files"
+ "--dry-run", action="store_true", help="Preview splits without creating files"
  )

  # list-evaluators command
@@ -3000,7 +3047,12 @@ For more information: https://github.com/movito/adversarial-workflow
  for name, config in evaluators.items():
  # Skip if name conflicts with static command
  if name in STATIC_COMMANDS:
- logger.warning("Evaluator '%s' conflicts with CLI command; skipping", name)
+ # Only warn for user-defined evaluators, not built-ins
+ # Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
+ if getattr(config, "source", None) != "builtin":
+ logger.warning(
+ "Evaluator '%s' conflicts with CLI command; skipping", name
+ )
  # Mark as registered to prevent alias re-registration attempts
  registered_configs.add(id(config))
  continue
@@ -3027,10 +3079,11 @@ For more information: https://github.com/movito/adversarial-workflow
  )
  eval_parser.add_argument("file", help="File to evaluate")
  eval_parser.add_argument(
- "--timeout", "-t",
+ "--timeout",
+ "-t",
  type=int,
  default=180,
- help="Timeout in seconds (default: 180)"
+ help="Timeout in seconds (default: 180)",
  )
  # Store config for later execution
  eval_parser.set_defaults(evaluator_config=config)
@@ -3078,7 +3131,7 @@ For more information: https://github.com/movito/adversarial-workflow
  args.task_file,
  strategy=args.strategy,
  max_lines=args.max_lines,
- dry_run=args.dry_run
+ dry_run=args.dry_run,
  )
  elif args.command == "list-evaluators":
  return list_evaluators()
@@ -1,13 +1,13 @@
  """Evaluators module for adversarial-workflow plugin architecture."""

+ from .builtins import BUILTIN_EVALUATORS
  from .config import EvaluatorConfig
  from .discovery import (
+ EvaluatorParseError,
  discover_local_evaluators,
  parse_evaluator_yaml,
- EvaluatorParseError,
  )
  from .runner import run_evaluator
- from .builtins import BUILTIN_EVALUATORS


  def get_all_evaluators() -> dict[str, EvaluatorConfig]:
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
  Aliases from local evaluators are also included in the returned dictionary.
  """
  import logging
+
  logger = logging.getLogger(__name__)

  evaluators: dict[str, EvaluatorConfig] = {}
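
As a rough mental model for `get_all_evaluators()`: built-in definitions and locally discovered ones (plus their aliases) are merged into a single name-to-config mapping. A simplified sketch; the call signature of `discover_local_evaluators()` and the precedence of local definitions over built-ins are assumptions here, and the real function additionally handles aliases and conflict logging:

```python
# Simplified sketch of the merge get_all_evaluators() performs.
# Assumptions: BUILTIN_EVALUATORS is a name -> EvaluatorConfig mapping,
# discover_local_evaluators() takes no arguments and returns the same shape,
# and local definitions win on name clashes. Aliases and logging are omitted.
from adversarial_workflow.evaluators import BUILTIN_EVALUATORS
from adversarial_workflow.evaluators.config import EvaluatorConfig
from adversarial_workflow.evaluators.discovery import discover_local_evaluators


def merged_evaluators() -> dict[str, EvaluatorConfig]:
    evaluators: dict[str, EvaluatorConfig] = dict(BUILTIN_EVALUATORS)
    evaluators.update(discover_local_evaluators())
    return evaluators
```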
@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  try:
  content = yml_file.read_text(encoding="utf-8")
  except UnicodeDecodeError as e:
- raise EvaluatorParseError(
- f"File encoding error (not UTF-8): {yml_file}"
- ) from e
+ raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e

  # Parse YAML
  data = yaml.safe_load(content)
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  )

  # Validate required fields exist
- required = ["name", "description", "model", "api_key_env", "prompt", "output_suffix"]
+ required = [
+ "name",
+ "description",
+ "model",
+ "api_key_env",
+ "prompt",
+ "output_suffix",
+ ]
  missing = [f for f in required if f not in data]
  if missing:
  raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
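
For context, every local evaluator YAML must provide all six fields validated above. A small self-contained sketch of that check against an example definition; the field values, in particular `output_suffix`, are invented for illustration:

```python
# Sketch: the same required-field check applied to an example evaluator definition.
# The example values (model name, output_suffix, etc.) are illustrative only.
import yaml

example = yaml.safe_load(
    """
name: athena
description: Knowledge evaluation using Gemini 2.5 Pro
model: gemini-2.5-pro
api_key_env: GEMINI_API_KEY
prompt: |
  You are Athena, a knowledge evaluation specialist...
output_suffix: ATHENA-EVALUATION
"""
)

required = ["name", "description", "model", "api_key_env", "prompt", "output_suffix"]
missing = [f for f in required if f not in example]
print(missing or "all required fields present")
```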
@@ -10,10 +10,10 @@ import tempfile
  from datetime import datetime, timezone
  from pathlib import Path

- from .config import EvaluatorConfig
- from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
+ from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
  from ..utils.config import load_config
  from ..utils.validation import validate_evaluation_output
+ from .config import EvaluatorConfig


  def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
  """

  # Create temp file for prompt
- with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
  f.write(full_prompt)
  prompt_file = f.name

@@ -136,12 +136,15 @@
  # Build aider command
  cmd = [
  "aider",
- "--model", config.model,
+ "--model",
+ config.model,
  "--yes",
  "--no-git",
  "--no-auto-commits",
- "--message-file", prompt_file,
- "--read", file_path,
+ "--message-file",
+ prompt_file,
+ "--read",
+ file_path,
  ]

  result = subprocess.run(
@@ -224,7 +227,10 @@ def _execute_script(

  # Validate output
  file_basename = Path(file_path).stem
- log_file = Path(project_config["log_directory"]) / f"{file_basename}-{config.output_suffix}.md"
+ log_file = (
+ Path(project_config["log_directory"])
+ / f"{file_basename}-{config.output_suffix}.md"
+ )

  is_valid, verdict, message = validate_evaluation_output(str(log_file))

@@ -235,7 +241,9 @@
  return _report_verdict(verdict, log_file, config)


- def _report_verdict(verdict: str | None, log_file: Path, config: EvaluatorConfig) -> int:
+ def _report_verdict(
+ verdict: str | None, log_file: Path, config: EvaluatorConfig
+ ) -> int:
  """Report the evaluation verdict to terminal."""
  print()
  if verdict == "APPROVED":