adversarial-workflow 0.6.1.tar.gz → 0.6.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/PKG-INFO +25 -3
  2. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/README.md +24 -2
  3. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__init__.py +1 -1
  4. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py +1 -0
  5. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py +129 -65
  6. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py +3 -2
  7. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py +2 -0
  8. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py +39 -4
  9. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py +16 -8
  10. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/file_splitter.py +218 -184
  11. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/validation.py +3 -1
  12. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO +25 -3
  13. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt +2 -0
  14. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/pyproject.toml +3 -1
  15. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli.py +24 -69
  16. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py +154 -200
  17. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_config.py +55 -44
  18. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_env_loading.py +51 -89
  19. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluate.py +188 -129
  20. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_discovery.py +206 -1
  21. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_runner.py +18 -5
  22. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_file_splitter.py +106 -103
  23. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_list_evaluators.py +28 -45
  24. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_python_version.py +17 -16
  25. adversarial_workflow-0.6.3/tests/test_scripts_project.py +120 -0
  26. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_split_command.py +45 -37
  27. adversarial_workflow-0.6.3/tests/test_timeout_integration.py +406 -0
  28. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_utils_validation.py +26 -10
  29. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/LICENSE +0 -0
  30. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/builtins.py +0 -0
  31. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
  32. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.env.example.template +0 -0
  33. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/README.template +0 -0
  34. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
  35. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
  36. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
  37. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
  38. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
  39. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/config.yml.template +0 -0
  40. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
  41. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/example-task.md.template +0 -0
  42. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
  43. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
  44. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
  45. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/__init__.py +0 -0
  46. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/colors.py +0 -0
  47. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/config.py +0 -0
  48. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
  49. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/entry_points.txt +0 -0
  50. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/requires.txt +0 -0
  51. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/top_level.txt +0 -0
  52. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.cfg +0 -0
  53. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.py +0 -0
  54. {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_config.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: adversarial-workflow
- Version: 0.6.1
+ Version: 0.6.3
  Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
  Author: Fredrik Matheson
  License: MIT
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

- ## What's New in v0.6.0
+ ## What's New in v0.6.3

- 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+ ### Upgrade
+
+ ```bash
+ pip install --upgrade adversarial-workflow
+ ```
+
+ ### v0.6.3 - Configurable Timeouts
+
+ - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+ - **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+ - **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+ - **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+ ### v0.6.2 - .env Loading & Stability
+
+ - **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+ - **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+ - **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+ ### v0.6.0 - Plugin Architecture
+
+ 🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

  ```bash
  # Create a custom evaluator
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
  | `aliases` | No | Alternative command names |
  | `log_prefix` | No | CLI output prefix |
  | `fallback_model` | No | Fallback model if primary fails |
+ | `timeout` | No | Timeout in seconds (default: 180, max: 600) |
  | `version` | No | Evaluator version (default: 1.0.0) |

  ### Listing Available Evaluators
@@ -20,9 +20,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

- ## What's New in v0.6.0
+ ## What's New in v0.6.3

- 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+ ### Upgrade
+
+ ```bash
+ pip install --upgrade adversarial-workflow
+ ```
+
+ ### v0.6.3 - Configurable Timeouts
+
+ - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+ - **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+ - **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+ - **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+ ### v0.6.2 - .env Loading & Stability
+
+ - **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+ - **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+ - **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+ ### v0.6.0 - Plugin Architecture
+
+ 🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

  ```bash
  # Create a custom evaluator
@@ -424,6 +445,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
  | `aliases` | No | Alternative command names |
  | `log_prefix` | No | CLI output prefix |
  | `fallback_model` | No | Fallback model if primary fails |
+ | `timeout` | No | Timeout in seconds (default: 180, max: 600) |
  | `version` | No | Evaluator version (default: 1.0.0) |

  ### Listing Available Evaluators
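Note: README.md and PKG-INFO both gain the `timeout` row above. Purely as an illustration of the documented semantics (default 180 s, maximum 600 s), a local evaluator definition with a longer timeout could be parsed and sanity-checked as below; the evaluator name, model, prompt, and output suffix are placeholders, and only the field names and limits come from this diff:

```python
import yaml  # PyYAML, which the package itself imports

# Hypothetical evaluator YAML; field names match the required-fields list
# in discovery.py, and `timeout` follows the documented 1-600s range.
example = """
name: slow-model-review
description: Example evaluator that needs a longer timeout
model: openai/gpt-4o
api_key_env: OPENAI_API_KEY
prompt: Review this plan for feasibility and risks.
output_suffix: SLOW-REVIEW
timeout: 300
"""

data = yaml.safe_load(example)
assert isinstance(data["timeout"], int) and not isinstance(data["timeout"], bool)
assert 0 < data["timeout"] <= 600
print(f"{data['name']} would run with a {data['timeout']}s timeout")
```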
@@ -12,7 +12,7 @@ Usage:
  adversarial validate "pytest"
  """

- __version__ = "0.6.1"
+ __version__ = "0.6.2"
  __author__ = "Fredrik Matheson"
  __license__ = "MIT"

@@ -1,4 +1,5 @@
  """Allow execution via python -m adversarial_workflow."""
+
  from .cli import main

  if __name__ == "__main__":
@@ -27,9 +27,9 @@ from pathlib import Path
  from typing import Dict, List, Optional, Tuple

  import yaml
- from dotenv import load_dotenv, dotenv_values
+ from dotenv import dotenv_values, load_dotenv

- __version__ = "0.6.1"
+ __version__ = "0.6.2"

  # ANSI color codes for better output
  RESET = "\033[0m"
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
  f"{GREEN}✅ Setup Complete!{RESET}",
  [
  "Created:",
- " ✓ .env (with your API keys - added to .gitignore)"
- if (anthropic_key or openai_key)
- else " ⚠️ .env (skipped - no API keys provided)",
+ (
+ " ✓ .env (with your API keys - added to .gitignore)"
+ if (anthropic_key or openai_key)
+ else " ⚠️ .env (skipped - no API keys provided)"
+ ),
  " ✓ .adversarial/config.yml",
  " ✓ .adversarial/scripts/ (3 workflow scripts)",
  " ✓ .aider.conf.yml (aider configuration)",
  "",
- "Your configuration:"
- if (anthropic_key or openai_key)
- else "Configuration (no API keys yet):",
+ (
+ "Your configuration:"
+ if (anthropic_key or openai_key)
+ else "Configuration (no API keys yet):"
+ ),
  f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
  f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
  f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
@@ -806,15 +810,14 @@ def check() -> int:

  if env_file.exists():
  try:
- # Load .env into environment (idempotent - safe to call again after main())
- load_dotenv(env_file)
- # Use dotenv_values() to count variables directly from file
- # This gives accurate count regardless of what was already in environment
+ # Count variables by reading file directly (works even if already loaded)
  env_vars = dotenv_values(env_file)
+ var_count = len([k for k, v in env_vars.items() if v is not None])
+
+ # Still load to ensure environment is set
+ load_dotenv(env_file)
  env_loaded = True
- good_checks.append(
- f".env file found ({len(env_vars)} variables configured)"
- )
+ good_checks.append(f".env file found and loaded ({var_count} variables)")
  except (FileNotFoundError, PermissionError) as e:
  # File access errors
  issues.append(
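The reworked `check()` logic above reads the file with `dotenv_values()` to get an accurate count and only then calls `load_dotenv()`. A standalone sketch of that pattern (not package code; the `.env` location is illustrative):

```python
from pathlib import Path

from dotenv import dotenv_values, load_dotenv  # python-dotenv

env_file = Path(".env")  # illustrative location
if env_file.exists():
    # dotenv_values() parses the file without touching os.environ, so the
    # count stays accurate even if the variables were already exported.
    env_vars = dotenv_values(env_file)
    var_count = len([k for k, v in env_vars.items() if v is not None])

    # load_dotenv() then exports the values into the process environment.
    load_dotenv(env_file)
    print(f".env file found and loaded ({var_count} variables)")
```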
@@ -2097,10 +2100,6 @@ def evaluate(task_file: str) -> int:
  return 0


-
-
-
-
  def review() -> int:
  """Run Phase 3: Code review."""

@@ -2289,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
  )
  return None
  else:
- print(f"{RED}❌ ERROR: {template_type} template not found in package{RESET}")
+ print(
+ f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
+ )
  return None

  elif template_type == "custom" and url:
@@ -2739,54 +2740,61 @@ def agent_onboard(project_path: str = ".") -> int:
  return 0


- def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_run: bool = False):
+ def split(
+ task_file: str,
+ strategy: str = "sections",
+ max_lines: int = 500,
+ dry_run: bool = False,
+ ):
  """Split large task files into smaller evaluable chunks.
-
+
  Args:
  task_file: Path to the task file to split
  strategy: Split strategy ('sections', 'phases', or 'manual')
  max_lines: Maximum lines per split (default: 500)
  dry_run: Preview splits without creating files
-
+
  Returns:
  Exit code (0 for success, 1 for error)
  """
  from .utils.file_splitter import (
- analyze_task_file,
- split_by_sections,
- split_by_phases,
- generate_split_files
+ analyze_task_file,
+ generate_split_files,
+ split_by_phases,
+ split_by_sections,
  )
-
+
  try:
  print_box("File Splitting Utility", CYAN)
-
+
  # Validate file exists
  if not os.path.exists(task_file):
  print(f"{RED}Error: File not found: {task_file}{RESET}")
  return 1
-
+
  # Analyze file
  print(f"📄 Analyzing task file: {task_file}")
  analysis = analyze_task_file(task_file)
-
- lines = analysis['total_lines']
- tokens = analysis['estimated_tokens']
+
+ lines = analysis["total_lines"]
+ tokens = analysis["estimated_tokens"]
  print(f" Lines: {lines}")
  print(f" Estimated tokens: ~{tokens:,}")
-
+
  # Check if splitting is recommended
  if lines <= max_lines:
- print(f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}")
+ print(
+ f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
+ )
  print("No splitting needed.")
  return 0
-
+
  print(f"{YELLOW}⚠️ File exceeds recommended limit ({max_lines} lines){RESET}")
-
+
  # Read file content for splitting
- with open(task_file, 'r', encoding='utf-8') as f:
+ with open(task_file, "r", encoding="utf-8") as f:
  content = f.read()
-
+

  # Apply split strategy
  if strategy == "sections":
@@ -2795,42 +2803,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
  splits = split_by_phases(content)
  print(f"\n💡 Suggested splits (by phases):")
  else:
- print(f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}")
+ print(
+ f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
+ )
  return 1
-
+
  # Display split preview
  for i, split in enumerate(splits, 1):
  filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
  print(f" - {filename} ({split['line_count']} lines)")
-
+
  # Dry run mode
  if dry_run:
  print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
  return 0
-
+
  # Prompt user for confirmation
  create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
-
- if create_files.lower() in ['y', 'yes']:
+
+ if create_files.lower() in ["y", "yes"]:
  # Create output directory
  output_dir = os.path.join(os.path.dirname(task_file), "splits")
-
+
  # Generate split files
  created_files = generate_split_files(task_file, splits, output_dir)
-
+
  print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
  for file_path in created_files:
  print(f" {file_path}")
-
+
  print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
  for file_path in created_files:
  rel_path = os.path.relpath(file_path)
  print(f" adversarial evaluate {rel_path}")
  else:
  print("Cancelled - no files created.")
-
+
  return 0
-
+
  except Exception as e:
  print(f"{RED}Error during file splitting: {e}{RESET}")
  return 1
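For context, the reformatted `split()` above keeps its original signature, so a dry-run invocation from Python would look roughly like this (sketch only; assumes the package is installed, and the task-file path is hypothetical):

```python
from adversarial_workflow.cli import split

# Preview how a large task file would be divided without writing anything.
exit_code = split("tasks/big-refactor.md", strategy="phases", dry_run=True)
print(f"split() returned exit code {exit_code}")
```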
@@ -2876,6 +2886,7 @@ def list_evaluators() -> int:

  return 0

+
  def main():
  """Main CLI entry point."""
  import logging
@@ -2888,10 +2899,20 @@ def main():
  except Exception as e:
  print(f"Warning: Could not load .env file: {e}", file=sys.stderr)

+ # Load .env file before any commands run
+ # Use explicit path to ensure we find .env in current working directory
+ # (load_dotenv() without args can fail to find .env in some contexts)
+ env_file = Path.cwd() / ".env"
+ if env_file.exists():
+ try:
+ load_dotenv(env_file)
+ except (OSError, UnicodeDecodeError) as e:
+ print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
+
  from adversarial_workflow.evaluators import (
+ BUILTIN_EVALUATORS,
  get_all_evaluators,
  run_evaluator,
- BUILTIN_EVALUATORS,
  )

  logger = logging.getLogger(__name__)
@@ -2899,8 +2920,16 @@ def main():
  # Commands that cannot be overridden by evaluators
  # Note: 'review' is special - it reviews git changes without a file argument
  STATIC_COMMANDS = {
- "init", "check", "doctor", "health", "quickstart",
- "agent", "split", "validate", "review", "list-evaluators"
+ "init",
+ "check",
+ "doctor",
+ "health",
+ "quickstart",
+ "agent",
+ "split",
+ "validate",
+ "review",
+ "list-evaluators",
  }

  parser = argparse.ArgumentParser(
@@ -2989,16 +3018,21 @@ For more information: https://github.com/movito/adversarial-workflow
  )
  split_parser.add_argument("task_file", help="Task file to split")
  split_parser.add_argument(
- "--strategy", "-s", choices=["sections", "phases"], default="sections",
- help="Split strategy: 'sections' (default) or 'phases'"
+ "--strategy",
+ "-s",
+ choices=["sections", "phases"],
+ default="sections",
+ help="Split strategy: 'sections' (default) or 'phases'",
  )
  split_parser.add_argument(
- "--max-lines", "-m", type=int, default=500,
- help="Maximum lines per split (default: 500)"
+ "--max-lines",
+ "-m",
+ type=int,
+ default=500,
+ help="Maximum lines per split (default: 500)",
  )
  split_parser.add_argument(
- "--dry-run", action="store_true",
- help="Preview splits without creating files"
+ "--dry-run", action="store_true", help="Preview splits without creating files"
  )

  # list-evaluators command
@@ -3019,7 +3053,12 @@ For more information: https://github.com/movito/adversarial-workflow
  for name, config in evaluators.items():
  # Skip if name conflicts with static command
  if name in STATIC_COMMANDS:
- logger.warning("Evaluator '%s' conflicts with CLI command; skipping", name)
+ # Only warn for user-defined evaluators, not built-ins
+ # Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
+ if getattr(config, "source", None) != "builtin":
+ logger.warning(
+ "Evaluator '%s' conflicts with CLI command; skipping", name
+ )
  # Mark as registered to prevent alias re-registration attempts
  registered_configs.add(id(config))
  continue
@@ -3046,10 +3085,11 @@ For more information: https://github.com/movito/adversarial-workflow
  )
  eval_parser.add_argument("file", help="File to evaluate")
  eval_parser.add_argument(
- "--timeout", "-t",
+ "--timeout",
+ "-t",
  type=int,
- default=180,
- help="Timeout in seconds (default: 180)"
+ default=None,
+ help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
  )
  # Store config for later execution
  eval_parser.set_defaults(evaluator_config=config)
@@ -3062,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow

  # Check for evaluator command first (has evaluator_config attribute)
  if hasattr(args, "evaluator_config"):
+ # Determine timeout: CLI flag > YAML config > default (180s)
+ if args.timeout is not None:
+ timeout = args.timeout
+ source = "CLI override"
+ elif args.evaluator_config.timeout != 180:
+ timeout = args.evaluator_config.timeout
+ source = "evaluator config"
+ else:
+ timeout = args.evaluator_config.timeout  # 180 (default)
+ source = "default"
+
+ # Validate CLI timeout (consistent with YAML validation)
+ if timeout <= 0:
+ print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
+ return 1
+ if timeout > 600:
+ print(
+ f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
+ )
+ timeout = 600
+
+ # Log actual timeout and source
+ print(f"Using timeout: {timeout}s ({source})")
+
  return run_evaluator(
  args.evaluator_config,
  args.file,
- timeout=args.timeout,
+ timeout=timeout,
  )

  # Execute static commands
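The precedence implemented inline above (CLI flag > evaluator YAML > 180 s default, positive, clamped at 600 s) can be restated as a small helper; this is an illustrative sketch, not code from the package:

```python
def resolve_timeout(cli_timeout: int | None, config_timeout: int = 180) -> tuple[int, str]:
    """Pick the effective timeout and report where it came from."""
    if cli_timeout is not None:
        timeout, source = cli_timeout, "CLI override"
    elif config_timeout != 180:
        timeout, source = config_timeout, "evaluator config"
    else:
        timeout, source = 180, "default"

    if timeout <= 0:
        raise ValueError(f"Timeout must be positive (> 0), got {timeout}")
    return (min(timeout, 600), source)  # clamp to the 600s ceiling


assert resolve_timeout(None, 300) == (300, "evaluator config")
assert resolve_timeout(400, 300) == (400, "CLI override")
assert resolve_timeout(None) == (180, "default")
assert resolve_timeout(900) == (600, "CLI override")
```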
@@ -3097,7 +3161,7 @@ For more information: https://github.com/movito/adversarial-workflow
  args.task_file,
  strategy=args.strategy,
  max_lines=args.max_lines,
- dry_run=args.dry_run
+ dry_run=args.dry_run,
  )
  elif args.command == "list-evaluators":
  return list_evaluators()
@@ -1,13 +1,13 @@
  """Evaluators module for adversarial-workflow plugin architecture."""

+ from .builtins import BUILTIN_EVALUATORS
  from .config import EvaluatorConfig
  from .discovery import (
+ EvaluatorParseError,
  discover_local_evaluators,
  parse_evaluator_yaml,
- EvaluatorParseError,
  )
  from .runner import run_evaluator
- from .builtins import BUILTIN_EVALUATORS


  def get_all_evaluators() -> dict[str, EvaluatorConfig]:
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
  Aliases from local evaluators are also included in the returned dictionary.
  """
  import logging
+
  logger = logging.getLogger(__name__)

  evaluators: dict[str, EvaluatorConfig] = {}
@@ -26,6 +26,7 @@ class EvaluatorConfig:
  fallback_model: Fallback model if primary fails
  aliases: Alternative command names
  version: Evaluator version
+ timeout: Timeout in seconds (default: 180, max: 600)
  source: "builtin" or "local" (set internally)
  config_file: Path to YAML file if local (set internally)
  """
@@ -43,6 +44,7 @@ class EvaluatorConfig:
  fallback_model: str | None = None
  aliases: list[str] = field(default_factory=list)
  version: str = "1.0.0"
+ timeout: int = 180  # Timeout in seconds (default: 180, max: 600)

  # Metadata (set internally during discovery, not from YAML)
  source: str = "builtin"
@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  try:
  content = yml_file.read_text(encoding="utf-8")
  except UnicodeDecodeError as e:
- raise EvaluatorParseError(
- f"File encoding error (not UTF-8): {yml_file}"
- ) from e
+ raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e

  # Parse YAML
  data = yaml.safe_load(content)
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  )

  # Validate required fields exist
- required = ["name", "description", "model", "api_key_env", "prompt", "output_suffix"]
+ required = [
+ "name",
+ "description",
+ "model",
+ "api_key_env",
+ "prompt",
+ "output_suffix",
+ ]
  missing = [f for f in required if f not in data]
  if missing:
  raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
@@ -117,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
  )

+ # Validate timeout if present
+ if "timeout" in data:
+ timeout = data["timeout"]
+ # Handle null/empty values
+ if timeout is None or timeout == "":
+ raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
+ # Check for bool before int (bool is subclass of int in Python)
+ # YAML parses 'yes'/'true' as True, 'no'/'false' as False
+ if isinstance(timeout, bool):
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be an integer, got bool: {timeout!r}"
+ )
+ if not isinstance(timeout, int):
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
+ )
+ # timeout=0 is invalid (does not disable timeout - use a large value instead)
+ if timeout <= 0:
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be positive (> 0), got {timeout}"
+ )
+ if timeout > 600:
+ logger.warning(
+ "Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
+ timeout,
+ yml_file.name,
+ )
+ data["timeout"] = 600
+
  # Filter to known fields only (log unknown fields)
  known_fields = {
  "name",
@@ -129,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  "fallback_model",
  "aliases",
  "version",
+ "timeout",
  }
  unknown = set(data.keys()) - known_fields
  if unknown:
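The bool check added above guards a YAML quirk: `yes`/`no` and `true`/`false` parse to booleans, and `bool` is a subclass of `int` in Python, so a bare integer check would silently accept them. A standalone demonstration (not package code):

```python
import yaml

doc = yaml.safe_load("timeout: yes")
print(doc)                               # {'timeout': True}
print(isinstance(doc["timeout"], int))   # True - a plain int check would pass
print(isinstance(doc["timeout"], bool))  # True - so bools must be rejected first
```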
@@ -10,10 +10,10 @@ import tempfile
  from datetime import datetime, timezone
  from pathlib import Path

- from .config import EvaluatorConfig
- from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
+ from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
  from ..utils.config import load_config
  from ..utils.validation import validate_evaluation_output
+ from .config import EvaluatorConfig


  def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
  """

  # Create temp file for prompt
- with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
  f.write(full_prompt)
  prompt_file = f.name

@@ -136,12 +136,15 @@ def _run_custom_evaluator(
  # Build aider command
  cmd = [
  "aider",
- "--model", config.model,
+ "--model",
+ config.model,
  "--yes",
  "--no-git",
  "--no-auto-commits",
- "--message-file", prompt_file,
- "--read", file_path,
+ "--message-file",
+ prompt_file,
+ "--read",
+ file_path,
  ]

  result = subprocess.run(
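The hunk stops at the `subprocess.run(` call, so its arguments are not visible here. As a hedged sketch of how a resolved timeout is typically applied to such a call, with placeholder values and keyword arguments that are illustrative rather than the package's exact invocation:

```python
import subprocess

cmd = ["echo", "hello"]  # stand-in for the aider command list built above
timeout = 180            # resolved from CLI flag / evaluator YAML / default

try:
    # subprocess.run() raises TimeoutExpired once `timeout` seconds elapse.
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    print(result.stdout)
except subprocess.TimeoutExpired:
    print(f"Evaluator timed out after {timeout}s")
```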
@@ -224,7 +227,10 @@ def _execute_script(

  # Validate output
  file_basename = Path(file_path).stem
- log_file = Path(project_config["log_directory"]) / f"{file_basename}-{config.output_suffix}.md"
+ log_file = (
+ Path(project_config["log_directory"])
+ / f"{file_basename}-{config.output_suffix}.md"
+ )

  is_valid, verdict, message = validate_evaluation_output(str(log_file))

@@ -235,7 +241,9 @@ def _execute_script(
  return _report_verdict(verdict, log_file, config)


- def _report_verdict(verdict: str | None, log_file: Path, config: EvaluatorConfig) -> int:
+ def _report_verdict(
+ verdict: str | None, log_file: Path, config: EvaluatorConfig
+ ) -> int:
  """Report the evaluation verdict to terminal."""
  print()
  if verdict == "APPROVED":