adversarial-workflow 0.6.1.tar.gz → 0.6.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/PKG-INFO +1 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/__init__.py +1 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/cli.py +90 -56
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/__init__.py +3 -2
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/discovery.py +9 -4
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/runner.py +16 -8
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/file_splitter.py +218 -184
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/validation.py +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/PKG-INFO +1 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/SOURCES.txt +1 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/pyproject.toml +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_cli.py +24 -69
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_cli_dynamic_commands.py +99 -200
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_config.py +49 -44
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_env_loading.py +51 -89
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_evaluate.py +177 -129
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_evaluator_discovery.py +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_evaluator_runner.py +18 -5
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_file_splitter.py +105 -103
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_list_evaluators.py +24 -45
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_python_version.py +16 -16
- adversarial_workflow-0.6.2/tests/test_scripts_project.py +120 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_split_command.py +45 -37
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_utils_validation.py +26 -10
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/LICENSE +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/README.md +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/__main__.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/builtins.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/config.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/.env.example.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/README.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/config.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/example-task.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/__init__.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/colors.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/utils/config.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/entry_points.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/requires.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow.egg-info/top_level.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/setup.cfg +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/setup.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/tests/test_evaluator_config.py +0 -0
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/cli.py
RENAMED

@@ -27,9 +27,9 @@ from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 
 import yaml
-from dotenv import …
+from dotenv import dotenv_values, load_dotenv
 
-__version__ = "0.6.1"
+__version__ = "0.6.2"
 
 # ANSI color codes for better output
 RESET = "\033[0m"
@@ -806,15 +806,14 @@ def check() -> int:
 
     if env_file.exists():
         try:
-            # …
-            load_dotenv(env_file)
-            # Use dotenv_values() to count variables directly from file
-            # This gives accurate count regardless of what was already in environment
+            # Count variables by reading file directly (works even if already loaded)
             env_vars = dotenv_values(env_file)
+            var_count = len([k for k, v in env_vars.items() if v is not None])
+
+            # Still load to ensure environment is set
+            load_dotenv(env_file)
             env_loaded = True
-            good_checks.append(
-                f".env file found ({len(env_vars)} variables configured)"
-            )
+            good_checks.append(f".env file found and loaded ({var_count} variables)")
         except (FileNotFoundError, PermissionError) as e:
             # File access errors
             issues.append(
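For reference, the reordered logic above counts entries with dotenv_values() before loading them, so the reported number no longer depends on what was already present in the process environment. A minimal standalone sketch of that pattern (the path and message are illustrative, not the exact check() wiring):

```python
from pathlib import Path

from dotenv import dotenv_values, load_dotenv

env_file = Path(".env")  # illustrative; check() resolves this relative to the project
if env_file.exists():
    # dotenv_values() reads the file into a dict without touching os.environ,
    # so keys that were already exported are still counted.
    env_vars = dotenv_values(env_file)
    var_count = len([k for k, v in env_vars.items() if v is not None])

    # load_dotenv() is still called so later commands see the variables.
    load_dotenv(env_file)
    print(f".env file found and loaded ({var_count} variables)")
```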
@@ -2097,10 +2096,6 @@ def evaluate(task_file: str) -> int:
     return 0
 
 
-
-
-
-
 def review() -> int:
     """Run Phase 3: Code review."""
 
@@ -2739,54 +2734,61 @@ def agent_onboard(project_path: str = ".") -> int:
     return 0
 
 
-def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_run: bool = False):
+def split(
+    task_file: str,
+    strategy: str = "sections",
+    max_lines: int = 500,
+    dry_run: bool = False,
+):
     """Split large task files into smaller evaluable chunks.
-
+
     Args:
         task_file: Path to the task file to split
         strategy: Split strategy ('sections', 'phases', or 'manual')
         max_lines: Maximum lines per split (default: 500)
         dry_run: Preview splits without creating files
-
+
     Returns:
         Exit code (0 for success, 1 for error)
     """
     from .utils.file_splitter import (
-        analyze_task_file,
-        …
-        split_by_phases,
-        …
+        analyze_task_file,
+        generate_split_files,
+        split_by_phases,
+        split_by_sections,
     )
-
+
     try:
         print_box("File Splitting Utility", CYAN)
-
+
         # Validate file exists
         if not os.path.exists(task_file):
             print(f"{RED}Error: File not found: {task_file}{RESET}")
             return 1
-
+
         # Analyze file
         print(f"📄 Analyzing task file: {task_file}")
         analysis = analyze_task_file(task_file)
-
-        lines = analysis[…
-        tokens = analysis[…
+
+        lines = analysis["total_lines"]
+        tokens = analysis["estimated_tokens"]
         print(f"  Lines: {lines}")
         print(f"  Estimated tokens: ~{tokens:,}")
-
+
         # Check if splitting is recommended
         if lines <= max_lines:
-            print(…
+            print(
+                f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
+            )
             print("No splitting needed.")
             return 0
-
+
         print(f"{YELLOW}⚠️  File exceeds recommended limit ({max_lines} lines){RESET}")
-
+
         # Read file content for splitting
-        with open(task_file, …
+        with open(task_file, "r", encoding="utf-8") as f:
             content = f.read()
-
+
         # Apply split strategy
         if strategy == "sections":
             splits = split_by_sections(content, max_lines=max_lines)
@@ -2795,42 +2797,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_run: bool = False):
             splits = split_by_phases(content)
             print(f"\n💡 Suggested splits (by phases):")
         else:
-            print(…
+            print(
+                f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
+            )
             return 1
-
+
         # Display split preview
         for i, split in enumerate(splits, 1):
             filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
             print(f"  - {filename} ({split['line_count']} lines)")
-
+
         # Dry run mode
         if dry_run:
             print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
             return 0
-
+
         # Prompt user for confirmation
         create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
-
-        if create_files.lower() in […
+
+        if create_files.lower() in ["y", "yes"]:
             # Create output directory
             output_dir = os.path.join(os.path.dirname(task_file), "splits")
-
+
             # Generate split files
             created_files = generate_split_files(task_file, splits, output_dir)
-
+
             print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
             for file_path in created_files:
                 print(f"  {file_path}")
-
+
             print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
             for file_path in created_files:
                 rel_path = os.path.relpath(file_path)
                 print(f"  adversarial evaluate {rel_path}")
         else:
             print("Cancelled - no files created.")
-
+
         return 0
-
+
     except Exception as e:
         print(f"{RED}Error during file splitting: {e}{RESET}")
         return 1
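The split() changes above are formatting-only, but they make the flow easier to follow: analyze, pick a strategy, preview, then write the parts. A rough sketch of the same flow using the helpers imported from utils/file_splitter, assuming the signatures and dict keys visible in this diff ("total_lines", "estimated_tokens", 'line_count'); the file path is hypothetical:

```python
from adversarial_workflow.utils.file_splitter import (
    analyze_task_file,
    generate_split_files,
    split_by_sections,
)

task_file = "tasks/big-task.md"  # hypothetical input

analysis = analyze_task_file(task_file)
print(f"{analysis['total_lines']} lines, ~{analysis['estimated_tokens']:,} tokens")

if analysis["total_lines"] > 500:
    with open(task_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Each split is a dict that carries at least a 'line_count' (per the preview code above).
    splits = split_by_sections(content, max_lines=500)

    # Writes the parts into an output directory and returns the created paths.
    created = generate_split_files(task_file, splits, "tasks/splits")
    for path in created:
        print(f"adversarial evaluate {path}")
```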
@@ -2876,6 +2880,7 @@ def list_evaluators() -> int:
 
     return 0
 
+
 def main():
     """Main CLI entry point."""
     import logging
@@ -2888,10 +2893,20 @@ def main():
     except Exception as e:
         print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
 
+    # Load .env file before any commands run
+    # Use explicit path to ensure we find .env in current working directory
+    # (load_dotenv() without args can fail to find .env in some contexts)
+    env_file = Path.cwd() / ".env"
+    if env_file.exists():
+        try:
+            load_dotenv(env_file)
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
+
     from adversarial_workflow.evaluators import (
+        BUILTIN_EVALUATORS,
         get_all_evaluators,
         run_evaluator,
-        BUILTIN_EVALUATORS,
     )
 
     logger = logging.getLogger(__name__)
@@ -2899,8 +2914,16 @@ def main():
     # Commands that cannot be overridden by evaluators
     # Note: 'review' is special - it reviews git changes without a file argument
     STATIC_COMMANDS = {
-        "init",
-        "…
+        "init",
+        "check",
+        "doctor",
+        "health",
+        "quickstart",
+        "agent",
+        "split",
+        "validate",
+        "review",
+        "list-evaluators",
     }
 
     parser = argparse.ArgumentParser(
@@ -2989,16 +3012,21 @@ For more information: https://github.com/movito/adversarial-workflow
     )
     split_parser.add_argument("task_file", help="Task file to split")
     split_parser.add_argument(
-        "--strategy",
-        …
+        "--strategy",
+        "-s",
+        choices=["sections", "phases"],
+        default="sections",
+        help="Split strategy: 'sections' (default) or 'phases'",
     )
     split_parser.add_argument(
-        "--max-lines",
-        …
+        "--max-lines",
+        "-m",
+        type=int,
+        default=500,
+        help="Maximum lines per split (default: 500)",
     )
     split_parser.add_argument(
-        "--dry-run", action="store_true",
-        help="Preview splits without creating files"
+        "--dry-run", action="store_true", help="Preview splits without creating files"
     )
 
     # list-evaluators command
@@ -3019,7 +3047,12 @@ For more information: https://github.com/movito/adversarial-workflow
     for name, config in evaluators.items():
         # Skip if name conflicts with static command
         if name in STATIC_COMMANDS:
-            …
+            # Only warn for user-defined evaluators, not built-ins
+            # Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
+            if getattr(config, "source", None) != "builtin":
+                logger.warning(
+                    "Evaluator '%s' conflicts with CLI command; skipping", name
+                )
             # Mark as registered to prevent alias re-registration attempts
             registered_configs.add(id(config))
             continue
@@ -3046,10 +3079,11 @@ For more information: https://github.com/movito/adversarial-workflow
     )
     eval_parser.add_argument("file", help="File to evaluate")
     eval_parser.add_argument(
-        "--timeout",
+        "--timeout",
+        "-t",
         type=int,
         default=180,
-        help="Timeout in seconds (default: 180)"
+        help="Timeout in seconds (default: 180)",
     )
     # Store config for later execution
     eval_parser.set_defaults(evaluator_config=config)
@@ -3097,7 +3131,7 @@ For more information: https://github.com/movito/adversarial-workflow
             args.task_file,
             strategy=args.strategy,
             max_lines=args.max_lines,
-            dry_run=args.dry_run
+            dry_run=args.dry_run,
         )
     elif args.command == "list-evaluators":
         return list_evaluators()
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/__init__.py
RENAMED

@@ -1,13 +1,13 @@
 """Evaluators module for adversarial-workflow plugin architecture."""
 
+from .builtins import BUILTIN_EVALUATORS
 from .config import EvaluatorConfig
 from .discovery import (
+    EvaluatorParseError,
     discover_local_evaluators,
     parse_evaluator_yaml,
-    EvaluatorParseError,
 )
 from .runner import run_evaluator
-from .builtins import BUILTIN_EVALUATORS
 
 
 def get_all_evaluators() -> dict[str, EvaluatorConfig]:
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
     Aliases from local evaluators are also included in the returned dictionary.
     """
     import logging
+
     logger = logging.getLogger(__name__)
 
     evaluators: dict[str, EvaluatorConfig] = {}
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/discovery.py
RENAMED

@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
     try:
         content = yml_file.read_text(encoding="utf-8")
     except UnicodeDecodeError as e:
-        raise EvaluatorParseError(
-            f"File encoding error (not UTF-8): {yml_file}"
-        ) from e
+        raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e
 
     # Parse YAML
     data = yaml.safe_load(content)
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
     )
 
     # Validate required fields exist
-    required = […
+    required = [
+        "name",
+        "description",
+        "model",
+        "api_key_env",
+        "prompt",
+        "output_suffix",
+    ]
     missing = [f for f in required if f not in data]
     if missing:
         raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
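The expanded `required` list spells out what a local evaluator definition must provide. A hypothetical definition that passes this check, validated the same way the code above does (field values are made up for illustration, not taken from the package):

```python
import yaml

# Hypothetical evaluator YAML; only the six required keys from the diff are shown.
EVALUATOR_YML = """
name: security-review
description: Flag risky patterns in a task file
model: gpt-4o
api_key_env: OPENAI_API_KEY
prompt: |
  Review the attached file and end with a clear APPROVED or REJECTED verdict.
output_suffix: SECURITY-REVIEW
"""

data = yaml.safe_load(EVALUATOR_YML)
required = ["name", "description", "model", "api_key_env", "prompt", "output_suffix"]
missing = [f for f in required if f not in data]
if missing:
    raise ValueError(f"Missing required fields: {', '.join(missing)}")
```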
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.2}/adversarial_workflow/evaluators/runner.py
RENAMED
@@ -10,10 +10,10 @@ import tempfile
 from datetime import datetime, timezone
 from pathlib import Path
 
-from .config import EvaluatorConfig
-from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
+from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
 from ..utils.config import load_config
 from ..utils.validation import validate_evaluation_output
+from .config import EvaluatorConfig
 
 
 def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
     """
 
     # Create temp file for prompt
-    with tempfile.NamedTemporaryFile(mode=…
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
         f.write(full_prompt)
         prompt_file = f.name
 
@@ -136,12 +136,15 @@ def _run_custom_evaluator(
     # Build aider command
     cmd = [
         "aider",
-        "--model", …
+        "--model",
+        config.model,
         "--yes",
         "--no-git",
         "--no-auto-commits",
-        "--message-file",
-        …
+        "--message-file",
+        prompt_file,
+        "--read",
+        file_path,
     ]
 
     result = subprocess.run(
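The runner.py hunks above write the prompt to a temp file and then hand both it and the evaluated file to aider. Roughly, the call sequence now looks like this; the flags are the ones visible in the diff, while the model value and the subprocess keyword arguments are illustrative, since the rest of the call is not shown here:

```python
import subprocess
import tempfile

full_prompt = "..."            # evaluator prompt assembled by runner.py
file_path = "tasks/demo.md"    # hypothetical file under evaluation
model = "gpt-4o"               # illustrative; the real value comes from config.model

# Prompt is passed to aider via --message-file rather than on the command line.
with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
    f.write(full_prompt)
    prompt_file = f.name

cmd = [
    "aider",
    "--model", model,
    "--yes",
    "--no-git",
    "--no-auto-commits",
    "--message-file", prompt_file,
    "--read", file_path,       # added in 0.6.2: attach the evaluated file read-only
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=180)
print(result.returncode)
```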
@@ -224,7 +227,10 @@ def _execute_script(
 
     # Validate output
     file_basename = Path(file_path).stem
-    log_file = …
+    log_file = (
+        Path(project_config["log_directory"])
+        / f"{file_basename}-{config.output_suffix}.md"
+    )
 
     is_valid, verdict, message = validate_evaluation_output(str(log_file))
 
@@ -235,7 +241,9 @@ def _execute_script(
     return _report_verdict(verdict, log_file, config)
 
 
-def _report_verdict(…
+def _report_verdict(
+    verdict: str | None, log_file: Path, config: EvaluatorConfig
+) -> int:
     """Report the evaluation verdict to terminal."""
     print()
     if verdict == "APPROVED":