adversarial-workflow 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/__main__.py +1 -0
- adversarial_workflow/cli.py +129 -65
- adversarial_workflow/evaluators/__init__.py +3 -2
- adversarial_workflow/evaluators/config.py +2 -0
- adversarial_workflow/evaluators/discovery.py +39 -4
- adversarial_workflow/evaluators/runner.py +16 -8
- adversarial_workflow/utils/file_splitter.py +218 -184
- adversarial_workflow/utils/validation.py +3 -1
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/METADATA +25 -3
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/RECORD +15 -15
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/WHEEL +1 -1
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/top_level.txt +0 -0
adversarial_workflow/__init__.py
CHANGED
adversarial_workflow/__main__.py
CHANGED
adversarial_workflow/cli.py
CHANGED
|
@@ -27,9 +27,9 @@ from pathlib import Path
|
|
|
27
27
|
from typing import Dict, List, Optional, Tuple
|
|
28
28
|
|
|
29
29
|
import yaml
|
|
30
|
-
from dotenv import
|
|
30
|
+
from dotenv import dotenv_values, load_dotenv
|
|
31
31
|
|
|
32
|
-
__version__ = "0.6.
|
|
32
|
+
__version__ = "0.6.2"
|
|
33
33
|
|
|
34
34
|
# ANSI color codes for better output
|
|
35
35
|
RESET = "\033[0m"
|
|
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
|
|
|
322
322
|
f"{GREEN}✅ Setup Complete!{RESET}",
|
|
323
323
|
[
|
|
324
324
|
"Created:",
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
325
|
+
(
|
|
326
|
+
" ✓ .env (with your API keys - added to .gitignore)"
|
|
327
|
+
if (anthropic_key or openai_key)
|
|
328
|
+
else " ⚠️ .env (skipped - no API keys provided)"
|
|
329
|
+
),
|
|
328
330
|
" ✓ .adversarial/config.yml",
|
|
329
331
|
" ✓ .adversarial/scripts/ (3 workflow scripts)",
|
|
330
332
|
" ✓ .aider.conf.yml (aider configuration)",
|
|
331
333
|
"",
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
334
|
+
(
|
|
335
|
+
"Your configuration:"
|
|
336
|
+
if (anthropic_key or openai_key)
|
|
337
|
+
else "Configuration (no API keys yet):"
|
|
338
|
+
),
|
|
335
339
|
f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
|
|
336
340
|
f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
|
|
337
341
|
f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
|
|
@@ -806,15 +810,14 @@ def check() -> int:
|
|
|
806
810
|
|
|
807
811
|
if env_file.exists():
|
|
808
812
|
try:
|
|
809
|
-
#
|
|
810
|
-
load_dotenv(env_file)
|
|
811
|
-
# Use dotenv_values() to count variables directly from file
|
|
812
|
-
# This gives accurate count regardless of what was already in environment
|
|
813
|
+
# Count variables by reading file directly (works even if already loaded)
|
|
813
814
|
env_vars = dotenv_values(env_file)
|
|
815
|
+
var_count = len([k for k, v in env_vars.items() if v is not None])
|
|
816
|
+
|
|
817
|
+
# Still load to ensure environment is set
|
|
818
|
+
load_dotenv(env_file)
|
|
814
819
|
env_loaded = True
|
|
815
|
-
good_checks.append(
|
|
816
|
-
f".env file found ({len(env_vars)} variables configured)"
|
|
817
|
-
)
|
|
820
|
+
good_checks.append(f".env file found and loaded ({var_count} variables)")
|
|
818
821
|
except (FileNotFoundError, PermissionError) as e:
|
|
819
822
|
# File access errors
|
|
820
823
|
issues.append(
|
|
@@ -2097,10 +2100,6 @@ def evaluate(task_file: str) -> int:
|
|
|
2097
2100
|
return 0
|
|
2098
2101
|
|
|
2099
2102
|
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
2103
|
def review() -> int:
|
|
2105
2104
|
"""Run Phase 3: Code review."""
|
|
2106
2105
|
|
|
@@ -2289,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
|
|
|
2289
2288
|
)
|
|
2290
2289
|
return None
|
|
2291
2290
|
else:
|
|
2292
|
-
print(
|
|
2291
|
+
print(
|
|
2292
|
+
f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
|
|
2293
|
+
)
|
|
2293
2294
|
return None
|
|
2294
2295
|
|
|
2295
2296
|
elif template_type == "custom" and url:
|
|
@@ -2739,54 +2740,61 @@ def agent_onboard(project_path: str = ".") -> int:
|
|
|
2739
2740
|
return 0
|
|
2740
2741
|
|
|
2741
2742
|
|
|
2742
|
-
def split(
|
|
2743
|
+
def split(
|
|
2744
|
+
task_file: str,
|
|
2745
|
+
strategy: str = "sections",
|
|
2746
|
+
max_lines: int = 500,
|
|
2747
|
+
dry_run: bool = False,
|
|
2748
|
+
):
|
|
2743
2749
|
"""Split large task files into smaller evaluable chunks.
|
|
2744
|
-
|
|
2750
|
+
|
|
2745
2751
|
Args:
|
|
2746
2752
|
task_file: Path to the task file to split
|
|
2747
2753
|
strategy: Split strategy ('sections', 'phases', or 'manual')
|
|
2748
2754
|
max_lines: Maximum lines per split (default: 500)
|
|
2749
2755
|
dry_run: Preview splits without creating files
|
|
2750
|
-
|
|
2756
|
+
|
|
2751
2757
|
Returns:
|
|
2752
2758
|
Exit code (0 for success, 1 for error)
|
|
2753
2759
|
"""
|
|
2754
2760
|
from .utils.file_splitter import (
|
|
2755
|
-
analyze_task_file,
|
|
2756
|
-
|
|
2757
|
-
split_by_phases,
|
|
2758
|
-
|
|
2761
|
+
analyze_task_file,
|
|
2762
|
+
generate_split_files,
|
|
2763
|
+
split_by_phases,
|
|
2764
|
+
split_by_sections,
|
|
2759
2765
|
)
|
|
2760
|
-
|
|
2766
|
+
|
|
2761
2767
|
try:
|
|
2762
2768
|
print_box("File Splitting Utility", CYAN)
|
|
2763
|
-
|
|
2769
|
+
|
|
2764
2770
|
# Validate file exists
|
|
2765
2771
|
if not os.path.exists(task_file):
|
|
2766
2772
|
print(f"{RED}Error: File not found: {task_file}{RESET}")
|
|
2767
2773
|
return 1
|
|
2768
|
-
|
|
2774
|
+
|
|
2769
2775
|
# Analyze file
|
|
2770
2776
|
print(f"📄 Analyzing task file: {task_file}")
|
|
2771
2777
|
analysis = analyze_task_file(task_file)
|
|
2772
|
-
|
|
2773
|
-
lines = analysis[
|
|
2774
|
-
tokens = analysis[
|
|
2778
|
+
|
|
2779
|
+
lines = analysis["total_lines"]
|
|
2780
|
+
tokens = analysis["estimated_tokens"]
|
|
2775
2781
|
print(f" Lines: {lines}")
|
|
2776
2782
|
print(f" Estimated tokens: ~{tokens:,}")
|
|
2777
|
-
|
|
2783
|
+
|
|
2778
2784
|
# Check if splitting is recommended
|
|
2779
2785
|
if lines <= max_lines:
|
|
2780
|
-
print(
|
|
2786
|
+
print(
|
|
2787
|
+
f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
|
|
2788
|
+
)
|
|
2781
2789
|
print("No splitting needed.")
|
|
2782
2790
|
return 0
|
|
2783
|
-
|
|
2791
|
+
|
|
2784
2792
|
print(f"{YELLOW}⚠️ File exceeds recommended limit ({max_lines} lines){RESET}")
|
|
2785
|
-
|
|
2793
|
+
|
|
2786
2794
|
# Read file content for splitting
|
|
2787
|
-
with open(task_file,
|
|
2795
|
+
with open(task_file, "r", encoding="utf-8") as f:
|
|
2788
2796
|
content = f.read()
|
|
2789
|
-
|
|
2797
|
+
|
|
2790
2798
|
# Apply split strategy
|
|
2791
2799
|
if strategy == "sections":
|
|
2792
2800
|
splits = split_by_sections(content, max_lines=max_lines)
|
|
@@ -2795,42 +2803,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
|
|
|
2795
2803
|
splits = split_by_phases(content)
|
|
2796
2804
|
print(f"\n💡 Suggested splits (by phases):")
|
|
2797
2805
|
else:
|
|
2798
|
-
print(
|
|
2806
|
+
print(
|
|
2807
|
+
f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
|
|
2808
|
+
)
|
|
2799
2809
|
return 1
|
|
2800
|
-
|
|
2810
|
+
|
|
2801
2811
|
# Display split preview
|
|
2802
2812
|
for i, split in enumerate(splits, 1):
|
|
2803
2813
|
filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
|
|
2804
2814
|
print(f" - {filename} ({split['line_count']} lines)")
|
|
2805
|
-
|
|
2815
|
+
|
|
2806
2816
|
# Dry run mode
|
|
2807
2817
|
if dry_run:
|
|
2808
2818
|
print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
|
|
2809
2819
|
return 0
|
|
2810
|
-
|
|
2820
|
+
|
|
2811
2821
|
# Prompt user for confirmation
|
|
2812
2822
|
create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
|
|
2813
|
-
|
|
2814
|
-
if create_files.lower() in [
|
|
2823
|
+
|
|
2824
|
+
if create_files.lower() in ["y", "yes"]:
|
|
2815
2825
|
# Create output directory
|
|
2816
2826
|
output_dir = os.path.join(os.path.dirname(task_file), "splits")
|
|
2817
|
-
|
|
2827
|
+
|
|
2818
2828
|
# Generate split files
|
|
2819
2829
|
created_files = generate_split_files(task_file, splits, output_dir)
|
|
2820
|
-
|
|
2830
|
+
|
|
2821
2831
|
print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
|
|
2822
2832
|
for file_path in created_files:
|
|
2823
2833
|
print(f" {file_path}")
|
|
2824
|
-
|
|
2834
|
+
|
|
2825
2835
|
print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
|
|
2826
2836
|
for file_path in created_files:
|
|
2827
2837
|
rel_path = os.path.relpath(file_path)
|
|
2828
2838
|
print(f" adversarial evaluate {rel_path}")
|
|
2829
2839
|
else:
|
|
2830
2840
|
print("Cancelled - no files created.")
|
|
2831
|
-
|
|
2841
|
+
|
|
2832
2842
|
return 0
|
|
2833
|
-
|
|
2843
|
+
|
|
2834
2844
|
except Exception as e:
|
|
2835
2845
|
print(f"{RED}Error during file splitting: {e}{RESET}")
|
|
2836
2846
|
return 1
|
|
@@ -2876,6 +2886,7 @@ def list_evaluators() -> int:
|
|
|
2876
2886
|
|
|
2877
2887
|
return 0
|
|
2878
2888
|
|
|
2889
|
+
|
|
2879
2890
|
def main():
|
|
2880
2891
|
"""Main CLI entry point."""
|
|
2881
2892
|
import logging
|
|
@@ -2888,10 +2899,20 @@ def main():
|
|
|
2888
2899
|
except Exception as e:
|
|
2889
2900
|
print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
|
|
2890
2901
|
|
|
2902
|
+
# Load .env file before any commands run
|
|
2903
|
+
# Use explicit path to ensure we find .env in current working directory
|
|
2904
|
+
# (load_dotenv() without args can fail to find .env in some contexts)
|
|
2905
|
+
env_file = Path.cwd() / ".env"
|
|
2906
|
+
if env_file.exists():
|
|
2907
|
+
try:
|
|
2908
|
+
load_dotenv(env_file)
|
|
2909
|
+
except (OSError, UnicodeDecodeError) as e:
|
|
2910
|
+
print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
|
|
2911
|
+
|
|
2891
2912
|
from adversarial_workflow.evaluators import (
|
|
2913
|
+
BUILTIN_EVALUATORS,
|
|
2892
2914
|
get_all_evaluators,
|
|
2893
2915
|
run_evaluator,
|
|
2894
|
-
BUILTIN_EVALUATORS,
|
|
2895
2916
|
)
|
|
2896
2917
|
|
|
2897
2918
|
logger = logging.getLogger(__name__)
|
|
@@ -2899,8 +2920,16 @@ def main():
|
|
|
2899
2920
|
# Commands that cannot be overridden by evaluators
|
|
2900
2921
|
# Note: 'review' is special - it reviews git changes without a file argument
|
|
2901
2922
|
STATIC_COMMANDS = {
|
|
2902
|
-
"init",
|
|
2903
|
-
"
|
|
2923
|
+
"init",
|
|
2924
|
+
"check",
|
|
2925
|
+
"doctor",
|
|
2926
|
+
"health",
|
|
2927
|
+
"quickstart",
|
|
2928
|
+
"agent",
|
|
2929
|
+
"split",
|
|
2930
|
+
"validate",
|
|
2931
|
+
"review",
|
|
2932
|
+
"list-evaluators",
|
|
2904
2933
|
}
|
|
2905
2934
|
|
|
2906
2935
|
parser = argparse.ArgumentParser(
|
|
@@ -2989,16 +3018,21 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
2989
3018
|
)
|
|
2990
3019
|
split_parser.add_argument("task_file", help="Task file to split")
|
|
2991
3020
|
split_parser.add_argument(
|
|
2992
|
-
"--strategy",
|
|
2993
|
-
|
|
3021
|
+
"--strategy",
|
|
3022
|
+
"-s",
|
|
3023
|
+
choices=["sections", "phases"],
|
|
3024
|
+
default="sections",
|
|
3025
|
+
help="Split strategy: 'sections' (default) or 'phases'",
|
|
2994
3026
|
)
|
|
2995
3027
|
split_parser.add_argument(
|
|
2996
|
-
"--max-lines",
|
|
2997
|
-
|
|
3028
|
+
"--max-lines",
|
|
3029
|
+
"-m",
|
|
3030
|
+
type=int,
|
|
3031
|
+
default=500,
|
|
3032
|
+
help="Maximum lines per split (default: 500)",
|
|
2998
3033
|
)
|
|
2999
3034
|
split_parser.add_argument(
|
|
3000
|
-
"--dry-run", action="store_true",
|
|
3001
|
-
help="Preview splits without creating files"
|
|
3035
|
+
"--dry-run", action="store_true", help="Preview splits without creating files"
|
|
3002
3036
|
)
|
|
3003
3037
|
|
|
3004
3038
|
# list-evaluators command
|
|
@@ -3019,7 +3053,12 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3019
3053
|
for name, config in evaluators.items():
|
|
3020
3054
|
# Skip if name conflicts with static command
|
|
3021
3055
|
if name in STATIC_COMMANDS:
|
|
3022
|
-
|
|
3056
|
+
# Only warn for user-defined evaluators, not built-ins
|
|
3057
|
+
# Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
|
|
3058
|
+
if getattr(config, "source", None) != "builtin":
|
|
3059
|
+
logger.warning(
|
|
3060
|
+
"Evaluator '%s' conflicts with CLI command; skipping", name
|
|
3061
|
+
)
|
|
3023
3062
|
# Mark as registered to prevent alias re-registration attempts
|
|
3024
3063
|
registered_configs.add(id(config))
|
|
3025
3064
|
continue
|
|
@@ -3046,10 +3085,11 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3046
3085
|
)
|
|
3047
3086
|
eval_parser.add_argument("file", help="File to evaluate")
|
|
3048
3087
|
eval_parser.add_argument(
|
|
3049
|
-
"--timeout",
|
|
3088
|
+
"--timeout",
|
|
3089
|
+
"-t",
|
|
3050
3090
|
type=int,
|
|
3051
|
-
default=
|
|
3052
|
-
help="Timeout in seconds (default: 180)"
|
|
3091
|
+
default=None,
|
|
3092
|
+
help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
|
|
3053
3093
|
)
|
|
3054
3094
|
# Store config for later execution
|
|
3055
3095
|
eval_parser.set_defaults(evaluator_config=config)
|
|
@@ -3062,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3062
3102
|
|
|
3063
3103
|
# Check for evaluator command first (has evaluator_config attribute)
|
|
3064
3104
|
if hasattr(args, "evaluator_config"):
|
|
3105
|
+
# Determine timeout: CLI flag > YAML config > default (180s)
|
|
3106
|
+
if args.timeout is not None:
|
|
3107
|
+
timeout = args.timeout
|
|
3108
|
+
source = "CLI override"
|
|
3109
|
+
elif args.evaluator_config.timeout != 180:
|
|
3110
|
+
timeout = args.evaluator_config.timeout
|
|
3111
|
+
source = "evaluator config"
|
|
3112
|
+
else:
|
|
3113
|
+
timeout = args.evaluator_config.timeout # 180 (default)
|
|
3114
|
+
source = "default"
|
|
3115
|
+
|
|
3116
|
+
# Validate CLI timeout (consistent with YAML validation)
|
|
3117
|
+
if timeout <= 0:
|
|
3118
|
+
print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
|
|
3119
|
+
return 1
|
|
3120
|
+
if timeout > 600:
|
|
3121
|
+
print(
|
|
3122
|
+
f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
|
|
3123
|
+
)
|
|
3124
|
+
timeout = 600
|
|
3125
|
+
|
|
3126
|
+
# Log actual timeout and source
|
|
3127
|
+
print(f"Using timeout: {timeout}s ({source})")
|
|
3128
|
+
|
|
3065
3129
|
return run_evaluator(
|
|
3066
3130
|
args.evaluator_config,
|
|
3067
3131
|
args.file,
|
|
3068
|
-
timeout=
|
|
3132
|
+
timeout=timeout,
|
|
3069
3133
|
)
|
|
3070
3134
|
|
|
3071
3135
|
# Execute static commands
|
|
@@ -3097,7 +3161,7 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3097
3161
|
args.task_file,
|
|
3098
3162
|
strategy=args.strategy,
|
|
3099
3163
|
max_lines=args.max_lines,
|
|
3100
|
-
dry_run=args.dry_run
|
|
3164
|
+
dry_run=args.dry_run,
|
|
3101
3165
|
)
|
|
3102
3166
|
elif args.command == "list-evaluators":
|
|
3103
3167
|
return list_evaluators()
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""Evaluators module for adversarial-workflow plugin architecture."""
|
|
2
2
|
|
|
3
|
+
from .builtins import BUILTIN_EVALUATORS
|
|
3
4
|
from .config import EvaluatorConfig
|
|
4
5
|
from .discovery import (
|
|
6
|
+
EvaluatorParseError,
|
|
5
7
|
discover_local_evaluators,
|
|
6
8
|
parse_evaluator_yaml,
|
|
7
|
-
EvaluatorParseError,
|
|
8
9
|
)
|
|
9
10
|
from .runner import run_evaluator
|
|
10
|
-
from .builtins import BUILTIN_EVALUATORS
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def get_all_evaluators() -> dict[str, EvaluatorConfig]:
|
|
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
|
|
|
17
17
|
Aliases from local evaluators are also included in the returned dictionary.
|
|
18
18
|
"""
|
|
19
19
|
import logging
|
|
20
|
+
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
22
23
|
evaluators: dict[str, EvaluatorConfig] = {}
|
|
@@ -26,6 +26,7 @@ class EvaluatorConfig:
|
|
|
26
26
|
fallback_model: Fallback model if primary fails
|
|
27
27
|
aliases: Alternative command names
|
|
28
28
|
version: Evaluator version
|
|
29
|
+
timeout: Timeout in seconds (default: 180, max: 600)
|
|
29
30
|
source: "builtin" or "local" (set internally)
|
|
30
31
|
config_file: Path to YAML file if local (set internally)
|
|
31
32
|
"""
|
|
@@ -43,6 +44,7 @@ class EvaluatorConfig:
|
|
|
43
44
|
fallback_model: str | None = None
|
|
44
45
|
aliases: list[str] = field(default_factory=list)
|
|
45
46
|
version: str = "1.0.0"
|
|
47
|
+
timeout: int = 180 # Timeout in seconds (default: 180, max: 600)
|
|
46
48
|
|
|
47
49
|
# Metadata (set internally during discovery, not from YAML)
|
|
48
50
|
source: str = "builtin"
|
|
@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
40
40
|
try:
|
|
41
41
|
content = yml_file.read_text(encoding="utf-8")
|
|
42
42
|
except UnicodeDecodeError as e:
|
|
43
|
-
raise EvaluatorParseError(
|
|
44
|
-
f"File encoding error (not UTF-8): {yml_file}"
|
|
45
|
-
) from e
|
|
43
|
+
raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e
|
|
46
44
|
|
|
47
45
|
# Parse YAML
|
|
48
46
|
data = yaml.safe_load(content)
|
|
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
58
56
|
)
|
|
59
57
|
|
|
60
58
|
# Validate required fields exist
|
|
61
|
-
required = [
|
|
59
|
+
required = [
|
|
60
|
+
"name",
|
|
61
|
+
"description",
|
|
62
|
+
"model",
|
|
63
|
+
"api_key_env",
|
|
64
|
+
"prompt",
|
|
65
|
+
"output_suffix",
|
|
66
|
+
]
|
|
62
67
|
missing = [f for f in required if f not in data]
|
|
63
68
|
if missing:
|
|
64
69
|
raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
|
|
@@ -117,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
117
122
|
f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
|
|
118
123
|
)
|
|
119
124
|
|
|
125
|
+
# Validate timeout if present
|
|
126
|
+
if "timeout" in data:
|
|
127
|
+
timeout = data["timeout"]
|
|
128
|
+
# Handle null/empty values
|
|
129
|
+
if timeout is None or timeout == "":
|
|
130
|
+
raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
|
|
131
|
+
# Check for bool before int (bool is subclass of int in Python)
|
|
132
|
+
# YAML parses 'yes'/'true' as True, 'no'/'false' as False
|
|
133
|
+
if isinstance(timeout, bool):
|
|
134
|
+
raise EvaluatorParseError(
|
|
135
|
+
f"Field 'timeout' must be an integer, got bool: {timeout!r}"
|
|
136
|
+
)
|
|
137
|
+
if not isinstance(timeout, int):
|
|
138
|
+
raise EvaluatorParseError(
|
|
139
|
+
f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
|
|
140
|
+
)
|
|
141
|
+
# timeout=0 is invalid (does not disable timeout - use a large value instead)
|
|
142
|
+
if timeout <= 0:
|
|
143
|
+
raise EvaluatorParseError(
|
|
144
|
+
f"Field 'timeout' must be positive (> 0), got {timeout}"
|
|
145
|
+
)
|
|
146
|
+
if timeout > 600:
|
|
147
|
+
logger.warning(
|
|
148
|
+
"Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
|
|
149
|
+
timeout,
|
|
150
|
+
yml_file.name,
|
|
151
|
+
)
|
|
152
|
+
data["timeout"] = 600
|
|
153
|
+
|
|
120
154
|
# Filter to known fields only (log unknown fields)
|
|
121
155
|
known_fields = {
|
|
122
156
|
"name",
|
|
@@ -129,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
129
163
|
"fallback_model",
|
|
130
164
|
"aliases",
|
|
131
165
|
"version",
|
|
166
|
+
"timeout",
|
|
132
167
|
}
|
|
133
168
|
unknown = set(data.keys()) - known_fields
|
|
134
169
|
if unknown:
|
|
@@ -10,10 +10,10 @@ import tempfile
|
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
|
|
13
|
-
from .
|
|
14
|
-
from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
|
|
13
|
+
from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
|
|
15
14
|
from ..utils.config import load_config
|
|
16
15
|
from ..utils.validation import validate_evaluation_output
|
|
16
|
+
from .config import EvaluatorConfig
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
|
|
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
|
|
|
124
124
|
"""
|
|
125
125
|
|
|
126
126
|
# Create temp file for prompt
|
|
127
|
-
with tempfile.NamedTemporaryFile(mode=
|
|
127
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
|
|
128
128
|
f.write(full_prompt)
|
|
129
129
|
prompt_file = f.name
|
|
130
130
|
|
|
@@ -136,12 +136,15 @@ def _run_custom_evaluator(
|
|
|
136
136
|
# Build aider command
|
|
137
137
|
cmd = [
|
|
138
138
|
"aider",
|
|
139
|
-
"--model",
|
|
139
|
+
"--model",
|
|
140
|
+
config.model,
|
|
140
141
|
"--yes",
|
|
141
142
|
"--no-git",
|
|
142
143
|
"--no-auto-commits",
|
|
143
|
-
"--message-file",
|
|
144
|
-
|
|
144
|
+
"--message-file",
|
|
145
|
+
prompt_file,
|
|
146
|
+
"--read",
|
|
147
|
+
file_path,
|
|
145
148
|
]
|
|
146
149
|
|
|
147
150
|
result = subprocess.run(
|
|
@@ -224,7 +227,10 @@ def _execute_script(
|
|
|
224
227
|
|
|
225
228
|
# Validate output
|
|
226
229
|
file_basename = Path(file_path).stem
|
|
227
|
-
log_file =
|
|
230
|
+
log_file = (
|
|
231
|
+
Path(project_config["log_directory"])
|
|
232
|
+
/ f"{file_basename}-{config.output_suffix}.md"
|
|
233
|
+
)
|
|
228
234
|
|
|
229
235
|
is_valid, verdict, message = validate_evaluation_output(str(log_file))
|
|
230
236
|
|
|
@@ -235,7 +241,9 @@ def _execute_script(
|
|
|
235
241
|
return _report_verdict(verdict, log_file, config)
|
|
236
242
|
|
|
237
243
|
|
|
238
|
-
def _report_verdict(
|
|
244
|
+
def _report_verdict(
|
|
245
|
+
verdict: str | None, log_file: Path, config: EvaluatorConfig
|
|
246
|
+
) -> int:
|
|
239
247
|
"""Report the evaluation verdict to terminal."""
|
|
240
248
|
print()
|
|
241
249
|
if verdict == "APPROVED":
|
|
@@ -4,360 +4,392 @@ This module provides functionality to split large markdown files into smaller,
|
|
|
4
4
|
independently evaluable chunks to work around OpenAI's rate limits.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
import re
|
|
8
7
|
import os
|
|
8
|
+
import re
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import Any, Dict, List
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def analyze_task_file(file_path: str) -> Dict[str, Any]:
|
|
14
14
|
"""Analyze file structure and suggest split points.
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
Args:
|
|
17
17
|
file_path: Path to the markdown file to analyze
|
|
18
|
-
|
|
18
|
+
|
|
19
19
|
Returns:
|
|
20
20
|
Dict containing:
|
|
21
21
|
- total_lines: Total number of lines
|
|
22
22
|
- sections: List of detected sections with metadata
|
|
23
23
|
- estimated_tokens: Rough token estimate (lines * 4)
|
|
24
24
|
- suggested_splits: List of suggested split points
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
Raises:
|
|
27
27
|
FileNotFoundError: If file doesn't exist
|
|
28
28
|
ValueError: If file is empty or too small
|
|
29
29
|
"""
|
|
30
30
|
if not os.path.exists(file_path):
|
|
31
31
|
raise FileNotFoundError(f"File not found: {file_path}")
|
|
32
|
-
|
|
33
|
-
with open(file_path,
|
|
32
|
+
|
|
33
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
34
34
|
content = f.read()
|
|
35
|
-
|
|
35
|
+
|
|
36
36
|
if not content.strip():
|
|
37
37
|
raise ValueError("File is empty or too small")
|
|
38
|
-
|
|
39
|
-
lines = content.split(
|
|
38
|
+
|
|
39
|
+
lines = content.split("\n")
|
|
40
40
|
total_lines = len(lines)
|
|
41
|
-
|
|
41
|
+
|
|
42
42
|
# Detect markdown sections
|
|
43
43
|
sections = []
|
|
44
44
|
current_section = None
|
|
45
45
|
current_start = 1
|
|
46
|
-
|
|
46
|
+
|
|
47
47
|
for i, line in enumerate(lines, 1):
|
|
48
48
|
# Check for markdown headings (# or ##)
|
|
49
|
-
if re.match(r
|
|
49
|
+
if re.match(r"^#+\s+", line.strip()):
|
|
50
50
|
# Close previous section
|
|
51
51
|
if current_section is not None:
|
|
52
|
-
current_section[
|
|
53
|
-
current_section[
|
|
52
|
+
current_section["end_line"] = i - 1
|
|
53
|
+
current_section["line_count"] = (
|
|
54
|
+
current_section["end_line"] - current_section["start_line"] + 1
|
|
55
|
+
)
|
|
54
56
|
sections.append(current_section)
|
|
55
|
-
|
|
57
|
+
|
|
56
58
|
# Start new section
|
|
57
59
|
heading_level = len(line.lstrip().split()[0]) # Count # characters
|
|
58
|
-
title = re.sub(r
|
|
60
|
+
title = re.sub(r"^#+\s+", "", line.strip())
|
|
59
61
|
current_section = {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
"title": title,
|
|
63
|
+
"heading_level": heading_level,
|
|
64
|
+
"start_line": i,
|
|
65
|
+
"end_line": None,
|
|
66
|
+
"line_count": 0,
|
|
65
67
|
}
|
|
66
68
|
current_start = i
|
|
67
|
-
|
|
69
|
+
|
|
68
70
|
# Close final section
|
|
69
71
|
if current_section is not None:
|
|
70
|
-
current_section[
|
|
71
|
-
current_section[
|
|
72
|
+
current_section["end_line"] = total_lines
|
|
73
|
+
current_section["line_count"] = (
|
|
74
|
+
current_section["end_line"] - current_section["start_line"] + 1
|
|
75
|
+
)
|
|
72
76
|
sections.append(current_section)
|
|
73
|
-
|
|
77
|
+
|
|
74
78
|
# If no sections found, treat entire file as one section
|
|
75
79
|
if not sections:
|
|
76
|
-
sections = [
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
80
|
+
sections = [
|
|
81
|
+
{
|
|
82
|
+
"title": "Full Document",
|
|
83
|
+
"heading_level": 1,
|
|
84
|
+
"start_line": 1,
|
|
85
|
+
"end_line": total_lines,
|
|
86
|
+
"line_count": total_lines,
|
|
87
|
+
}
|
|
88
|
+
]
|
|
89
|
+
|
|
84
90
|
# Estimate tokens (rough approximation: 1 line ≈ 4 tokens)
|
|
85
91
|
estimated_tokens = total_lines * 4
|
|
86
|
-
|
|
92
|
+
|
|
87
93
|
# Suggest splits if file is large
|
|
88
94
|
suggested_splits = []
|
|
89
95
|
if total_lines > 500:
|
|
90
96
|
# Suggest section-based splits
|
|
91
97
|
suggested_splits = _suggest_section_splits(sections, max_lines=500)
|
|
92
|
-
|
|
98
|
+
|
|
93
99
|
return {
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
100
|
+
"total_lines": total_lines,
|
|
101
|
+
"sections": sections,
|
|
102
|
+
"estimated_tokens": estimated_tokens,
|
|
103
|
+
"suggested_splits": suggested_splits,
|
|
98
104
|
}
|
|
99
105
|
|
|
100
106
|
|
|
101
107
|
def split_by_sections(content: str, max_lines: int = 500) -> List[Dict[str, Any]]:
|
|
102
108
|
"""Split file by markdown sections.
|
|
103
|
-
|
|
109
|
+
|
|
104
110
|
Args:
|
|
105
111
|
content: The markdown content to split
|
|
106
112
|
max_lines: Maximum lines per split
|
|
107
|
-
|
|
113
|
+
|
|
108
114
|
Returns:
|
|
109
115
|
List of split dictionaries with metadata
|
|
110
116
|
"""
|
|
111
|
-
lines = content.split(
|
|
117
|
+
lines = content.split("\n")
|
|
112
118
|
total_lines = len(lines)
|
|
113
|
-
|
|
119
|
+
|
|
114
120
|
if total_lines <= max_lines:
|
|
115
|
-
return [
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
121
|
+
return [
|
|
122
|
+
{
|
|
123
|
+
"content": content,
|
|
124
|
+
"title": "Full Document",
|
|
125
|
+
"start_line": 1,
|
|
126
|
+
"end_line": total_lines,
|
|
127
|
+
"line_count": total_lines,
|
|
128
|
+
}
|
|
129
|
+
]
|
|
130
|
+
|
|
123
131
|
splits = []
|
|
124
132
|
current_split_lines = []
|
|
125
133
|
current_start = 1
|
|
126
134
|
current_title = "Part"
|
|
127
135
|
split_count = 1
|
|
128
|
-
|
|
136
|
+
|
|
129
137
|
for i, line in enumerate(lines, 1):
|
|
130
138
|
current_split_lines.append(line)
|
|
131
|
-
|
|
139
|
+
|
|
132
140
|
# Check if we hit a section boundary and are near limit
|
|
133
|
-
is_section_boundary = re.match(r
|
|
141
|
+
is_section_boundary = re.match(r"^#+\s+", line.strip())
|
|
134
142
|
approaching_limit = len(current_split_lines) >= max_lines * 0.8
|
|
135
|
-
|
|
136
|
-
if len(current_split_lines) >= max_lines or (
|
|
143
|
+
|
|
144
|
+
if len(current_split_lines) >= max_lines or (
|
|
145
|
+
is_section_boundary and approaching_limit
|
|
146
|
+
):
|
|
137
147
|
# Create split
|
|
138
|
-
split_content =
|
|
139
|
-
splits.append(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
148
|
+
split_content = "\n".join(current_split_lines)
|
|
149
|
+
splits.append(
|
|
150
|
+
{
|
|
151
|
+
"content": split_content,
|
|
152
|
+
"title": f"Part {split_count}",
|
|
153
|
+
"start_line": current_start,
|
|
154
|
+
"end_line": i,
|
|
155
|
+
"line_count": len(current_split_lines),
|
|
156
|
+
}
|
|
157
|
+
)
|
|
158
|
+
|
|
147
159
|
# Reset for next split
|
|
148
160
|
current_split_lines = []
|
|
149
161
|
current_start = i + 1
|
|
150
162
|
split_count += 1
|
|
151
|
-
|
|
163
|
+
|
|
152
164
|
# Handle remaining lines
|
|
153
165
|
if current_split_lines:
|
|
154
|
-
split_content =
|
|
155
|
-
splits.append(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
166
|
+
split_content = "\n".join(current_split_lines)
|
|
167
|
+
splits.append(
|
|
168
|
+
{
|
|
169
|
+
"content": split_content,
|
|
170
|
+
"title": f"Part {split_count}",
|
|
171
|
+
"start_line": current_start,
|
|
172
|
+
"end_line": total_lines,
|
|
173
|
+
"line_count": len(current_split_lines),
|
|
174
|
+
}
|
|
175
|
+
)
|
|
176
|
+
|
|
163
177
|
return splits
|
|
164
178
|
|
|
165
179
|
|
|
166
180
|
def split_by_phases(content: str) -> List[Dict[str, Any]]:
|
|
167
181
|
"""Split file by implementation phases.
|
|
168
|
-
|
|
182
|
+
|
|
169
183
|
Args:
|
|
170
184
|
content: The markdown content to split
|
|
171
|
-
|
|
185
|
+
|
|
172
186
|
Returns:
|
|
173
187
|
List of split dictionaries, one per phase
|
|
174
188
|
"""
|
|
175
|
-
lines = content.split(
|
|
189
|
+
lines = content.split("\n")
|
|
176
190
|
splits = []
|
|
177
191
|
current_split_lines = []
|
|
178
192
|
current_phase = None
|
|
179
193
|
current_start = 1
|
|
180
|
-
|
|
194
|
+
|
|
181
195
|
for i, line in enumerate(lines, 1):
|
|
182
196
|
# Check for phase markers
|
|
183
|
-
phase_match = re.search(r
|
|
184
|
-
|
|
197
|
+
phase_match = re.search(r"#+\s+Phase\s+(\d+)", line, re.IGNORECASE)
|
|
198
|
+
|
|
185
199
|
if phase_match:
|
|
186
200
|
# Close previous split
|
|
187
201
|
if current_split_lines:
|
|
188
|
-
split_content =
|
|
202
|
+
split_content = "\n".join(current_split_lines)
|
|
189
203
|
title = f"Phase {current_phase}" if current_phase else "Overview"
|
|
190
|
-
splits.append(
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
204
|
+
splits.append(
|
|
205
|
+
{
|
|
206
|
+
"content": split_content,
|
|
207
|
+
"title": title,
|
|
208
|
+
"phase_number": current_phase,
|
|
209
|
+
"start_line": current_start,
|
|
210
|
+
"end_line": i - 1,
|
|
211
|
+
"line_count": len(current_split_lines),
|
|
212
|
+
}
|
|
213
|
+
)
|
|
214
|
+
|
|
199
215
|
# Start new split
|
|
200
216
|
current_phase = int(phase_match.group(1))
|
|
201
217
|
current_split_lines = [line]
|
|
202
218
|
current_start = i
|
|
203
219
|
else:
|
|
204
220
|
current_split_lines.append(line)
|
|
205
|
-
|
|
221
|
+
|
|
206
222
|
# Handle final split
|
|
207
223
|
if current_split_lines:
|
|
208
|
-
split_content =
|
|
224
|
+
split_content = "\n".join(current_split_lines)
|
|
209
225
|
title = f"Phase {current_phase}" if current_phase else "Full Document"
|
|
210
|
-
phase_info = {
|
|
211
|
-
splits.append(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
226
|
+
phase_info = {"phase_number": current_phase} if current_phase else {}
|
|
227
|
+
splits.append(
|
|
228
|
+
{
|
|
229
|
+
"content": split_content,
|
|
230
|
+
"title": title,
|
|
231
|
+
"start_line": current_start,
|
|
232
|
+
"end_line": len(lines),
|
|
233
|
+
"line_count": len(current_split_lines),
|
|
234
|
+
**phase_info,
|
|
235
|
+
}
|
|
236
|
+
)
|
|
237
|
+
|
|
220
238
|
# If no phases found, return entire content
|
|
221
239
|
if not splits:
|
|
222
|
-
splits = [
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
240
|
+
splits = [
|
|
241
|
+
{
|
|
242
|
+
"content": content,
|
|
243
|
+
"title": "Full Document",
|
|
244
|
+
"start_line": 1,
|
|
245
|
+
"end_line": len(lines),
|
|
246
|
+
"line_count": len(lines),
|
|
247
|
+
}
|
|
248
|
+
]
|
|
249
|
+
|
|
230
250
|
return splits
|
|
231
251
|
|
|
232
252
|
|
|
233
253
|
def split_at_lines(content: str, line_numbers: List[int]) -> List[Dict[str, Any]]:
|
|
234
254
|
"""Split at specified line numbers.
|
|
235
|
-
|
|
255
|
+
|
|
236
256
|
Args:
|
|
237
257
|
content: The content to split
|
|
238
258
|
line_numbers: Line numbers where splits should occur
|
|
239
|
-
|
|
259
|
+
|
|
240
260
|
Returns:
|
|
241
261
|
List of split dictionaries
|
|
242
262
|
"""
|
|
243
|
-
lines = content.split(
|
|
263
|
+
lines = content.split("\n")
|
|
244
264
|
total_lines = len(lines)
|
|
245
|
-
|
|
265
|
+
|
|
246
266
|
if not line_numbers:
|
|
247
|
-
return [
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
267
|
+
return [
|
|
268
|
+
{
|
|
269
|
+
"content": content,
|
|
270
|
+
"title": "Full Document",
|
|
271
|
+
"start_line": 1,
|
|
272
|
+
"end_line": total_lines,
|
|
273
|
+
"line_count": total_lines,
|
|
274
|
+
}
|
|
275
|
+
]
|
|
276
|
+
|
|
255
277
|
# Sort and deduplicate line numbers
|
|
256
278
|
split_points = sorted(set(line_numbers))
|
|
257
|
-
|
|
279
|
+
|
|
258
280
|
splits = []
|
|
259
281
|
current_start = 1
|
|
260
|
-
|
|
282
|
+
|
|
261
283
|
for split_line in split_points:
|
|
262
284
|
if split_line >= total_lines:
|
|
263
285
|
continue
|
|
264
|
-
|
|
286
|
+
|
|
265
287
|
# Create split from current_start to split_line
|
|
266
|
-
split_lines = lines[current_start - 1:split_line]
|
|
267
|
-
split_content =
|
|
268
|
-
|
|
269
|
-
splits.append(
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
288
|
+
split_lines = lines[current_start - 1 : split_line]
|
|
289
|
+
split_content = "\n".join(split_lines)
|
|
290
|
+
|
|
291
|
+
splits.append(
|
|
292
|
+
{
|
|
293
|
+
"content": split_content,
|
|
294
|
+
"title": f"Lines {current_start}-{split_line}",
|
|
295
|
+
"start_line": current_start,
|
|
296
|
+
"end_line": split_line,
|
|
297
|
+
"line_count": len(split_lines),
|
|
298
|
+
}
|
|
299
|
+
)
|
|
300
|
+
|
|
277
301
|
current_start = split_line + 1
|
|
278
|
-
|
|
302
|
+
|
|
279
303
|
# Handle remaining lines after final split
|
|
280
304
|
if current_start <= total_lines:
|
|
281
|
-
remaining_lines = lines[current_start - 1:]
|
|
282
|
-
split_content =
|
|
283
|
-
|
|
284
|
-
splits.append(
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
305
|
+
remaining_lines = lines[current_start - 1 :]
|
|
306
|
+
split_content = "\n".join(remaining_lines)
|
|
307
|
+
|
|
308
|
+
splits.append(
|
|
309
|
+
{
|
|
310
|
+
"content": split_content,
|
|
311
|
+
"title": f"Lines {current_start}-{total_lines}",
|
|
312
|
+
"start_line": current_start,
|
|
313
|
+
"end_line": total_lines,
|
|
314
|
+
"line_count": len(remaining_lines),
|
|
315
|
+
}
|
|
316
|
+
)
|
|
317
|
+
|
|
292
318
|
return splits
|
|
293
319
|
|
|
294
320
|
|
|
295
|
-
def generate_split_files(
|
|
321
|
+
def generate_split_files(
|
|
322
|
+
original: str, splits: List[Dict[str, Any]], output_dir: str
|
|
323
|
+
) -> List[str]:
|
|
296
324
|
"""Generate split files with metadata and cross-references.
|
|
297
|
-
|
|
325
|
+
|
|
298
326
|
Args:
|
|
299
327
|
original: Original filename
|
|
300
328
|
splits: List of split dictionaries
|
|
301
329
|
output_dir: Directory to write split files
|
|
302
|
-
|
|
330
|
+
|
|
303
331
|
Returns:
|
|
304
332
|
List of created file paths
|
|
305
333
|
"""
|
|
306
334
|
os.makedirs(output_dir, exist_ok=True)
|
|
307
|
-
|
|
335
|
+
|
|
308
336
|
created_files = []
|
|
309
337
|
original_name = Path(original).stem
|
|
310
338
|
original_ext = Path(original).suffix
|
|
311
|
-
|
|
339
|
+
|
|
312
340
|
for i, split in enumerate(splits, 1):
|
|
313
341
|
# Generate filename
|
|
314
342
|
filename = f"{original_name}-part{i}{original_ext}"
|
|
315
343
|
file_path = os.path.join(output_dir, filename)
|
|
316
|
-
|
|
344
|
+
|
|
317
345
|
# Create content with metadata header
|
|
318
346
|
metadata_header = f"""<!-- Split from {original} -->
|
|
319
347
|
<!-- Part {i} of {len(splits)} -->
|
|
320
348
|
<!-- Lines {split['start_line']}-{split['end_line']} ({split['line_count']} lines) -->
|
|
321
349
|
|
|
322
350
|
"""
|
|
323
|
-
|
|
324
|
-
full_content = metadata_header + split[
|
|
325
|
-
|
|
351
|
+
|
|
352
|
+
full_content = metadata_header + split["content"]
|
|
353
|
+
|
|
326
354
|
# Write file
|
|
327
|
-
with open(file_path,
|
|
355
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
328
356
|
f.write(full_content)
|
|
329
|
-
|
|
357
|
+
|
|
330
358
|
created_files.append(file_path)
|
|
331
|
-
|
|
359
|
+
|
|
332
360
|
return created_files
|
|
333
361
|
|
|
334
362
|
|
|
335
|
-
def _suggest_section_splits(
|
|
363
|
+
def _suggest_section_splits(
|
|
364
|
+
sections: List[Dict[str, Any]], max_lines: int = 500
|
|
365
|
+
) -> List[Dict[str, Any]]:
|
|
336
366
|
"""Suggest optimal split points based on sections.
|
|
337
|
-
|
|
367
|
+
|
|
338
368
|
Args:
|
|
339
369
|
sections: List of section metadata
|
|
340
370
|
max_lines: Maximum lines per split
|
|
341
|
-
|
|
371
|
+
|
|
342
372
|
Returns:
|
|
343
373
|
List of suggested split configurations
|
|
344
374
|
"""
|
|
345
375
|
suggestions = []
|
|
346
376
|
current_chunk_lines = 0
|
|
347
377
|
current_chunk_sections = []
|
|
348
|
-
|
|
378
|
+
|
|
349
379
|
for section in sections:
|
|
350
|
-
section_lines = section[
|
|
351
|
-
|
|
380
|
+
section_lines = section["line_count"]
|
|
381
|
+
|
|
352
382
|
# If adding this section would exceed limit, finish current chunk
|
|
353
383
|
if current_chunk_lines + section_lines > max_lines and current_chunk_sections:
|
|
354
|
-
suggestions.append(
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
384
|
+
suggestions.append(
|
|
385
|
+
{
|
|
386
|
+
"sections": current_chunk_sections.copy(),
|
|
387
|
+
"total_lines": current_chunk_lines,
|
|
388
|
+
"start_line": current_chunk_sections[0]["start_line"],
|
|
389
|
+
"end_line": current_chunk_sections[-1]["end_line"],
|
|
390
|
+
}
|
|
391
|
+
)
|
|
392
|
+
|
|
361
393
|
# Start new chunk
|
|
362
394
|
current_chunk_sections = [section]
|
|
363
395
|
current_chunk_lines = section_lines
|
|
@@ -365,14 +397,16 @@ def _suggest_section_splits(sections: List[Dict[str, Any]], max_lines: int = 500
|
|
|
365
397
|
# Add section to current chunk
|
|
366
398
|
current_chunk_sections.append(section)
|
|
367
399
|
current_chunk_lines += section_lines
|
|
368
|
-
|
|
400
|
+
|
|
369
401
|
# Add final chunk
|
|
370
402
|
if current_chunk_sections:
|
|
371
|
-
suggestions.append(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
403
|
+
suggestions.append(
|
|
404
|
+
{
|
|
405
|
+
"sections": current_chunk_sections,
|
|
406
|
+
"total_lines": current_chunk_lines,
|
|
407
|
+
"start_line": current_chunk_sections[0]["start_line"],
|
|
408
|
+
"end_line": current_chunk_sections[-1]["end_line"],
|
|
409
|
+
}
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
return suggestions
|
|
@@ -47,7 +47,9 @@ def validate_evaluation_output(
|
|
|
47
47
|
"concerns",
|
|
48
48
|
]
|
|
49
49
|
|
|
50
|
-
has_evaluation_content = any(
|
|
50
|
+
has_evaluation_content = any(
|
|
51
|
+
marker in content_lower for marker in evaluation_markers
|
|
52
|
+
)
|
|
51
53
|
if not has_evaluation_content:
|
|
52
54
|
return (
|
|
53
55
|
False,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: adversarial-workflow
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
|
|
5
5
|
Author: Fredrik Matheson
|
|
6
6
|
License: MIT
|
|
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
55
55
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
56
56
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
57
57
|
|
|
58
|
-
## What's New in v0.6.
|
|
58
|
+
## What's New in v0.6.3
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
### Upgrade
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install --upgrade adversarial-workflow
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### v0.6.3 - Configurable Timeouts
|
|
67
|
+
|
|
68
|
+
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
69
|
+
- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
|
|
70
|
+
- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
|
|
71
|
+
- **Safety limits**: Maximum 600 seconds to prevent runaway processes
|
|
72
|
+
|
|
73
|
+
### v0.6.2 - .env Loading & Stability
|
|
74
|
+
|
|
75
|
+
- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
|
|
76
|
+
- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
|
|
77
|
+
- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
|
|
78
|
+
|
|
79
|
+
### v0.6.0 - Plugin Architecture
|
|
80
|
+
|
|
81
|
+
🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:
|
|
61
82
|
|
|
62
83
|
```bash
|
|
63
84
|
# Create a custom evaluator
|
|
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
|
|
|
459
480
|
| `aliases` | No | Alternative command names |
|
|
460
481
|
| `log_prefix` | No | CLI output prefix |
|
|
461
482
|
| `fallback_model` | No | Fallback model if primary fails |
|
|
483
|
+
| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
|
|
462
484
|
| `version` | No | Evaluator version (default: 1.0.0) |
|
|
463
485
|
|
|
464
486
|
### Listing Available Evaluators
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
adversarial_workflow/__init__.py,sha256=
|
|
2
|
-
adversarial_workflow/__main__.py,sha256=
|
|
3
|
-
adversarial_workflow/cli.py,sha256=
|
|
4
|
-
adversarial_workflow/evaluators/__init__.py,sha256=
|
|
1
|
+
adversarial_workflow/__init__.py,sha256=moTEp6nKU5F4B1YnJaSBmwhptkDP0ST5n--2hak9PRc,596
|
|
2
|
+
adversarial_workflow/__main__.py,sha256=iM2jmO5YCFpGxfWiEhIYi_SsxVa0hRIE-MB7J0EcN7Y,120
|
|
3
|
+
adversarial_workflow/cli.py,sha256=FxjoC3KVUiwbOF7mWNPe6Zrk82fQcqyE8SPi5bo3ntI,111802
|
|
4
|
+
adversarial_workflow/evaluators/__init__.py,sha256=A9ZKUmjSMfyvEu6jDzYAFLxfkt_OQ4RGA10Bv_eO2i4,1267
|
|
5
5
|
adversarial_workflow/evaluators/builtins.py,sha256=u5LokYLe8ruEW2tunhOQaNSkpcZ9Ee2IeTkaC0dZDSY,1102
|
|
6
|
-
adversarial_workflow/evaluators/config.py,sha256=
|
|
7
|
-
adversarial_workflow/evaluators/discovery.py,sha256=
|
|
8
|
-
adversarial_workflow/evaluators/runner.py,sha256=
|
|
6
|
+
adversarial_workflow/evaluators/config.py,sha256=H_4vkto07rAqnz0qEYdzN_DH6WbvRPMIEdkEOFE58UI,1651
|
|
7
|
+
adversarial_workflow/evaluators/discovery.py,sha256=dPQ0dDy9anYjzLnG-V9gVrLkCVAVZ2tEE9dyFWqSvJc,8079
|
|
8
|
+
adversarial_workflow/evaluators/runner.py,sha256=JPVeigjGF2fRDVJLcGyDEuy9pCIp-LjmVAZyucMbdCU,9310
|
|
9
9
|
adversarial_workflow/templates/.aider.conf.yml.template,sha256=jT2jWIgsnmS3HLhoQWMTO3GV07bUcsT2keYw60jqiDw,183
|
|
10
10
|
adversarial_workflow/templates/.env.example.template,sha256=TmTlcgz44uZqIbqgXqdfHMl-0vVn96F_EGNohClFkb8,1821
|
|
11
11
|
adversarial_workflow/templates/README.template,sha256=FQAMPO99eIt_kgQfwhGHcrK736rm_MEvWSbPnqBSjAE,1349
|
|
@@ -23,11 +23,11 @@ adversarial_workflow/templates/agent-context/current-state.json.template,sha256=
|
|
|
23
23
|
adversarial_workflow/utils/__init__.py,sha256=Pnm-a_jqoMVOxHdvVWXeVrL0IKI-zkY7EAdbQmZAkSI,352
|
|
24
24
|
adversarial_workflow/utils/colors.py,sha256=uRrG6KfIDBLo0F5_vPwms9NCm9-x8YXBiyZ4naCr868,160
|
|
25
25
|
adversarial_workflow/utils/config.py,sha256=NBoC_-YYukEVo6BgpX2cDyeqV-3tnn_sHNU9L1AuSLQ,1341
|
|
26
|
-
adversarial_workflow/utils/file_splitter.py,sha256
|
|
27
|
-
adversarial_workflow/utils/validation.py,sha256=
|
|
28
|
-
adversarial_workflow-0.6.
|
|
29
|
-
adversarial_workflow-0.6.
|
|
30
|
-
adversarial_workflow-0.6.
|
|
31
|
-
adversarial_workflow-0.6.
|
|
32
|
-
adversarial_workflow-0.6.
|
|
33
|
-
adversarial_workflow-0.6.
|
|
26
|
+
adversarial_workflow/utils/file_splitter.py,sha256=-zSWgAZ71DfX6dBu15Y4M84NBbJzq-0ENktbBEp9zvQ,12409
|
|
27
|
+
adversarial_workflow/utils/validation.py,sha256=ZiJxtm03kJXicfFTt0QZwpc9V_D8PkDOVYrJEDsafQI,2202
|
|
28
|
+
adversarial_workflow-0.6.3.dist-info/licenses/LICENSE,sha256=M-dOQlre-NmicyPa55hYOJUW8roGpCKEgtq-z0z1KCA,1073
|
|
29
|
+
adversarial_workflow-0.6.3.dist-info/METADATA,sha256=4dfW8_CURJEoooPFtdqtYu-R-BVj-SCC-AXg_teHklg,30835
|
|
30
|
+
adversarial_workflow-0.6.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
31
|
+
adversarial_workflow-0.6.3.dist-info/entry_points.txt,sha256=9H-iZ-yF1uKZ8P0G1suc6kWR0NvK7uPZJbhN7nvt1sE,62
|
|
32
|
+
adversarial_workflow-0.6.3.dist-info/top_level.txt,sha256=8irutNxLRjUbTlzfAibIpz7_ovkkF2h8ES69NQpv24c,21
|
|
33
|
+
adversarial_workflow-0.6.3.dist-info/RECORD,,
|
{adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{adversarial_workflow-0.6.1.dist-info → adversarial_workflow-0.6.3.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|