adversarial-workflow 0.6.1__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/PKG-INFO +25 -3
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/README.md +24 -2
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__init__.py +1 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py +1 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py +129 -65
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py +3 -2
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py +2 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py +39 -4
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py +16 -8
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/file_splitter.py +218 -184
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/validation.py +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO +25 -3
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt +2 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/pyproject.toml +3 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli.py +24 -69
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py +154 -200
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_config.py +55 -44
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_env_loading.py +51 -89
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluate.py +188 -129
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_discovery.py +206 -1
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_runner.py +18 -5
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_file_splitter.py +106 -103
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_list_evaluators.py +28 -45
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_python_version.py +17 -16
- adversarial_workflow-0.6.3/tests/test_scripts_project.py +120 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_split_command.py +45 -37
- adversarial_workflow-0.6.3/tests/test_timeout_integration.py +406 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_utils_validation.py +26 -10
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/LICENSE +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/builtins.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.env.example.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/README.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/config.yml.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/example-task.md.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/__init__.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/colors.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/config.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/entry_points.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/requires.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/top_level.txt +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.cfg +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/setup.py +0 -0
- {adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/tests/test_evaluator_config.py +0 -0
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: adversarial-workflow
-Version: 0.6.1
+Version: 0.6.3
 Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
 Author: Fredrik Matheson
 License: MIT
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
 - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
 - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

-## What's New in v0.6.
+## What's New in v0.6.3

-
+### Upgrade
+
+```bash
+pip install --upgrade adversarial-workflow
+```
+
+### v0.6.3 - Configurable Timeouts
+
+- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+- **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+### v0.6.2 - .env Loading & Stability
+
+- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+### v0.6.0 - Plugin Architecture
+
+🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

 ```bash
 # Create a custom evaluator
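A minimal usage sketch of the upgrade and the new CLI timeout override described in the notes above; the task file name, and the assumption that the built-in `evaluate` command accepts the new flag, are illustrative rather than confirmed by this diff:

```bash
# Upgrade to 0.6.3
pip install --upgrade adversarial-workflow

# Override any YAML-configured timeout for a single run (values above 600s are clamped)
adversarial evaluate my-task.md --timeout 400
# The CLI now logs the chosen value and its source, e.g.:
#   Using timeout: 400s (CLI override)
```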
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
 | `aliases` | No | Alternative command names |
 | `log_prefix` | No | CLI output prefix |
 | `fallback_model` | No | Fallback model if primary fails |
+| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
 | `version` | No | Evaluator version (default: 1.0.0) |

 ### Listing Available Evaluators
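A hedged sketch of a local evaluator YAML that combines the documented required fields with the new `timeout` field; the file path, model identifier, API key name, prompt, and aliases are illustrative assumptions, not values shipped with the package:

```yaml
# .adversarial/evaluators/mistral-review.yml  (illustrative path and values)
name: mistral-review
description: Slow-model review pass that needs a longer timeout
model: mistral/mistral-large-latest    # assumed model identifier
api_key_env: MISTRAL_API_KEY           # assumed environment variable name
prompt: |
  Review the attached file and report concrete issues.
output_suffix: MISTRAL-REVIEW
timeout: 300        # seconds; default 180, values above 600 are clamped
aliases:
  - mreview
```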
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/README.md
RENAMED
@@ -20,9 +20,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
 - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
 - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

-## What's New in v0.6.
+## What's New in v0.6.3

-
+### Upgrade
+
+```bash
+pip install --upgrade adversarial-workflow
+```
+
+### v0.6.3 - Configurable Timeouts
+
+- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+- **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+### v0.6.2 - .env Loading & Stability
+
+- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+### v0.6.0 - Plugin Architecture
+
+🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

 ```bash
 # Create a custom evaluator
@@ -424,6 +445,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
 | `aliases` | No | Alternative command names |
 | `log_prefix` | No | CLI output prefix |
 | `fallback_model` | No | Fallback model if primary fails |
+| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
 | `version` | No | Evaluator version (default: 1.0.0) |

 ### Listing Available Evaluators
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py
RENAMED
@@ -27,9 +27,9 @@ from pathlib import Path
 from typing import Dict, List, Optional, Tuple

 import yaml
-from dotenv import
+from dotenv import dotenv_values, load_dotenv

-__version__ = "0.6.
+__version__ = "0.6.2"

 # ANSI color codes for better output
 RESET = "\033[0m"
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
 f"{GREEN}✅ Setup Complete!{RESET}",
 [
 "Created:",
-
-
-
+(
+" ✓ .env (with your API keys - added to .gitignore)"
+if (anthropic_key or openai_key)
+else " ⚠️ .env (skipped - no API keys provided)"
+),
 " ✓ .adversarial/config.yml",
 " ✓ .adversarial/scripts/ (3 workflow scripts)",
 " ✓ .aider.conf.yml (aider configuration)",
 "",
-
-
-
+(
+"Your configuration:"
+if (anthropic_key or openai_key)
+else "Configuration (no API keys yet):"
+),
 f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
 f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
 f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
@@ -806,15 +810,14 @@ def check() -> int:

 if env_file.exists():
 try:
-#
-load_dotenv(env_file)
-# Use dotenv_values() to count variables directly from file
-# This gives accurate count regardless of what was already in environment
+# Count variables by reading file directly (works even if already loaded)
 env_vars = dotenv_values(env_file)
+var_count = len([k for k, v in env_vars.items() if v is not None])
+
+# Still load to ensure environment is set
+load_dotenv(env_file)
 env_loaded = True
-good_checks.append(
-f".env file found ({len(env_vars)} variables configured)"
-)
+good_checks.append(f".env file found and loaded ({var_count} variables)")
 except (FileNotFoundError, PermissionError) as e:
 # File access errors
 issues.append(
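Assembled from the hunk above for readability, the new counting approach in `check()` boils down to the following sketch (the `.env` path is illustrative):

```python
from dotenv import dotenv_values, load_dotenv

# Count variables by reading the file directly; this stays accurate even if
# the same variables were already exported into the process environment.
env_vars = dotenv_values(".env")
var_count = len([k for k, v in env_vars.items() if v is not None])

# Still load the file so the rest of the CLI run sees the variables.
load_dotenv(".env")
print(f".env file found and loaded ({var_count} variables)")
```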
@@ -2097,10 +2100,6 @@ def evaluate(task_file: str) -> int:
 return 0


-
-
-
-
 def review() -> int:
 """Run Phase 3: Code review."""

@@ -2289,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
 )
 return None
 else:
-print(
+print(
+f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
+)
 return None

 elif template_type == "custom" and url:
@@ -2739,54 +2740,61 @@ def agent_onboard(project_path: str = ".") -> int:
 return 0


-def split(
+def split(
+task_file: str,
+strategy: str = "sections",
+max_lines: int = 500,
+dry_run: bool = False,
+):
 """Split large task files into smaller evaluable chunks.
-
+
 Args:
 task_file: Path to the task file to split
 strategy: Split strategy ('sections', 'phases', or 'manual')
 max_lines: Maximum lines per split (default: 500)
 dry_run: Preview splits without creating files
-
+
 Returns:
 Exit code (0 for success, 1 for error)
 """
 from .utils.file_splitter import (
-analyze_task_file,
-
-split_by_phases,
-
+analyze_task_file,
+generate_split_files,
+split_by_phases,
+split_by_sections,
 )
-
+
 try:
 print_box("File Splitting Utility", CYAN)
-
+
 # Validate file exists
 if not os.path.exists(task_file):
 print(f"{RED}Error: File not found: {task_file}{RESET}")
 return 1
-
+
 # Analyze file
 print(f"📄 Analyzing task file: {task_file}")
 analysis = analyze_task_file(task_file)
-
-lines = analysis[
-tokens = analysis[
+
+lines = analysis["total_lines"]
+tokens = analysis["estimated_tokens"]
 print(f" Lines: {lines}")
 print(f" Estimated tokens: ~{tokens:,}")
-
+
 # Check if splitting is recommended
 if lines <= max_lines:
-print(
+print(
+f"{GREEN}✅ File is under recommended limit ({max_lines} lines){RESET}"
+)
 print("No splitting needed.")
 return 0
-
+
 print(f"{YELLOW}⚠️ File exceeds recommended limit ({max_lines} lines){RESET}")
-
+
 # Read file content for splitting
-with open(task_file,
+with open(task_file, "r", encoding="utf-8") as f:
 content = f.read()
-
+
 # Apply split strategy
 if strategy == "sections":
 splits = split_by_sections(content, max_lines=max_lines)
@@ -2795,42 +2803,44 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
 splits = split_by_phases(content)
 print(f"\n💡 Suggested splits (by phases):")
 else:
-print(
+print(
+f"{RED}Error: Unknown strategy '{strategy}'. Use 'sections' or 'phases'.{RESET}"
+)
 return 1
-
+
 # Display split preview
 for i, split in enumerate(splits, 1):
 filename = f"{Path(task_file).stem}-part{i}{Path(task_file).suffix}"
 print(f" - {filename} ({split['line_count']} lines)")
-
+
 # Dry run mode
 if dry_run:
 print(f"\n{CYAN}📋 Dry run mode - no files created{RESET}")
 return 0
-
+
 # Prompt user for confirmation
 create_files = prompt_user(f"\nCreate {len(splits)} files?", default="n")
-
-if create_files.lower() in [
+
+if create_files.lower() in ["y", "yes"]:
 # Create output directory
 output_dir = os.path.join(os.path.dirname(task_file), "splits")
-
+
 # Generate split files
 created_files = generate_split_files(task_file, splits, output_dir)
-
+
 print(f"{GREEN}✅ Created {len(created_files)} files:{RESET}")
 for file_path in created_files:
 print(f" {file_path}")
-
+
 print(f"\n{CYAN}💡 Tip: Evaluate each split file independently:{RESET}")
 for file_path in created_files:
 rel_path = os.path.relpath(file_path)
 print(f" adversarial evaluate {rel_path}")
 else:
 print("Cancelled - no files created.")
-
+
 return 0
-
+
 except Exception as e:
 print(f"{RED}Error during file splitting: {e}{RESET}")
 return 1
@@ -2876,6 +2886,7 @@ def list_evaluators() -> int:

 return 0

+
 def main():
 """Main CLI entry point."""
 import logging
@@ -2888,10 +2899,20 @@ def main():
 except Exception as e:
 print(f"Warning: Could not load .env file: {e}", file=sys.stderr)

+# Load .env file before any commands run
+# Use explicit path to ensure we find .env in current working directory
+# (load_dotenv() without args can fail to find .env in some contexts)
+env_file = Path.cwd() / ".env"
+if env_file.exists():
+try:
+load_dotenv(env_file)
+except (OSError, UnicodeDecodeError) as e:
+print(f"Warning: Could not load .env file: {e}", file=sys.stderr)
+
 from adversarial_workflow.evaluators import (
+BUILTIN_EVALUATORS,
 get_all_evaluators,
 run_evaluator,
-BUILTIN_EVALUATORS,
 )

 logger = logging.getLogger(__name__)
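For context, a minimal `.env` sketch of the kind this startup loading picks up; `GEMINI_API_KEY` comes from the release notes above, while the other key names are assumed conventional names rather than values confirmed by this diff:

```bash
# .env in the project root; loaded by the CLI at startup (v0.6.2+)
OPENAI_API_KEY=sk-...          # assumed conventional name
ANTHROPIC_API_KEY=sk-ant-...   # assumed conventional name
GEMINI_API_KEY=...             # referenced via api_key_env in a custom evaluator
```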
@@ -2899,8 +2920,16 @@ def main():
 # Commands that cannot be overridden by evaluators
 # Note: 'review' is special - it reviews git changes without a file argument
 STATIC_COMMANDS = {
-"init",
-"
+"init",
+"check",
+"doctor",
+"health",
+"quickstart",
+"agent",
+"split",
+"validate",
+"review",
+"list-evaluators",
 }

 parser = argparse.ArgumentParser(
@@ -2989,16 +3018,21 @@ For more information: https://github.com/movito/adversarial-workflow
 )
 split_parser.add_argument("task_file", help="Task file to split")
 split_parser.add_argument(
-"--strategy",
-
+"--strategy",
+"-s",
+choices=["sections", "phases"],
+default="sections",
+help="Split strategy: 'sections' (default) or 'phases'",
 )
 split_parser.add_argument(
-"--max-lines",
-
+"--max-lines",
+"-m",
+type=int,
+default=500,
+help="Maximum lines per split (default: 500)",
 )
 split_parser.add_argument(
-"--dry-run", action="store_true",
-help="Preview splits without creating files"
+"--dry-run", action="store_true", help="Preview splits without creating files"
 )

 # list-evaluators command
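A hedged example of the `split` command with the options defined above, including the `-s`/`-m` short flags; the task file name is illustrative:

```bash
# Preview how a large task file would be split, without writing anything
adversarial split plan.md --strategy phases --max-lines 400 --dry-run

# Same idea with the short flags and the default strategy
adversarial split plan.md -s sections -m 500
```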
@@ -3019,7 +3053,12 @@ For more information: https://github.com/movito/adversarial-workflow
 for name, config in evaluators.items():
 # Skip if name conflicts with static command
 if name in STATIC_COMMANDS:
-
+# Only warn for user-defined evaluators, not built-ins
+# Built-in conflicts are intentional (e.g., 'review' command vs 'review' evaluator)
+if getattr(config, "source", None) != "builtin":
+logger.warning(
+"Evaluator '%s' conflicts with CLI command; skipping", name
+)
 # Mark as registered to prevent alias re-registration attempts
 registered_configs.add(id(config))
 continue
@@ -3046,10 +3085,11 @@ For more information: https://github.com/movito/adversarial-workflow
 )
 eval_parser.add_argument("file", help="File to evaluate")
 eval_parser.add_argument(
-"--timeout",
+"--timeout",
+"-t",
 type=int,
-default=
-help="Timeout in seconds (default: 180)"
+default=None,
+help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
 )
 # Store config for later execution
 eval_parser.set_defaults(evaluator_config=config)
@@ -3062,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow

 # Check for evaluator command first (has evaluator_config attribute)
 if hasattr(args, "evaluator_config"):
+# Determine timeout: CLI flag > YAML config > default (180s)
+if args.timeout is not None:
+timeout = args.timeout
+source = "CLI override"
+elif args.evaluator_config.timeout != 180:
+timeout = args.evaluator_config.timeout
+source = "evaluator config"
+else:
+timeout = args.evaluator_config.timeout  # 180 (default)
+source = "default"
+
+# Validate CLI timeout (consistent with YAML validation)
+if timeout <= 0:
+print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
+return 1
+if timeout > 600:
+print(
+f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
+)
+timeout = 600
+
+# Log actual timeout and source
+print(f"Using timeout: {timeout}s ({source})")
+
 return run_evaluator(
 args.evaluator_config,
 args.file,
-timeout=
+timeout=timeout,
 )

 # Execute static commands
@@ -3097,7 +3161,7 @@ For more information: https://github.com/movito/adversarial-workflow
 args.task_file,
 strategy=args.strategy,
 max_lines=args.max_lines,
-dry_run=args.dry_run
+dry_run=args.dry_run,
 )
 elif args.command == "list-evaluators":
 return list_evaluators()
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py
RENAMED
@@ -1,13 +1,13 @@
 """Evaluators module for adversarial-workflow plugin architecture."""

+from .builtins import BUILTIN_EVALUATORS
 from .config import EvaluatorConfig
 from .discovery import (
+EvaluatorParseError,
 discover_local_evaluators,
 parse_evaluator_yaml,
-EvaluatorParseError,
 )
 from .runner import run_evaluator
-from .builtins import BUILTIN_EVALUATORS


 def get_all_evaluators() -> dict[str, EvaluatorConfig]:
@@ -17,6 +17,7 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
 Aliases from local evaluators are also included in the returned dictionary.
 """
 import logging
+
 logger = logging.getLogger(__name__)

 evaluators: dict[str, EvaluatorConfig] = {}
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py
RENAMED
@@ -26,6 +26,7 @@ class EvaluatorConfig:
 fallback_model: Fallback model if primary fails
 aliases: Alternative command names
 version: Evaluator version
+timeout: Timeout in seconds (default: 180, max: 600)
 source: "builtin" or "local" (set internally)
 config_file: Path to YAML file if local (set internally)
 """
@@ -43,6 +44,7 @@ class EvaluatorConfig:
 fallback_model: str | None = None
 aliases: list[str] = field(default_factory=list)
 version: str = "1.0.0"
+timeout: int = 180  # Timeout in seconds (default: 180, max: 600)

 # Metadata (set internally during discovery, not from YAML)
 source: str = "builtin"
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py
RENAMED
@@ -40,9 +40,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 try:
 content = yml_file.read_text(encoding="utf-8")
 except UnicodeDecodeError as e:
-raise EvaluatorParseError(
-f"File encoding error (not UTF-8): {yml_file}"
-) from e
+raise EvaluatorParseError(f"File encoding error (not UTF-8): {yml_file}") from e

 # Parse YAML
 data = yaml.safe_load(content)
@@ -58,7 +56,14 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 )

 # Validate required fields exist
-required = [
+required = [
+"name",
+"description",
+"model",
+"api_key_env",
+"prompt",
+"output_suffix",
+]
 missing = [f for f in required if f not in data]
 if missing:
 raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")
@@ -117,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
 )

+# Validate timeout if present
+if "timeout" in data:
+timeout = data["timeout"]
+# Handle null/empty values
+if timeout is None or timeout == "":
+raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
+# Check for bool before int (bool is subclass of int in Python)
+# YAML parses 'yes'/'true' as True, 'no'/'false' as False
+if isinstance(timeout, bool):
+raise EvaluatorParseError(
+f"Field 'timeout' must be an integer, got bool: {timeout!r}"
+)
+if not isinstance(timeout, int):
+raise EvaluatorParseError(
+f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
+)
+# timeout=0 is invalid (does not disable timeout - use a large value instead)
+if timeout <= 0:
+raise EvaluatorParseError(
+f"Field 'timeout' must be positive (> 0), got {timeout}"
+)
+if timeout > 600:
+logger.warning(
+"Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
+timeout,
+yml_file.name,
+)
+data["timeout"] = 600
+
 # Filter to known fields only (log unknown fields)
 known_fields = {
 "name",
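Summarizing the validation added above, a sketch of how different `timeout` values in an evaluator YAML are treated; only the uncommented line is active, the commented alternatives show the behavior of `parse_evaluator_yaml` for each case:

```yaml
timeout: 300     # accepted as-is
#timeout: 900    # accepted but clamped to 600 with a logged warning
#timeout: 0      # EvaluatorParseError: must be positive (> 0)
#timeout: "30"   # EvaluatorParseError: must be an integer, not a string
#timeout: yes    # EvaluatorParseError: YAML booleans are rejected before the int check
```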
@@ -129,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
 "fallback_model",
 "aliases",
 "version",
+"timeout",
 }
 unknown = set(data.keys()) - known_fields
 if unknown:
{adversarial_workflow-0.6.1 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py
RENAMED
@@ -10,10 +10,10 @@ import tempfile
 from datetime import datetime, timezone
 from pathlib import Path

-from .
-from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
+from ..utils.colors import BOLD, GREEN, RED, RESET, YELLOW
 from ..utils.config import load_config
 from ..utils.validation import validate_evaluation_output
+from .config import EvaluatorConfig


 def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
@@ -124,7 +124,7 @@ def _run_custom_evaluator(
 """

 # Create temp file for prompt
-with tempfile.NamedTemporaryFile(mode=
+with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
 f.write(full_prompt)
 prompt_file = f.name

@@ -136,12 +136,15 @@ def _run_custom_evaluator(
 # Build aider command
 cmd = [
 "aider",
-"--model",
+"--model",
+config.model,
 "--yes",
 "--no-git",
 "--no-auto-commits",
-"--message-file",
-
+"--message-file",
+prompt_file,
+"--read",
+file_path,
 ]

 result = subprocess.run(
@@ -224,7 +227,10 @@ def _execute_script(

 # Validate output
 file_basename = Path(file_path).stem
-log_file =
+log_file = (
+Path(project_config["log_directory"])
+/ f"{file_basename}-{config.output_suffix}.md"
+)

 is_valid, verdict, message = validate_evaluation_output(str(log_file))

@@ -235,7 +241,9 @@ def _execute_script(
 return _report_verdict(verdict, log_file, config)


-def _report_verdict(
+def _report_verdict(
+verdict: str | None, log_file: Path, config: EvaluatorConfig
+) -> int:
 """Report the evaluation verdict to terminal."""
 print()
 if verdict == "APPROVED":