adversarial-workflow 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/PKG-INFO +25 -3
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/README.md +24 -2
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py +1 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py +40 -10
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py +2 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py +30 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO +25 -3
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt +1 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/pyproject.toml +1 -1
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py +58 -3
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_config.py +20 -14
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluate.py +32 -21
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_discovery.py +203 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_file_splitter.py +1 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_list_evaluators.py +6 -2
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_python_version.py +1 -0
- adversarial_workflow-0.6.3/tests/test_timeout_integration.py +406 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/LICENSE +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/__init__.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/builtins.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.env.example.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/README.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/config.yml.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/example-task.md.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/__init__.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/colors.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/config.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/file_splitter.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/validation.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/entry_points.txt +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/requires.txt +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/top_level.txt +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/setup.cfg +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/setup.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_env_loading.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_config.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_runner.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_scripts_project.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_split_command.py +0 -0
- {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_utils_validation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: adversarial-workflow
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
|
|
5
5
|
Author: Fredrik Matheson
|
|
6
6
|
License: MIT
|
|
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
55
55
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
56
56
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
57
57
|
|
|
58
|
-
## What's New in v0.6.
|
|
58
|
+
## What's New in v0.6.3
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
### Upgrade
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install --upgrade adversarial-workflow
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### v0.6.3 - Configurable Timeouts
|
|
67
|
+
|
|
68
|
+
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
69
|
+
- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
|
|
70
|
+
- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
|
|
71
|
+
- **Safety limits**: Maximum 600 seconds to prevent runaway processes
|
|
72
|
+
|
|
73
|
+
### v0.6.2 - .env Loading & Stability
|
|
74
|
+
|
|
75
|
+
- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
|
|
76
|
+
- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
|
|
77
|
+
- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
|
|
78
|
+
|
|
79
|
+
### v0.6.0 - Plugin Architecture
|
|
80
|
+
|
|
81
|
+
🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:
|
|
61
82
|
|
|
62
83
|
```bash
|
|
63
84
|
# Create a custom evaluator
|
|
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
|
|
|
459
480
|
| `aliases` | No | Alternative command names |
|
|
460
481
|
| `log_prefix` | No | CLI output prefix |
|
|
461
482
|
| `fallback_model` | No | Fallback model if primary fails |
|
|
483
|
+
| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
|
|
462
484
|
| `version` | No | Evaluator version (default: 1.0.0) |
|
|
463
485
|
|
|
464
486
|
### Listing Available Evaluators
|
|
@@ -20,9 +20,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
20
20
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
21
21
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
22
22
|
|
|
23
|
-
## What's New in v0.6.
|
|
23
|
+
## What's New in v0.6.3
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
### Upgrade
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install --upgrade adversarial-workflow
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### v0.6.3 - Configurable Timeouts
|
|
32
|
+
|
|
33
|
+
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
34
|
+
- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
|
|
35
|
+
- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
|
|
36
|
+
- **Safety limits**: Maximum 600 seconds to prevent runaway processes
|
|
37
|
+
|
|
38
|
+
### v0.6.2 - .env Loading & Stability
|
|
39
|
+
|
|
40
|
+
- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
|
|
41
|
+
- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
|
|
42
|
+
- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
|
|
43
|
+
|
|
44
|
+
### v0.6.0 - Plugin Architecture
|
|
45
|
+
|
|
46
|
+
🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:
|
|
26
47
|
|
|
27
48
|
```bash
|
|
28
49
|
# Create a custom evaluator
|
|
@@ -424,6 +445,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
|
|
|
424
445
|
| `aliases` | No | Alternative command names |
|
|
425
446
|
| `log_prefix` | No | CLI output prefix |
|
|
426
447
|
| `fallback_model` | No | Fallback model if primary fails |
|
|
448
|
+
| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
|
|
427
449
|
| `version` | No | Evaluator version (default: 1.0.0) |
|
|
428
450
|
|
|
429
451
|
### Listing Available Evaluators
|
|
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
|
|
|
322
322
|
f"{GREEN}✅ Setup Complete!{RESET}",
|
|
323
323
|
[
|
|
324
324
|
"Created:",
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
325
|
+
(
|
|
326
|
+
" ✓ .env (with your API keys - added to .gitignore)"
|
|
327
|
+
if (anthropic_key or openai_key)
|
|
328
|
+
else " ⚠️ .env (skipped - no API keys provided)"
|
|
329
|
+
),
|
|
328
330
|
" ✓ .adversarial/config.yml",
|
|
329
331
|
" ✓ .adversarial/scripts/ (3 workflow scripts)",
|
|
330
332
|
" ✓ .aider.conf.yml (aider configuration)",
|
|
331
333
|
"",
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
334
|
+
(
|
|
335
|
+
"Your configuration:"
|
|
336
|
+
if (anthropic_key or openai_key)
|
|
337
|
+
else "Configuration (no API keys yet):"
|
|
338
|
+
),
|
|
335
339
|
f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
|
|
336
340
|
f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
|
|
337
341
|
f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
|
|
@@ -2284,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
|
|
|
2284
2288
|
)
|
|
2285
2289
|
return None
|
|
2286
2290
|
else:
|
|
2287
|
-
print(
|
|
2291
|
+
print(
|
|
2292
|
+
f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
|
|
2293
|
+
)
|
|
2288
2294
|
return None
|
|
2289
2295
|
|
|
2290
2296
|
elif template_type == "custom" and url:
|
|
@@ -3082,8 +3088,8 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3082
3088
|
"--timeout",
|
|
3083
3089
|
"-t",
|
|
3084
3090
|
type=int,
|
|
3085
|
-
default=
|
|
3086
|
-
help="Timeout in seconds (default: 180)",
|
|
3091
|
+
default=None,
|
|
3092
|
+
help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
|
|
3087
3093
|
)
|
|
3088
3094
|
# Store config for later execution
|
|
3089
3095
|
eval_parser.set_defaults(evaluator_config=config)
|
|
@@ -3096,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3096
3102
|
|
|
3097
3103
|
# Check for evaluator command first (has evaluator_config attribute)
|
|
3098
3104
|
if hasattr(args, "evaluator_config"):
|
|
3105
|
+
# Determine timeout: CLI flag > YAML config > default (180s)
|
|
3106
|
+
if args.timeout is not None:
|
|
3107
|
+
timeout = args.timeout
|
|
3108
|
+
source = "CLI override"
|
|
3109
|
+
elif args.evaluator_config.timeout != 180:
|
|
3110
|
+
timeout = args.evaluator_config.timeout
|
|
3111
|
+
source = "evaluator config"
|
|
3112
|
+
else:
|
|
3113
|
+
timeout = args.evaluator_config.timeout # 180 (default)
|
|
3114
|
+
source = "default"
|
|
3115
|
+
|
|
3116
|
+
# Validate CLI timeout (consistent with YAML validation)
|
|
3117
|
+
if timeout <= 0:
|
|
3118
|
+
print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
|
|
3119
|
+
return 1
|
|
3120
|
+
if timeout > 600:
|
|
3121
|
+
print(
|
|
3122
|
+
f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
|
|
3123
|
+
)
|
|
3124
|
+
timeout = 600
|
|
3125
|
+
|
|
3126
|
+
# Log actual timeout and source
|
|
3127
|
+
print(f"Using timeout: {timeout}s ({source})")
|
|
3128
|
+
|
|
3099
3129
|
return run_evaluator(
|
|
3100
3130
|
args.evaluator_config,
|
|
3101
3131
|
args.file,
|
|
3102
|
-
timeout=
|
|
3132
|
+
timeout=timeout,
|
|
3103
3133
|
)
|
|
3104
3134
|
|
|
3105
3135
|
# Execute static commands
|
{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py
RENAMED
|
@@ -26,6 +26,7 @@ class EvaluatorConfig:
|
|
|
26
26
|
fallback_model: Fallback model if primary fails
|
|
27
27
|
aliases: Alternative command names
|
|
28
28
|
version: Evaluator version
|
|
29
|
+
timeout: Timeout in seconds (default: 180, max: 600)
|
|
29
30
|
source: "builtin" or "local" (set internally)
|
|
30
31
|
config_file: Path to YAML file if local (set internally)
|
|
31
32
|
"""
|
|
@@ -43,6 +44,7 @@ class EvaluatorConfig:
|
|
|
43
44
|
fallback_model: str | None = None
|
|
44
45
|
aliases: list[str] = field(default_factory=list)
|
|
45
46
|
version: str = "1.0.0"
|
|
47
|
+
timeout: int = 180 # Timeout in seconds (default: 180, max: 600)
|
|
46
48
|
|
|
47
49
|
# Metadata (set internally during discovery, not from YAML)
|
|
48
50
|
source: str = "builtin"
|
|
@@ -122,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
122
122
|
f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
|
|
123
123
|
)
|
|
124
124
|
|
|
125
|
+
# Validate timeout if present
|
|
126
|
+
if "timeout" in data:
|
|
127
|
+
timeout = data["timeout"]
|
|
128
|
+
# Handle null/empty values
|
|
129
|
+
if timeout is None or timeout == "":
|
|
130
|
+
raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
|
|
131
|
+
# Check for bool before int (bool is subclass of int in Python)
|
|
132
|
+
# YAML parses 'yes'/'true' as True, 'no'/'false' as False
|
|
133
|
+
if isinstance(timeout, bool):
|
|
134
|
+
raise EvaluatorParseError(
|
|
135
|
+
f"Field 'timeout' must be an integer, got bool: {timeout!r}"
|
|
136
|
+
)
|
|
137
|
+
if not isinstance(timeout, int):
|
|
138
|
+
raise EvaluatorParseError(
|
|
139
|
+
f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
|
|
140
|
+
)
|
|
141
|
+
# timeout=0 is invalid (does not disable timeout - use a large value instead)
|
|
142
|
+
if timeout <= 0:
|
|
143
|
+
raise EvaluatorParseError(
|
|
144
|
+
f"Field 'timeout' must be positive (> 0), got {timeout}"
|
|
145
|
+
)
|
|
146
|
+
if timeout > 600:
|
|
147
|
+
logger.warning(
|
|
148
|
+
"Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
|
|
149
|
+
timeout,
|
|
150
|
+
yml_file.name,
|
|
151
|
+
)
|
|
152
|
+
data["timeout"] = 600
|
|
153
|
+
|
|
125
154
|
# Filter to known fields only (log unknown fields)
|
|
126
155
|
known_fields = {
|
|
127
156
|
"name",
|
|
@@ -134,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
|
|
|
134
163
|
"fallback_model",
|
|
135
164
|
"aliases",
|
|
136
165
|
"version",
|
|
166
|
+
"timeout",
|
|
137
167
|
}
|
|
138
168
|
unknown = set(data.keys()) - known_fields
|
|
139
169
|
if unknown:
|
{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: adversarial-workflow
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
|
|
5
5
|
Author: Fredrik Matheson
|
|
6
6
|
License: MIT
|
|
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
55
55
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
56
56
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
57
57
|
|
|
58
|
-
## What's New in v0.6.
|
|
58
|
+
## What's New in v0.6.3
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
### Upgrade
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install --upgrade adversarial-workflow
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### v0.6.3 - Configurable Timeouts
|
|
67
|
+
|
|
68
|
+
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
69
|
+
- **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
|
|
70
|
+
- **Timeout logging**: See which timeout source is used (CLI/YAML/default)
|
|
71
|
+
- **Safety limits**: Maximum 600 seconds to prevent runaway processes
|
|
72
|
+
|
|
73
|
+
### v0.6.2 - .env Loading & Stability
|
|
74
|
+
|
|
75
|
+
- **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
|
|
76
|
+
- **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
|
|
77
|
+
- **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
|
|
78
|
+
|
|
79
|
+
### v0.6.0 - Plugin Architecture
|
|
80
|
+
|
|
81
|
+
🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:
|
|
61
82
|
|
|
62
83
|
```bash
|
|
63
84
|
# Create a custom evaluator
|
|
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
|
|
|
459
480
|
| `aliases` | No | Alternative command names |
|
|
460
481
|
| `log_prefix` | No | CLI output prefix |
|
|
461
482
|
| `fallback_model` | No | Fallback model if primary fails |
|
|
483
|
+
| `timeout` | No | Timeout in seconds (default: 180, max: 600) |
|
|
462
484
|
| `version` | No | Evaluator version (default: 1.0.0) |
|
|
463
485
|
|
|
464
486
|
### Listing Available Evaluators
|
{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py
RENAMED
|
@@ -351,7 +351,9 @@ class TestBackwardsCompatibility:
|
|
|
351
351
|
class TestGracefulDegradation:
|
|
352
352
|
"""Test graceful degradation on errors."""
|
|
353
353
|
|
|
354
|
-
def test_help_works_without_local_evaluators_dir(
|
|
354
|
+
def test_help_works_without_local_evaluators_dir(
|
|
355
|
+
self, tmp_path, monkeypatch, run_cli
|
|
356
|
+
):
|
|
355
357
|
"""CLI help works even without .adversarial/evaluators/ directory."""
|
|
356
358
|
adv_dir = tmp_path / ".adversarial"
|
|
357
359
|
adv_dir.mkdir(parents=True)
|
|
@@ -420,7 +422,9 @@ class TestReviewCommandBackwardsCompatibility:
|
|
|
420
422
|
# Review should NOT have --timeout flag (that's for evaluators)
|
|
421
423
|
assert "--timeout" not in result.stdout
|
|
422
424
|
|
|
423
|
-
def test_review_command_not_overridden_by_evaluator(
|
|
425
|
+
def test_review_command_not_overridden_by_evaluator(
|
|
426
|
+
self, tmp_path, monkeypatch, run_cli
|
|
427
|
+
):
|
|
424
428
|
"""Review command cannot be overridden by local evaluator."""
|
|
425
429
|
adv_dir = tmp_path / ".adversarial"
|
|
426
430
|
adv_dir.mkdir(parents=True)
|
|
@@ -488,7 +492,9 @@ aliases:
|
|
|
488
492
|
assert "--path" in result_init.stdout
|
|
489
493
|
assert "--interactive" in result_init.stdout
|
|
490
494
|
|
|
491
|
-
def test_evaluator_with_conflicting_name_and_alias(
|
|
495
|
+
def test_evaluator_with_conflicting_name_and_alias(
|
|
496
|
+
self, tmp_path, monkeypatch, run_cli
|
|
497
|
+
):
|
|
492
498
|
"""Evaluator with conflicting name doesn't crash when alias is processed."""
|
|
493
499
|
adv_dir = tmp_path / ".adversarial"
|
|
494
500
|
adv_dir.mkdir(parents=True)
|
|
@@ -518,3 +524,52 @@ aliases:
|
|
|
518
524
|
assert result.returncode == 0
|
|
519
525
|
# 'init' should still be the static command
|
|
520
526
|
assert "init" in result.stdout
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
class TestTimeoutConfiguration:
|
|
530
|
+
"""Test timeout configuration from YAML and CLI."""
|
|
531
|
+
|
|
532
|
+
def test_evaluator_config_timeout_in_yaml(self, tmp_path, monkeypatch, run_cli):
|
|
533
|
+
"""Evaluator YAML timeout appears in help text."""
|
|
534
|
+
adv_dir = tmp_path / ".adversarial"
|
|
535
|
+
adv_dir.mkdir(parents=True)
|
|
536
|
+
(adv_dir / "config.yml").write_text("log_directory: .adversarial/logs/")
|
|
537
|
+
|
|
538
|
+
eval_dir = adv_dir / "evaluators"
|
|
539
|
+
eval_dir.mkdir(parents=True)
|
|
540
|
+
(eval_dir / "slow-model.yml").write_text(
|
|
541
|
+
"""
|
|
542
|
+
name: slow-model
|
|
543
|
+
description: Slow model evaluator
|
|
544
|
+
model: mistral/mistral-large-latest
|
|
545
|
+
api_key_env: MISTRAL_API_KEY
|
|
546
|
+
prompt: Evaluate this
|
|
547
|
+
output_suffix: SLOW-EVAL
|
|
548
|
+
timeout: 300
|
|
549
|
+
"""
|
|
550
|
+
)
|
|
551
|
+
|
|
552
|
+
monkeypatch.chdir(tmp_path)
|
|
553
|
+
|
|
554
|
+
result = run_cli(["slow-model", "--help"], cwd=tmp_path)
|
|
555
|
+
assert result.returncode == 0
|
|
556
|
+
# Help should mention timeout flag with updated text
|
|
557
|
+
assert "--timeout" in result.stdout or "-t" in result.stdout
|
|
558
|
+
# Help text mentions evaluator config (may wrap across lines)
|
|
559
|
+
assert "evaluator config" in result.stdout
|
|
560
|
+
assert "max: 600" in result.stdout
|
|
561
|
+
|
|
562
|
+
def test_timeout_help_text_updated(self, tmp_path, monkeypatch, run_cli):
|
|
563
|
+
"""Timeout help text shows it can come from config."""
|
|
564
|
+
adv_dir = tmp_path / ".adversarial"
|
|
565
|
+
adv_dir.mkdir(parents=True)
|
|
566
|
+
(adv_dir / "config.yml").write_text("log_directory: .adversarial/logs/")
|
|
567
|
+
|
|
568
|
+
monkeypatch.chdir(tmp_path)
|
|
569
|
+
|
|
570
|
+
result = run_cli(["evaluate", "--help"], cwd=tmp_path)
|
|
571
|
+
assert result.returncode == 0
|
|
572
|
+
# New help text mentioning evaluator config (may wrap across lines)
|
|
573
|
+
assert "evaluator config" in result.stdout
|
|
574
|
+
# Max 600 mentioned
|
|
575
|
+
assert "max: 600" in result.stdout
|
|
@@ -59,13 +59,16 @@ custom_setting: test_value
|
|
|
59
59
|
|
|
60
60
|
def test_load_config_with_env_overrides(self):
|
|
61
61
|
"""Test that environment variables override config file values."""
|
|
62
|
-
with
|
|
63
|
-
os.
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
62
|
+
with (
|
|
63
|
+
patch("os.path.exists", return_value=False),
|
|
64
|
+
patch.dict(
|
|
65
|
+
os.environ,
|
|
66
|
+
{
|
|
67
|
+
"ADVERSARIAL_EVALUATOR_MODEL": "gpt-4-turbo",
|
|
68
|
+
"ADVERSARIAL_TEST_COMMAND": "cargo test",
|
|
69
|
+
"ADVERSARIAL_LOG_DIR": "custom_logs/",
|
|
70
|
+
},
|
|
71
|
+
),
|
|
69
72
|
):
|
|
70
73
|
config = load_config("nonexistent.yml")
|
|
71
74
|
|
|
@@ -127,13 +130,16 @@ test_command: pytest
|
|
|
127
130
|
|
|
128
131
|
def test_load_config_partial_env_overrides(self):
|
|
129
132
|
"""Test that only set environment variables override config."""
|
|
130
|
-
with
|
|
131
|
-
os.
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
133
|
+
with (
|
|
134
|
+
patch("os.path.exists", return_value=False),
|
|
135
|
+
patch.dict(
|
|
136
|
+
os.environ,
|
|
137
|
+
{
|
|
138
|
+
"ADVERSARIAL_EVALUATOR_MODEL": "gpt-4",
|
|
139
|
+
# Only set one env var, others should remain default
|
|
140
|
+
},
|
|
141
|
+
clear=True,
|
|
142
|
+
),
|
|
137
143
|
):
|
|
138
144
|
config = load_config("nonexistent.yml")
|
|
139
145
|
|
|
@@ -272,14 +272,18 @@ class TestEvaluate:
|
|
|
272
272
|
large_content = "# Test task\n" + "Line content\n" * 600
|
|
273
273
|
task_file.write_text(large_content)
|
|
274
274
|
|
|
275
|
-
with
|
|
276
|
-
"
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
275
|
+
with (
|
|
276
|
+
patch("shutil.which", return_value="/usr/bin/aider"),
|
|
277
|
+
patch(
|
|
278
|
+
"adversarial_workflow.cli.load_config",
|
|
279
|
+
return_value={"log_directory": ".adversarial/logs/"},
|
|
280
|
+
),
|
|
281
|
+
patch("os.path.exists", return_value=True),
|
|
282
|
+
patch(
|
|
283
|
+
"adversarial_workflow.cli.validate_evaluation_output",
|
|
284
|
+
return_value=(True, "APPROVED", "OK"),
|
|
285
|
+
),
|
|
286
|
+
patch("adversarial_workflow.cli.verify_token_count"),
|
|
283
287
|
):
|
|
284
288
|
result = evaluate(str(task_file))
|
|
285
289
|
|
|
@@ -293,11 +297,14 @@ class TestEvaluate:
|
|
|
293
297
|
very_large_content = "# Test task\n" + "Line content\n" * 800
|
|
294
298
|
task_file.write_text(very_large_content)
|
|
295
299
|
|
|
296
|
-
with
|
|
297
|
-
"
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
300
|
+
with (
|
|
301
|
+
patch("shutil.which", return_value="/usr/bin/aider"),
|
|
302
|
+
patch(
|
|
303
|
+
"adversarial_workflow.cli.load_config",
|
|
304
|
+
return_value={"log_directory": ".adversarial/logs/"},
|
|
305
|
+
),
|
|
306
|
+
patch("os.path.exists", return_value=True),
|
|
307
|
+
patch("builtins.input", return_value="n"),
|
|
301
308
|
): # User says no
|
|
302
309
|
result = evaluate(str(task_file))
|
|
303
310
|
assert result == 0 # Cancelled, not error
|
|
@@ -450,14 +457,18 @@ class TestEvaluateIntegration:
|
|
|
450
457
|
|
|
451
458
|
def test_evaluate_with_sample_task(self, sample_task_file, mock_aider_command):
|
|
452
459
|
"""Test evaluate with sample task file from fixture."""
|
|
453
|
-
with
|
|
454
|
-
"
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
460
|
+
with (
|
|
461
|
+
patch("shutil.which", return_value="/usr/bin/aider"),
|
|
462
|
+
patch(
|
|
463
|
+
"adversarial_workflow.cli.load_config",
|
|
464
|
+
return_value={"log_directory": ".adversarial/logs/"},
|
|
465
|
+
),
|
|
466
|
+
patch("os.path.exists", return_value=True),
|
|
467
|
+
patch(
|
|
468
|
+
"adversarial_workflow.cli.validate_evaluation_output",
|
|
469
|
+
return_value=(True, "APPROVED", "OK"),
|
|
470
|
+
),
|
|
471
|
+
patch("adversarial_workflow.cli.verify_token_count"),
|
|
461
472
|
):
|
|
462
473
|
result = evaluate(str(sample_task_file))
|
|
463
474
|
assert isinstance(result, int)
|