adversarial-workflow 0.6.2.tar.gz → 0.6.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/PKG-INFO +25 -3
  2. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/README.md +24 -2
  3. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py +1 -0
  4. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py +40 -10
  5. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py +2 -0
  6. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py +30 -0
  7. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO +25 -3
  8. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt +1 -0
  9. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/pyproject.toml +1 -1
  10. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py +58 -3
  11. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_config.py +20 -14
  12. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluate.py +32 -21
  13. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_discovery.py +203 -0
  14. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_file_splitter.py +1 -0
  15. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_list_evaluators.py +6 -2
  16. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_python_version.py +1 -0
  17. adversarial_workflow-0.6.3/tests/test_timeout_integration.py +406 -0
  18. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/LICENSE +0 -0
  19. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/__init__.py +0 -0
  20. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/__init__.py +0 -0
  21. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/builtins.py +0 -0
  22. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/runner.py +0 -0
  23. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
  24. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/.env.example.template +0 -0
  25. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/README.template +0 -0
  26. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
  27. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
  28. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
  29. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
  30. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
  31. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/config.yml.template +0 -0
  32. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
  33. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/example-task.md.template +0 -0
  34. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
  35. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
  36. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
  37. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/__init__.py +0 -0
  38. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/colors.py +0 -0
  39. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/config.py +0 -0
  40. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/file_splitter.py +0 -0
  41. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/utils/validation.py +0 -0
  42. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
  43. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/entry_points.txt +0 -0
  44. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/requires.txt +0 -0
  45. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/top_level.txt +0 -0
  46. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/setup.cfg +0 -0
  47. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/setup.py +0 -0
  48. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli.py +0 -0
  49. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_env_loading.py +0 -0
  50. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_config.py +0 -0
  51. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluator_runner.py +0 -0
  52. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_scripts_project.py +0 -0
  53. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_split_command.py +0 -0
  54. {adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_utils_validation.py +0 -0

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: adversarial-workflow
- Version: 0.6.2
+ Version: 0.6.3
  Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
  Author: Fredrik Matheson
  License: MIT
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

- ## What's New in v0.6.0
+ ## What's New in v0.6.3

- 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+ ### Upgrade
+
+ ```bash
+ pip install --upgrade adversarial-workflow
+ ```
+
+ ### v0.6.3 - Configurable Timeouts
+
+ - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+ - **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+ - **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+ - **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+ ### v0.6.2 - .env Loading & Stability
+
+ - **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+ - **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+ - **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+ ### v0.6.0 - Plugin Architecture
+
+ 🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

  ```bash
  # Create a custom evaluator
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
  | `aliases` | No | Alternative command names |
  | `log_prefix` | No | CLI output prefix |
  | `fallback_model` | No | Fallback model if primary fails |
+ | `timeout` | No | Timeout in seconds (default: 180, max: 600) |
  | `version` | No | Evaluator version (default: 1.0.0) |

  ### Listing Available Evaluators

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/README.md
@@ -20,9 +20,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

- ## What's New in v0.6.0
+ ## What's New in v0.6.3

- 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+ ### Upgrade
+
+ ```bash
+ pip install --upgrade adversarial-workflow
+ ```
+
+ ### v0.6.3 - Configurable Timeouts
+
+ - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+ - **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+ - **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+ - **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+ ### v0.6.2 - .env Loading & Stability
+
+ - **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+ - **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+ - **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+ ### v0.6.0 - Plugin Architecture
+
+ 🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

  ```bash
  # Create a custom evaluator
@@ -424,6 +445,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
  | `aliases` | No | Alternative command names |
  | `log_prefix` | No | CLI output prefix |
  | `fallback_model` | No | Fallback model if primary fails |
+ | `timeout` | No | Timeout in seconds (default: 180, max: 600) |
  | `version` | No | Evaluator version (default: 1.0.0) |

  ### Listing Available Evaluators
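
The README changes above document two ways to raise an evaluator's timeout: a `timeout:` field in the evaluator YAML and a `--timeout` CLI override. A minimal sketch of registering a project-local evaluator with a longer timeout, reusing the field values from the `slow-model.yml` test fixture further down in this diff (the model name and API key variable are illustrative, not requirements):

```python
# Sketch: write a project-local evaluator definition with timeout: 300.
# The .adversarial/evaluators/ location and field names mirror the test
# fixture in tests/test_cli_dynamic_commands.py; model and key are examples.
from pathlib import Path

evaluator_yaml = """\
name: slow-model
description: Slow model evaluator
model: mistral/mistral-large-latest
api_key_env: MISTRAL_API_KEY
prompt: Evaluate this
output_suffix: SLOW-EVAL
timeout: 300
"""

eval_dir = Path(".adversarial/evaluators")
eval_dir.mkdir(parents=True, exist_ok=True)
(eval_dir / "slow-model.yml").write_text(evaluator_yaml)
```

A one-off run can still override the YAML value with `--timeout 400`, and anything above 600 seconds is clamped.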

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/__main__.py
@@ -1,4 +1,5 @@
  """Allow execution via python -m adversarial_workflow."""
+
  from .cli import main

  if __name__ == "__main__":

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/cli.py
@@ -322,16 +322,20 @@ def init_interactive(project_path: str = ".") -> int:
  f"{GREEN}✅ Setup Complete!{RESET}",
  [
  "Created:",
- " ✓ .env (with your API keys - added to .gitignore)"
- if (anthropic_key or openai_key)
- else " ⚠️ .env (skipped - no API keys provided)",
+ (
+ " ✓ .env (with your API keys - added to .gitignore)"
+ if (anthropic_key or openai_key)
+ else " ⚠️ .env (skipped - no API keys provided)"
+ ),
  " ✓ .adversarial/config.yml",
  " ✓ .adversarial/scripts/ (3 workflow scripts)",
  " ✓ .aider.conf.yml (aider configuration)",
  "",
- "Your configuration:"
- if (anthropic_key or openai_key)
- else "Configuration (no API keys yet):",
+ (
+ "Your configuration:"
+ if (anthropic_key or openai_key)
+ else "Configuration (no API keys yet):"
+ ),
  f" Author (implementation): {'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'GPT-4o (OpenAI)' if openai_key else 'Not configured'}",
  f" Evaluator: {'GPT-4o (OpenAI)' if openai_key else 'Claude 3.5 Sonnet (Anthropic)' if anthropic_key else 'Not configured'}",
  f" Cost per workflow: {'~$0.02-0.10' if (anthropic_key and openai_key) else '~$0.05-0.15' if (anthropic_key or openai_key) else 'N/A'}",
@@ -2284,7 +2288,9 @@ def fetch_agent_template(url: str, template_type: str = "standard") -> Optional[
  )
  return None
  else:
- print(f"{RED}❌ ERROR: {template_type} template not found in package{RESET}")
+ print(
+ f"{RED}❌ ERROR: {template_type} template not found in package{RESET}"
+ )
  return None

  elif template_type == "custom" and url:
@@ -3082,8 +3088,8 @@ For more information: https://github.com/movito/adversarial-workflow
  "--timeout",
  "-t",
  type=int,
- default=180,
- help="Timeout in seconds (default: 180)",
+ default=None,
+ help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
  )
  # Store config for later execution
  eval_parser.set_defaults(evaluator_config=config)
@@ -3096,10 +3102,34 @@ For more information: https://github.com/movito/adversarial-workflow

  # Check for evaluator command first (has evaluator_config attribute)
  if hasattr(args, "evaluator_config"):
+ # Determine timeout: CLI flag > YAML config > default (180s)
+ if args.timeout is not None:
+ timeout = args.timeout
+ source = "CLI override"
+ elif args.evaluator_config.timeout != 180:
+ timeout = args.evaluator_config.timeout
+ source = "evaluator config"
+ else:
+ timeout = args.evaluator_config.timeout # 180 (default)
+ source = "default"
+
+ # Validate CLI timeout (consistent with YAML validation)
+ if timeout <= 0:
+ print(f"{RED}Error: Timeout must be positive (> 0), got {timeout}{RESET}")
+ return 1
+ if timeout > 600:
+ print(
+ f"{YELLOW}Warning: Timeout {timeout}s exceeds maximum (600s), clamping to 600s{RESET}"
+ )
+ timeout = 600
+
+ # Log actual timeout and source
+ print(f"Using timeout: {timeout}s ({source})")
+
  return run_evaluator(
  args.evaluator_config,
  args.file,
- timeout=args.timeout,
+ timeout=timeout,
  )

  # Execute static commands
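
The cli.py hunk above resolves an effective timeout from three sources: the CLI flag wins over the evaluator YAML value, which wins over the 180-second default, and the result is clamped at 600 seconds. A minimal standalone sketch of that precedence under those assumptions (`resolve_timeout` is a hypothetical helper, not part of the package API, and it raises instead of printing and returning an exit code):

```python
# Sketch of the timeout precedence used in cli.py above:
# CLI flag > evaluator YAML value > 180 s default, clamped to 600 s.
DEFAULT_TIMEOUT = 180
MAX_TIMEOUT = 600


def resolve_timeout(cli_timeout: int | None, yaml_timeout: int = DEFAULT_TIMEOUT) -> tuple[int, str]:
    """Return (effective_timeout, source_label)."""
    if cli_timeout is not None:
        timeout, source = cli_timeout, "CLI override"
    elif yaml_timeout != DEFAULT_TIMEOUT:
        timeout, source = yaml_timeout, "evaluator config"
    else:
        timeout, source = DEFAULT_TIMEOUT, "default"

    if timeout <= 0:
        raise ValueError(f"Timeout must be positive (> 0), got {timeout}")
    if timeout > MAX_TIMEOUT:
        timeout = MAX_TIMEOUT  # clamp instead of failing

    return timeout, source


assert resolve_timeout(None, 180) == (180, "default")
assert resolve_timeout(None, 300) == (300, "evaluator config")
assert resolve_timeout(400, 300) == (400, "CLI override")
assert resolve_timeout(900, 180) == (600, "CLI override")
```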

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/config.py
@@ -26,6 +26,7 @@ class EvaluatorConfig:
  fallback_model: Fallback model if primary fails
  aliases: Alternative command names
  version: Evaluator version
+ timeout: Timeout in seconds (default: 180, max: 600)
  source: "builtin" or "local" (set internally)
  config_file: Path to YAML file if local (set internally)
  """
@@ -43,6 +44,7 @@ class EvaluatorConfig:
  fallback_model: str | None = None
  aliases: list[str] = field(default_factory=list)
  version: str = "1.0.0"
+ timeout: int = 180 # Timeout in seconds (default: 180, max: 600)

  # Metadata (set internally during discovery, not from YAML)
  source: str = "builtin"

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow/evaluators/discovery.py
@@ -122,6 +122,35 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
  )

+ # Validate timeout if present
+ if "timeout" in data:
+ timeout = data["timeout"]
+ # Handle null/empty values
+ if timeout is None or timeout == "":
+ raise EvaluatorParseError("Field 'timeout' cannot be null or empty")
+ # Check for bool before int (bool is subclass of int in Python)
+ # YAML parses 'yes'/'true' as True, 'no'/'false' as False
+ if isinstance(timeout, bool):
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be an integer, got bool: {timeout!r}"
+ )
+ if not isinstance(timeout, int):
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be an integer, got {type(timeout).__name__}: {timeout!r}"
+ )
+ # timeout=0 is invalid (does not disable timeout - use a large value instead)
+ if timeout <= 0:
+ raise EvaluatorParseError(
+ f"Field 'timeout' must be positive (> 0), got {timeout}"
+ )
+ if timeout > 600:
+ logger.warning(
+ "Timeout %ds exceeds maximum (600s), clamping to 600s in %s",
+ timeout,
+ yml_file.name,
+ )
+ data["timeout"] = 600
+
  # Filter to known fields only (log unknown fields)
  known_fields = {
  "name",
@@ -134,6 +163,7 @@ def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
  "fallback_model",
  "aliases",
  "version",
+ "timeout",
  }
  unknown = set(data.keys()) - known_fields
  if unknown:
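
For reference, the YAML-side checks added above reduce to a small decision table: null/empty, booleans, non-integers, and values ≤ 0 are rejected; values above 600 are clamped with a warning. A self-contained sketch of the same rules (`validate_timeout` and `ParseError` are hypothetical stand-ins for the package's `parse_evaluator_yaml` and `EvaluatorParseError`):

```python
# Illustrative re-statement of the timeout validation added in discovery.py.
import logging

logger = logging.getLogger(__name__)


class ParseError(ValueError):
    """Stand-in for the package's EvaluatorParseError."""


def validate_timeout(value: object, filename: str = "evaluator.yml") -> int:
    if value is None or value == "":
        raise ParseError("Field 'timeout' cannot be null or empty")
    if isinstance(value, bool):  # bool is a subclass of int, so check it first
        raise ParseError(f"Field 'timeout' must be an integer, got bool: {value!r}")
    if not isinstance(value, int):
        raise ParseError(f"Field 'timeout' must be an integer, got {type(value).__name__}: {value!r}")
    if value <= 0:
        raise ParseError(f"Field 'timeout' must be positive (> 0), got {value}")
    if value > 600:
        logger.warning("Timeout %ds exceeds maximum (600s), clamping to 600s in %s", value, filename)
        return 600
    return value


assert validate_timeout(300) == 300   # accepted as-is
assert validate_timeout(1000) == 600  # clamped with a warning
```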

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: adversarial-workflow
- Version: 0.6.2
+ Version: 0.6.3
  Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
  Author: Fredrik Matheson
  License: MIT
@@ -55,9 +55,30 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes

- ## What's New in v0.6.0
+ ## What's New in v0.6.3

- 🔌 **Plugin Architecture** - Define custom evaluators without modifying the package:
+ ### Upgrade
+
+ ```bash
+ pip install --upgrade adversarial-workflow
+ ```
+
+ ### v0.6.3 - Configurable Timeouts
+
+ - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
+ - **CLI override**: Use `--timeout 400` to override YAML config on-the-fly
+ - **Timeout logging**: See which timeout source is used (CLI/YAML/default)
+ - **Safety limits**: Maximum 600 seconds to prevent runaway processes
+
+ ### v0.6.2 - .env Loading & Stability
+
+ - **Automatic .env loading**: API keys in `.env` files are now loaded at CLI startup
+ - **Custom evaluator support**: Evaluators using `api_key_env: GEMINI_API_KEY` (or other keys) now work with `.env` files
+ - **Better diagnostics**: `adversarial check` correctly reports the number of variables loaded from `.env`
+
+ ### v0.6.0 - Plugin Architecture
+
+ 🔌 **Custom Evaluators** - Define your own evaluators without modifying the package:

  ```bash
  # Create a custom evaluator
@@ -459,6 +480,7 @@ Starting with v0.6.0, you can define project-specific evaluators without modifyi
  | `aliases` | No | Alternative command names |
  | `log_prefix` | No | CLI output prefix |
  | `fallback_model` | No | Fallback model if primary fails |
+ | `timeout` | No | Timeout in seconds (default: 180, max: 600) |
  | `version` | No | Evaluator version (default: 1.0.0) |

  ### Listing Available Evaluators

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/adversarial_workflow.egg-info/SOURCES.txt
@@ -48,4 +48,5 @@ tests/test_list_evaluators.py
  tests/test_python_version.py
  tests/test_scripts_project.py
  tests/test_split_command.py
+ tests/test_timeout_integration.py
  tests/test_utils_validation.py

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
  [project]
  name = "adversarial-workflow"

- version = "0.6.2"
+ version = "0.6.3"

  description = "Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern"
  readme = "README.md"

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_cli_dynamic_commands.py
@@ -351,7 +351,9 @@ class TestBackwardsCompatibility:
  class TestGracefulDegradation:
  """Test graceful degradation on errors."""

- def test_help_works_without_local_evaluators_dir(self, tmp_path, monkeypatch, run_cli):
+ def test_help_works_without_local_evaluators_dir(
+ self, tmp_path, monkeypatch, run_cli
+ ):
  """CLI help works even without .adversarial/evaluators/ directory."""
  adv_dir = tmp_path / ".adversarial"
  adv_dir.mkdir(parents=True)
@@ -420,7 +422,9 @@ class TestReviewCommandBackwardsCompatibility:
  # Review should NOT have --timeout flag (that's for evaluators)
  assert "--timeout" not in result.stdout

- def test_review_command_not_overridden_by_evaluator(self, tmp_path, monkeypatch, run_cli):
+ def test_review_command_not_overridden_by_evaluator(
+ self, tmp_path, monkeypatch, run_cli
+ ):
  """Review command cannot be overridden by local evaluator."""
  adv_dir = tmp_path / ".adversarial"
  adv_dir.mkdir(parents=True)
@@ -488,7 +492,9 @@ aliases:
  assert "--path" in result_init.stdout
  assert "--interactive" in result_init.stdout

- def test_evaluator_with_conflicting_name_and_alias(self, tmp_path, monkeypatch, run_cli):
+ def test_evaluator_with_conflicting_name_and_alias(
+ self, tmp_path, monkeypatch, run_cli
+ ):
  """Evaluator with conflicting name doesn't crash when alias is processed."""
  adv_dir = tmp_path / ".adversarial"
  adv_dir.mkdir(parents=True)
@@ -518,3 +524,52 @@ aliases:
  assert result.returncode == 0
  # 'init' should still be the static command
  assert "init" in result.stdout
+
+
+ class TestTimeoutConfiguration:
+ """Test timeout configuration from YAML and CLI."""
+
+ def test_evaluator_config_timeout_in_yaml(self, tmp_path, monkeypatch, run_cli):
+ """Evaluator YAML timeout appears in help text."""
+ adv_dir = tmp_path / ".adversarial"
+ adv_dir.mkdir(parents=True)
+ (adv_dir / "config.yml").write_text("log_directory: .adversarial/logs/")
+
+ eval_dir = adv_dir / "evaluators"
+ eval_dir.mkdir(parents=True)
+ (eval_dir / "slow-model.yml").write_text(
+ """
+ name: slow-model
+ description: Slow model evaluator
+ model: mistral/mistral-large-latest
+ api_key_env: MISTRAL_API_KEY
+ prompt: Evaluate this
+ output_suffix: SLOW-EVAL
+ timeout: 300
+ """
+ )
+
+ monkeypatch.chdir(tmp_path)
+
+ result = run_cli(["slow-model", "--help"], cwd=tmp_path)
+ assert result.returncode == 0
+ # Help should mention timeout flag with updated text
+ assert "--timeout" in result.stdout or "-t" in result.stdout
+ # Help text mentions evaluator config (may wrap across lines)
+ assert "evaluator config" in result.stdout
+ assert "max: 600" in result.stdout
+
+ def test_timeout_help_text_updated(self, tmp_path, monkeypatch, run_cli):
+ """Timeout help text shows it can come from config."""
+ adv_dir = tmp_path / ".adversarial"
+ adv_dir.mkdir(parents=True)
+ (adv_dir / "config.yml").write_text("log_directory: .adversarial/logs/")
+
+ monkeypatch.chdir(tmp_path)
+
+ result = run_cli(["evaluate", "--help"], cwd=tmp_path)
+ assert result.returncode == 0
+ # New help text mentioning evaluator config (may wrap across lines)
+ assert "evaluator config" in result.stdout
+ # Max 600 mentioned
+ assert "max: 600" in result.stdout

{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_config.py
@@ -59,13 +59,16 @@ custom_setting: test_value

  def test_load_config_with_env_overrides(self):
  """Test that environment variables override config file values."""
- with patch("os.path.exists", return_value=False), patch.dict(
- os.environ,
- {
- "ADVERSARIAL_EVALUATOR_MODEL": "gpt-4-turbo",
- "ADVERSARIAL_TEST_COMMAND": "cargo test",
- "ADVERSARIAL_LOG_DIR": "custom_logs/",
- },
+ with (
+ patch("os.path.exists", return_value=False),
+ patch.dict(
+ os.environ,
+ {
+ "ADVERSARIAL_EVALUATOR_MODEL": "gpt-4-turbo",
+ "ADVERSARIAL_TEST_COMMAND": "cargo test",
+ "ADVERSARIAL_LOG_DIR": "custom_logs/",
+ },
+ ),
  ):
  config = load_config("nonexistent.yml")

@@ -127,13 +130,16 @@ test_command: pytest

  def test_load_config_partial_env_overrides(self):
  """Test that only set environment variables override config."""
- with patch("os.path.exists", return_value=False), patch.dict(
- os.environ,
- {
- "ADVERSARIAL_EVALUATOR_MODEL": "gpt-4",
- # Only set one env var, others should remain default
- },
- clear=True,
+ with (
+ patch("os.path.exists", return_value=False),
+ patch.dict(
+ os.environ,
+ {
+ "ADVERSARIAL_EVALUATOR_MODEL": "gpt-4",
+ # Only set one env var, others should remain default
+ },
+ clear=True,
+ ),
  ):
  config = load_config("nonexistent.yml")


{adversarial_workflow-0.6.2 → adversarial_workflow-0.6.3}/tests/test_evaluate.py
@@ -272,14 +272,18 @@ class TestEvaluate:
  large_content = "# Test task\n" + "Line content\n" * 600
  task_file.write_text(large_content)

- with patch("shutil.which", return_value="/usr/bin/aider"), patch(
- "adversarial_workflow.cli.load_config",
- return_value={"log_directory": ".adversarial/logs/"},
- ), patch("os.path.exists", return_value=True), patch(
- "adversarial_workflow.cli.validate_evaluation_output",
- return_value=(True, "APPROVED", "OK"),
- ), patch(
- "adversarial_workflow.cli.verify_token_count"
+ with (
+ patch("shutil.which", return_value="/usr/bin/aider"),
+ patch(
+ "adversarial_workflow.cli.load_config",
+ return_value={"log_directory": ".adversarial/logs/"},
+ ),
+ patch("os.path.exists", return_value=True),
+ patch(
+ "adversarial_workflow.cli.validate_evaluation_output",
+ return_value=(True, "APPROVED", "OK"),
+ ),
+ patch("adversarial_workflow.cli.verify_token_count"),
  ):
  result = evaluate(str(task_file))

@@ -293,11 +297,14 @@ class TestEvaluate:
  very_large_content = "# Test task\n" + "Line content\n" * 800
  task_file.write_text(very_large_content)

- with patch("shutil.which", return_value="/usr/bin/aider"), patch(
- "adversarial_workflow.cli.load_config",
- return_value={"log_directory": ".adversarial/logs/"},
- ), patch("os.path.exists", return_value=True), patch(
- "builtins.input", return_value="n"
+ with (
+ patch("shutil.which", return_value="/usr/bin/aider"),
+ patch(
+ "adversarial_workflow.cli.load_config",
+ return_value={"log_directory": ".adversarial/logs/"},
+ ),
+ patch("os.path.exists", return_value=True),
+ patch("builtins.input", return_value="n"),
  ): # User says no
  result = evaluate(str(task_file))
  assert result == 0 # Cancelled, not error
@@ -450,14 +457,18 @@ class TestEvaluateIntegration:

  def test_evaluate_with_sample_task(self, sample_task_file, mock_aider_command):
  """Test evaluate with sample task file from fixture."""
- with patch("shutil.which", return_value="/usr/bin/aider"), patch(
- "adversarial_workflow.cli.load_config",
- return_value={"log_directory": ".adversarial/logs/"},
- ), patch("os.path.exists", return_value=True), patch(
- "adversarial_workflow.cli.validate_evaluation_output",
- return_value=(True, "APPROVED", "OK"),
- ), patch(
- "adversarial_workflow.cli.verify_token_count"
+ with (
+ patch("shutil.which", return_value="/usr/bin/aider"),
+ patch(
+ "adversarial_workflow.cli.load_config",
+ return_value={"log_directory": ".adversarial/logs/"},
+ ),
+ patch("os.path.exists", return_value=True),
+ patch(
+ "adversarial_workflow.cli.validate_evaluation_output",
+ return_value=(True, "APPROVED", "OK"),
+ ),
+ patch("adversarial_workflow.cli.verify_token_count"),
  ):
  result = evaluate(str(sample_task_file))
  assert isinstance(result, int)
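
The test_config.py and test_evaluate.py hunks above are pure formatting changes: chained `with a(), b():` statements are rewritten in the parenthesized multiple-context-manager form, which needs Python 3.10+. A minimal sketch of the syntax (the `nullcontext` managers are illustrative placeholders, not anything from the package):

```python
# Parenthesized context managers (Python 3.10+): the group can be wrapped
# and trailing-comma formatted, which is the style the test diffs adopt.
from contextlib import nullcontext

with (
    nullcontext("first") as a,
    nullcontext("second") as b,
):
    print(a, b)  # -> first second
```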