adversarial-workflow 0.5.0__tar.gz → 0.6.0__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (51)
  1. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/PKG-INFO +61 -1
  2. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/README.md +60 -0
  3. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/__init__.py +1 -1
  4. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/cli.py +127 -237
  5. adversarial_workflow-0.6.0/adversarial_workflow/evaluators/__init__.py +45 -0
  6. adversarial_workflow-0.6.0/adversarial_workflow/evaluators/builtins.py +36 -0
  7. adversarial_workflow-0.6.0/adversarial_workflow/evaluators/config.py +49 -0
  8. adversarial_workflow-0.6.0/adversarial_workflow/evaluators/discovery.py +212 -0
  9. adversarial_workflow-0.6.0/adversarial_workflow/evaluators/runner.py +313 -0
  10. adversarial_workflow-0.6.0/adversarial_workflow/utils/__init__.py +17 -0
  11. adversarial_workflow-0.6.0/adversarial_workflow/utils/colors.py +9 -0
  12. adversarial_workflow-0.6.0/adversarial_workflow/utils/config.py +44 -0
  13. adversarial_workflow-0.6.0/adversarial_workflow/utils/file_splitter.py +378 -0
  14. adversarial_workflow-0.6.0/adversarial_workflow/utils/validation.py +76 -0
  15. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/PKG-INFO +61 -1
  16. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/SOURCES.txt +17 -1
  17. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/pyproject.toml +4 -3
  18. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_cli.py +1 -1
  19. adversarial_workflow-0.6.0/tests/test_cli_dynamic_commands.py +621 -0
  20. adversarial_workflow-0.6.0/tests/test_evaluator_config.py +135 -0
  21. adversarial_workflow-0.6.0/tests/test_evaluator_discovery.py +680 -0
  22. adversarial_workflow-0.6.0/tests/test_evaluator_runner.py +249 -0
  23. adversarial_workflow-0.6.0/tests/test_list_evaluators.py +121 -0
  24. adversarial_workflow-0.6.0/tests/test_utils_validation.py +116 -0
  25. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/LICENSE +0 -0
  26. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/__main__.py +0 -0
  27. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
  28. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/.env.example.template +0 -0
  29. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/README.template +0 -0
  30. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
  31. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
  32. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
  33. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
  34. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
  35. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/config.yml.template +0 -0
  36. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
  37. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/example-task.md.template +0 -0
  38. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
  39. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
  40. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
  41. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
  42. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/entry_points.txt +0 -0
  43. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/requires.txt +0 -0
  44. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow.egg-info/top_level.txt +0 -0
  45. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/setup.cfg +0 -0
  46. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/setup.py +0 -0
  47. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_config.py +0 -0
  48. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_evaluate.py +0 -0
  49. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_file_splitter.py +0 -0
  50. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_python_version.py +0 -0
  51. {adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/tests/test_split_command.py +0 -0
{adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: adversarial-workflow
-Version: 0.5.0
+Version: 0.6.0
 Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
 Author: Fredrik Matheson
 License: MIT
@@ -382,8 +382,68 @@ adversarial split task.md # Split large files into smaller parts
 adversarial split task.md --dry-run # Preview split without creating files
 adversarial review # Phase 3: Review implementation
 adversarial validate "pytest" # Phase 4: Validate with tests
+adversarial list-evaluators # List all available evaluators
 ```

+## Custom Evaluators
+
+Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
+
+### Creating a Custom Evaluator
+
+1. Create the evaluators directory:
+```bash
+mkdir -p .adversarial/evaluators
+```
+
+2. Create a YAML definition:
+```yaml
+# .adversarial/evaluators/athena.yml
+name: athena
+description: Knowledge evaluation using Gemini 2.5 Pro
+model: gemini-2.5-pro
+api_key_env: GEMINI_API_KEY
+output_suffix: KNOWLEDGE-EVALUATION
+prompt: |
+  You are Athena, a knowledge evaluation specialist...
+
+# Optional
+aliases:
+  - knowledge
+```
+
+3. Use it like any built-in evaluator:
+```bash
+adversarial athena docs/research-plan.md
+```
+
+### Evaluator YAML Schema
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `name` | Yes | Command name |
+| `description` | Yes | Help text shown in CLI |
+| `model` | Yes | Model to use (e.g., `gpt-4o`, `gemini-2.5-pro`) |
+| `api_key_env` | Yes | Environment variable for API key |
+| `output_suffix` | Yes | Log file suffix (e.g., `KNOWLEDGE-EVAL`) |
+| `prompt` | Yes | The evaluation prompt |
+| `aliases` | No | Alternative command names |
+| `log_prefix` | No | CLI output prefix |
+| `fallback_model` | No | Fallback model if primary fails |
+| `version` | No | Evaluator version (default: 1.0.0) |
+
+### Listing Available Evaluators
+
+```bash
+adversarial list-evaluators
+```
+
+### Example: Athena Knowledge Evaluator
+
+See [docs/examples/athena.yml](docs/examples/athena.yml) for a complete example of a knowledge-focused evaluator using Gemini 2.5 Pro.
+
+For full documentation on custom evaluators, see [docs/CUSTOM_EVALUATORS.md](docs/CUSTOM_EVALUATORS.md).
+
 ## Configuration

 ### Option 1: YAML Config (persistent)
{adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/README.md
@@ -347,8 +347,68 @@ adversarial split task.md # Split large files into smaller parts
 adversarial split task.md --dry-run # Preview split without creating files
 adversarial review # Phase 3: Review implementation
 adversarial validate "pytest" # Phase 4: Validate with tests
+adversarial list-evaluators # List all available evaluators
 ```

+## Custom Evaluators
+
+Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
+
+### Creating a Custom Evaluator
+
+1. Create the evaluators directory:
+```bash
+mkdir -p .adversarial/evaluators
+```
+
+2. Create a YAML definition:
+```yaml
+# .adversarial/evaluators/athena.yml
+name: athena
+description: Knowledge evaluation using Gemini 2.5 Pro
+model: gemini-2.5-pro
+api_key_env: GEMINI_API_KEY
+output_suffix: KNOWLEDGE-EVALUATION
+prompt: |
+  You are Athena, a knowledge evaluation specialist...
+
+# Optional
+aliases:
+  - knowledge
+```
+
+3. Use it like any built-in evaluator:
+```bash
+adversarial athena docs/research-plan.md
+```
+
+### Evaluator YAML Schema
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `name` | Yes | Command name |
+| `description` | Yes | Help text shown in CLI |
+| `model` | Yes | Model to use (e.g., `gpt-4o`, `gemini-2.5-pro`) |
+| `api_key_env` | Yes | Environment variable for API key |
+| `output_suffix` | Yes | Log file suffix (e.g., `KNOWLEDGE-EVAL`) |
+| `prompt` | Yes | The evaluation prompt |
+| `aliases` | No | Alternative command names |
+| `log_prefix` | No | CLI output prefix |
+| `fallback_model` | No | Fallback model if primary fails |
+| `version` | No | Evaluator version (default: 1.0.0) |
+
+### Listing Available Evaluators
+
+```bash
+adversarial list-evaluators
+```
+
+### Example: Athena Knowledge Evaluator
+
+See [docs/examples/athena.yml](docs/examples/athena.yml) for a complete example of a knowledge-focused evaluator using Gemini 2.5 Pro.
+
+For full documentation on custom evaluators, see [docs/CUSTOM_EVALUATORS.md](docs/CUSTOM_EVALUATORS.md).
+
 ## Configuration

 ### Option 1: YAML Config (persistent)
{adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/__init__.py
@@ -12,7 +12,7 @@ Usage:
     adversarial validate "pytest"
 """

-__version__ = "0.5.0"
+__version__ = "0.6.0"
 __author__ = "Fredrik Matheson"
 __license__ = "MIT"

{adversarial_workflow-0.5.0 → adversarial_workflow-0.6.0}/adversarial_workflow/cli.py
@@ -29,7 +29,7 @@ from typing import Dict, List, Optional, Tuple
 import yaml
 from dotenv import load_dotenv

-__version__ = "0.5.0"
+__version__ = "0.6.0"

 # ANSI color codes for better output
 RESET = "\033[0m"
@@ -2086,225 +2086,8 @@ def evaluate(task_file: str) -> int:
     return 0


-def proofread(doc_file: str) -> int:
-    """Run proofreading review on teaching/documentation content."""

-    print(f"📖 Proofreading document: {doc_file}")
-    print()
-
-    # Error 1: Document file not found
-    if not os.path.exists(doc_file):
-        print(f"{RED}❌ ERROR: Document file not found: {doc_file}{RESET}")
-        print(" Usage: adversarial proofread <document_file>")
-        print(" Example: adversarial proofread docs/guide/concept.md")
-        return 1
-
-    # Error 2: Config not loaded
-    try:
-        config = load_config()
-    except FileNotFoundError:
-        print(f"{RED}❌ ERROR: Not initialized. Run 'adversarial init' first.{RESET}")
-        return 1
-
-    # Error 3: Aider not available
-    if not shutil.which("aider"):
-        print(f"{RED}❌ ERROR: Aider not found{RESET}")
-        print()
-        print(f"{BOLD}WHY:{RESET}")
-        print(" Proofreader uses aider (AI pair programming tool) to:")
-        print(" • Review teaching content quality")
-        print(" • Check clarity and accuracy")
-        print(" • Evaluate pedagogical effectiveness")
-        print()
-        print(f"{BOLD}FIX:{RESET}")
-        print(" 1. Install aider: pip install aider-chat")
-        print(" 2. Verify installation: aider --version")
-        print(" 3. Then retry: adversarial proofread ...")
-        print()
-        print(f"{BOLD}HELP:{RESET}")
-        print(" Aider docs: https://aider.chat/docs/install.html")
-        return 1
-
-    # Pre-flight check for file size
-    with open(doc_file, "r") as f:
-        line_count = len(f.readlines())
-        f.seek(0)
-        file_size = len(f.read())

-    # Estimate tokens (1 token ≈ 4 characters)
-    estimated_tokens = file_size // 4
-
-    # Warn if file is large (>500 lines or >20k tokens)
-    if line_count > 500 or estimated_tokens > 20000:
-        print(f"{YELLOW}⚠️ Large file detected:{RESET}")
-        print(f" Lines: {line_count:,}")
-        print(f" Estimated tokens: ~{estimated_tokens:,}")
-        print()
-        print(f"{BOLD}Note:{RESET} Files over 500 lines may exceed OpenAI rate limits.")
-        print(
-            f" If proofreading fails, consider splitting into smaller documents."
-        )
-        print()
-
-        # Give user a chance to cancel for very large files
-        if line_count > 700:
-            print(f"{RED}⚠️ WARNING: File is very large (>{line_count} lines){RESET}")
-            print(f" This will likely fail on Tier 1 OpenAI accounts (30k TPM limit)")
-            print(f" Recommended: Split into files <500 lines each")
-            print()
-            response = input("Continue anyway? [y/N]: ").strip().lower()
-            if response not in ["y", "yes"]:
-                print("Proofreading cancelled.")
-                return 0
-            print()
-
-    # Error 4: Script execution fails
-    script = ".adversarial/scripts/proofread_content.sh"
-    if not os.path.exists(script):
-        print(f"{RED}❌ ERROR: Script not found: {script}{RESET}")
-        print(" Fix: Run 'adversarial init' to reinstall scripts")
-        return 1
-
-    try:
-        result = subprocess.run(
-            [script, doc_file], text=True, capture_output=True, timeout=180  # 3 minutes
-        )
-
-        # Check for rate limit errors in output
-        output = result.stdout + result.stderr
-        if "RateLimitError" in output or "tokens per min (TPM)" in output:
-            print(f"{RED}❌ ERROR: OpenAI rate limit exceeded{RESET}")
-            print()
-            print(f"{BOLD}WHY:{RESET}")
-            print(
-                " Your document file is too large for your OpenAI organization's rate limit"
-            )
-            print()
-
-            # Extract file size for helpful message
-            with open(doc_file, "r") as f:
-                line_count = len(f.readlines())
-
-            print(f"{BOLD}FILE SIZE:{RESET}")
-            print(f" Lines: {line_count:,}")
-            print(f" Recommended limit: 500 lines")
-            print()
-            print(f"{BOLD}SOLUTIONS:{RESET}")
-            print(" 1. Split your document into smaller files (<500 lines each)")
-            print(" 2. Upgrade your OpenAI tier (Tier 2 supports ~1,000 lines)")
-            print(" 3. Use manual review for this comprehensive document")
-            print()
-            print(f"{BOLD}MORE INFO:{RESET}")
-            print(" https://platform.openai.com/docs/guides/rate-limits")
-            return 1
-
-    except subprocess.TimeoutExpired:
-        print(f"{RED}❌ ERROR: Proofreading timed out (>3 minutes){RESET}")
-        print()
-        print(f"{BOLD}WHY:{RESET}")
-        print(" The AI model took too long to respond")
-        print()
-        print(f"{BOLD}POSSIBLE CAUSES:{RESET}")
-        print(" • Network issues connecting to API")
-        print(" • Document file too large (>1000 lines)")
-        print(" • API rate limiting")
-        print()
-        print(f"{BOLD}FIX:{RESET}")
-        print(" 1. Check your network connection")
-        print(" 2. Try a smaller document file")
-        print(" 3. Wait a few minutes and retry")
-        return 1
-    except FileNotFoundError as e:
-        # Check if this is a bash/platform issue
-        if platform.system() == "Windows":
-            print(f"{RED}❌ ERROR: Cannot execute workflow scripts{RESET}")
-            print()
-            print(f"{BOLD}WHY:{RESET}")
-            print(" Native Windows (PowerShell/CMD) cannot run bash scripts")
-            print(" This package requires Unix shell (bash) for workflow automation")
-            print()
-            print(f"{BOLD}FIX:{RESET}")
-            print(" Option 1 (RECOMMENDED): Use WSL (Windows Subsystem for Linux)")
-            print(
-                " 1. Install WSL: https://learn.microsoft.com/windows/wsl/install"
-            )
-            print(" 2. Open WSL terminal")
-            print(" 3. Reinstall package in WSL: pip install adversarial-workflow")
-            print()
-            print(" Option 2: Try Git Bash (not officially supported)")
-            print(" • May have compatibility issues")
-            print(" • WSL is strongly recommended")
-            print()
-            print(f"{BOLD}HELP:{RESET}")
-            print(" See platform requirements: README.md#platform-support")
-        else:
-            print(f"{RED}❌ ERROR: Script not found: {script}{RESET}")
-            print()
-            print(f"{BOLD}WHY:{RESET}")
-            print(" Workflow scripts are missing or corrupted")
-            print()
-            print(f"{BOLD}FIX:{RESET}")
-            print(" Run: adversarial init")
-            print(" This will reinstall all workflow scripts")
-        return 1
-
-    # Error 5: Proofreading rejected
-    if result.returncode != 0:
-        print()
-        print("📋 Proofreading complete (needs revision)")
-        print(f" Details: {config['log_directory']}")
-        return result.returncode
-
-    # Error 6: Validation - Check if proofreading actually ran (not just empty output)
-    # Extract document name from filename to find log file
-    doc_basename = os.path.basename(doc_file)
-    doc_name = os.path.splitext(doc_basename)[0]
-
-    log_file = os.path.join(config["log_directory"], f"{doc_name}-PROOFREADING.md")
-
-    is_valid, verdict, message = validate_evaluation_output(log_file)
-    if not is_valid:
-        print()
-        print(f"{RED}❌ Proofreading failed: {message}{RESET}")
-        print()
-        print(f"{BOLD}WHY:{RESET}")
-        print(" The proofreading script ran but didn't produce valid output")
-        print(" This usually means Aider encountered an error before running GPT-4o")
-        print()
-        print(f"{BOLD}LOG FILE:{RESET}")
-        print(f" {log_file}")
-        print()
-        print(f"{BOLD}FIX:{RESET}")
-        print(" 1. Check the log file for error messages")
-        print(" 2. Ensure your API keys are valid: adversarial check")
-        print(" 3. Try running the proofreading again")
-        print()
-        return 1
-
-    # Verify token count (warn if suspiciously low)
-    verify_token_count(doc_file, log_file)
-
-    # Report based on actual verdict from proofreading
-    print()
-    if verdict == "APPROVED":
-        print(f"{GREEN}✅ Proofreading APPROVED!{RESET}")
-        print(f" Document is ready for publication")
-        print(f" Review output: {log_file}")
-        return 0
-    elif verdict == "NEEDS_REVISION":
-        print(f"{YELLOW}⚠️ Proofreading NEEDS_REVISION{RESET}")
-        print(f" Review feedback and update document")
-        print(f" Details: {log_file}")
-        return 1
-    elif verdict == "REJECTED":
-        print(f"{RED}❌ Proofreading REJECTED{RESET}")
-        print(f" Document has fundamental issues - major revision needed")
-        print(f" Details: {log_file}")
-        return 1
-    else:  # UNKNOWN or other
-        print(f"{YELLOW}⚠️ Proofreading complete (verdict: {verdict}){RESET}")
-        print(f" Review output: {log_file}")
-        return 0


 def review() -> int:
@@ -3041,8 +2824,66 @@ def split(task_file: str, strategy: str = "sections", max_lines: int = 500, dry_
         print(f"{RED}Error during file splitting: {e}{RESET}")
         return 1

+
+def list_evaluators() -> int:
+    """List all available evaluators (built-in and local)."""
+    from adversarial_workflow.evaluators import (
+        BUILTIN_EVALUATORS,
+        discover_local_evaluators,
+    )
+
+    # Print built-in evaluators
+    print(f"{BOLD}Built-in Evaluators:{RESET}")
+    for name, config in sorted(BUILTIN_EVALUATORS.items()):
+        print(f" {name:14} {config.description}")
+
+    print()
+
+    # Print local evaluators
+    local_evaluators = discover_local_evaluators()
+    if local_evaluators:
+        print(f"{BOLD}Local Evaluators{RESET} (.adversarial/evaluators/):")
+
+        # Group by primary name (skip aliases)
+        seen_configs = set()
+        for _, config in sorted(local_evaluators.items()):
+            if id(config) in seen_configs:
+                continue
+            seen_configs.add(id(config))
+
+            print(f" {config.name:14} {config.description}")
+            if config.aliases:
+                print(f" aliases: {', '.join(config.aliases)}")
+            print(f" model: {config.model}")
+            if config.version != "1.0.0":
+                print(f" version: {config.version}")
+    else:
+        print(f"{GRAY}No local evaluators found.{RESET}")
+        print()
+        print("Create .adversarial/evaluators/*.yml to add custom evaluators.")
+        print("See: https://github.com/movito/adversarial-workflow#custom-evaluators")
+
+    return 0
+
 def main():
     """Main CLI entry point."""
+    import logging
+
+    from adversarial_workflow.evaluators import (
+        get_all_evaluators,
+        run_evaluator,
+        BUILTIN_EVALUATORS,
+    )
+
+    logger = logging.getLogger(__name__)
+
+    # Commands that cannot be overridden by evaluators
+    # Note: 'review' is special - it reviews git changes without a file argument
+    STATIC_COMMANDS = {
+        "init", "check", "doctor", "health", "quickstart",
+        "agent", "split", "validate", "review", "list-evaluators"
+    }
+
     parser = argparse.ArgumentParser(
         description="Adversarial Workflow - Multi-stage AI code review",
         formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -3112,17 +2953,7 @@ For more information: https://github.com/movito/adversarial-workflow
         "--path", default=".", help="Project path (default: current directory)"
     )

-    # evaluate command
-    eval_parser = subparsers.add_parser("evaluate", help="Run Phase 1: Plan evaluation")
-    eval_parser.add_argument("task_file", help="Task file to evaluate")
-
-    # proofread command
-    proofread_parser = subparsers.add_parser(
-        "proofread", help="Proofread teaching content and documentation"
-    )
-    proofread_parser.add_argument("doc_file", help="Document file to proofread")
-
-    # review command
+    # review command (static - reviews git changes, no file argument)
     subparsers.add_parser("review", help="Run Phase 3: Code review")

     # validate command
@@ -3151,13 +2982,74 @@ For more information: https://github.com/movito/adversarial-workflow
         help="Preview splits without creating files"
     )

+    # list-evaluators command
+    subparsers.add_parser(
+        "list-evaluators",
+        help="List all available evaluators (built-in and local)",
+    )
+
+    # Dynamic evaluator registration
+    try:
+        evaluators = get_all_evaluators()
+    except Exception as e:
+        logger.warning("Evaluator discovery failed: %s", e)
+        evaluators = BUILTIN_EVALUATORS
+
+    registered_configs = set()  # Track by id() to avoid duplicate alias registration
+
+    for name, config in evaluators.items():
+        # Skip if name conflicts with static command
+        if name in STATIC_COMMANDS:
+            logger.warning("Evaluator '%s' conflicts with CLI command; skipping", name)
+            # Mark as registered to prevent alias re-registration attempts
+            registered_configs.add(id(config))
+            continue
+
+        # Skip if this config was already registered (aliases share config object)
+        if id(config) in registered_configs:
+            continue
+        registered_configs.add(id(config))
+
+        # Filter aliases that conflict with static commands
+        aliases = [a for a in (config.aliases or []) if a not in STATIC_COMMANDS]
+        if config.aliases and len(aliases) != len(config.aliases):
+            skipped = [a for a in config.aliases if a in STATIC_COMMANDS]
+            logger.warning(
+                "Skipping evaluator aliases that conflict with static commands: %s",
+                skipped,
+            )
+
+        # Create subparser for this evaluator
+        eval_parser = subparsers.add_parser(
+            config.name,
+            help=config.description,
+            aliases=aliases,
+        )
+        eval_parser.add_argument("file", help="File to evaluate")
+        eval_parser.add_argument(
+            "--timeout", "-t",
+            type=int,
+            default=180,
+            help="Timeout in seconds (default: 180)"
+        )
+        # Store config for later execution
+        eval_parser.set_defaults(evaluator_config=config)
+
     args = parser.parse_args()

     if not args.command:
         parser.print_help()
         return 0

-    # Execute command
+    # Check for evaluator command first (has evaluator_config attribute)
+    if hasattr(args, "evaluator_config"):
+        return run_evaluator(
+            args.evaluator_config,
+            args.file,
+            timeout=args.timeout,
+        )
+
+    # Execute static commands
     if args.command == "init":
         if args.interactive:
             return init_interactive(args.path)
@@ -3177,21 +3069,19 @@ For more information: https://github.com/movito/adversarial-workflow
         print(f"{RED}Error: agent command requires a subcommand{RESET}")
         print("Usage: adversarial agent onboard")
         return 1
-    elif args.command == "evaluate":
-        return evaluate(args.task_file)
-    elif args.command == "proofread":
-        return proofread(args.doc_file)
     elif args.command == "review":
         return review()
     elif args.command == "validate":
         return validate(args.test_command)
     elif args.command == "split":
         return split(
-            args.task_file,
-            strategy=args.strategy,
-            max_lines=args.max_lines,
+            args.task_file,
+            strategy=args.strategy,
+            max_lines=args.max_lines,
             dry_run=args.dry_run
         )
+    elif args.command == "list-evaluators":
+        return list_evaluators()
     else:
         parser.print_help()
         return 1
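Taken together, the registration and dispatch hunks above reduce to a small programmatic flow. A sketch that mirrors the calls made in `main()` (this is not a documented public API, and the `athena` entry exists only if the local YAML from the README example is present):

```python
# Sketch of the 0.6.0 evaluator plumbing, mirroring cli.main(); treat as
# illustrative rather than a stable public interface.
from adversarial_workflow.evaluators import get_all_evaluators, run_evaluator

# Built-ins merged with any .adversarial/evaluators/*.yml definitions;
# local names shadow built-ins of the same name.
evaluators = get_all_evaluators()

athena = evaluators.get("athena")  # only present when a local athena.yml exists
if athena is not None:
    # Same call main() dispatches to for a dynamically registered subcommand.
    exit_code = run_evaluator(athena, "docs/research-plan.md", timeout=300)
    print("evaluator exit code:", exit_code)
```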
adversarial_workflow-0.6.0/adversarial_workflow/evaluators/__init__.py (new file)
@@ -0,0 +1,45 @@
+"""Evaluators module for adversarial-workflow plugin architecture."""
+
+from .config import EvaluatorConfig
+from .discovery import (
+    discover_local_evaluators,
+    parse_evaluator_yaml,
+    EvaluatorParseError,
+)
+from .runner import run_evaluator
+from .builtins import BUILTIN_EVALUATORS
+
+
+def get_all_evaluators() -> dict[str, EvaluatorConfig]:
+    """Get all available evaluators (built-in + local).
+
+    Local evaluators override built-in evaluators with the same name.
+    Aliases from local evaluators are also included in the returned dictionary.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    evaluators: dict[str, EvaluatorConfig] = {}
+
+    # Add built-in evaluators first
+    evaluators.update(BUILTIN_EVALUATORS)
+
+    # Discover and add local evaluators (may override built-ins)
+    local = discover_local_evaluators()
+    for name, config in local.items():
+        if name in BUILTIN_EVALUATORS:
+            logger.info("Local evaluator '%s' overrides built-in", name)
+        evaluators[name] = config
+
+    return evaluators
+
+
+__all__ = [
+    "EvaluatorConfig",
+    "EvaluatorParseError",
+    "run_evaluator",
+    "get_all_evaluators",
+    "discover_local_evaluators",
+    "parse_evaluator_yaml",
+    "BUILTIN_EVALUATORS",
+]
adversarial_workflow-0.6.0/adversarial_workflow/evaluators/builtins.py (new file)
@@ -0,0 +1,36 @@
+"""Built-in evaluator configurations."""
+
+from __future__ import annotations
+
+from .config import EvaluatorConfig
+
+# Built-in evaluators use shell scripts - prompts are in the scripts
+BUILTIN_EVALUATORS: dict[str, EvaluatorConfig] = {
+    "evaluate": EvaluatorConfig(
+        name="evaluate",
+        description="Plan evaluation (GPT-4o)",
+        model="gpt-4o",
+        api_key_env="OPENAI_API_KEY",
+        prompt="",  # Prompt is in shell script
+        output_suffix="PLAN-EVALUATION",
+        source="builtin",
+    ),
+    "proofread": EvaluatorConfig(
+        name="proofread",
+        description="Teaching content review (GPT-4o)",
+        model="gpt-4o",
+        api_key_env="OPENAI_API_KEY",
+        prompt="",  # Prompt is in shell script
+        output_suffix="PROOFREADING",
+        source="builtin",
+    ),
+    "review": EvaluatorConfig(
+        name="review",
+        description="Code review (GPT-4o)",
+        model="gpt-4o",
+        api_key_env="OPENAI_API_KEY",
+        prompt="",  # Prompt is in shell script
+        output_suffix="CODE-REVIEW",
+        source="builtin",
+    ),
+}
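A project-level definition such as the `athena.yml` shown in the README hunk would presumably be loaded into the same `EvaluatorConfig` shape as these built-ins. A rough sketch of the equivalent object, with keyword names taken from the constructor calls above and the optional fields from the schema table (the `source` value for local evaluators is assumed here, not shown in this diff):

```python
from adversarial_workflow.evaluators import EvaluatorConfig

# Approximate in-memory form of .adversarial/evaluators/athena.yml (README example).
# aliases comes from the optional schema fields; source="local" is an assumption.
athena = EvaluatorConfig(
    name="athena",
    description="Knowledge evaluation using Gemini 2.5 Pro",
    model="gemini-2.5-pro",
    api_key_env="GEMINI_API_KEY",
    prompt="You are Athena, a knowledge evaluation specialist...",
    output_suffix="KNOWLEDGE-EVALUATION",
    aliases=["knowledge"],
    source="local",
)
```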