adversarial-workflow 0.7.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/PKG-INFO +158 -3
  2. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/README.md +157 -2
  3. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/__init__.py +1 -1
  4. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/cli.py +193 -5
  5. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/__init__.py +11 -2
  6. adversarial_workflow-0.9.0/adversarial_workflow/evaluators/config.py +88 -0
  7. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/discovery.py +97 -9
  8. adversarial_workflow-0.9.0/adversarial_workflow/evaluators/resolver.py +211 -0
  9. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/runner.py +36 -13
  10. adversarial_workflow-0.9.0/adversarial_workflow/library/__init__.py +56 -0
  11. adversarial_workflow-0.9.0/adversarial_workflow/library/cache.py +184 -0
  12. adversarial_workflow-0.9.0/adversarial_workflow/library/client.py +224 -0
  13. adversarial_workflow-0.9.0/adversarial_workflow/library/commands.py +849 -0
  14. adversarial_workflow-0.9.0/adversarial_workflow/library/config.py +81 -0
  15. adversarial_workflow-0.9.0/adversarial_workflow/library/models.py +129 -0
  16. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/PKG-INFO +158 -3
  17. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/SOURCES.txt +13 -0
  18. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/pyproject.toml +4 -1
  19. adversarial_workflow-0.9.0/tests/test_evaluate_with_evaluator.py +277 -0
  20. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluator_discovery.py +305 -2
  21. adversarial_workflow-0.9.0/tests/test_evaluator_runner.py +479 -0
  22. adversarial_workflow-0.9.0/tests/test_library_client.py +354 -0
  23. adversarial_workflow-0.9.0/tests/test_library_commands.py +385 -0
  24. adversarial_workflow-0.9.0/tests/test_library_enhancements.py +746 -0
  25. adversarial_workflow-0.9.0/tests/test_library_integration.py +118 -0
  26. adversarial_workflow-0.9.0/tests/test_model_resolver.py +460 -0
  27. adversarial_workflow-0.7.0/adversarial_workflow/evaluators/config.py +0 -51
  28. adversarial_workflow-0.7.0/tests/test_evaluator_runner.py +0 -262
  29. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/LICENSE +0 -0
  30. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/__main__.py +0 -0
  31. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/builtins.py +0 -0
  32. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
  33. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/.env.example.template +0 -0
  34. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/README.template +0 -0
  35. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
  36. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
  37. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
  38. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
  39. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
  40. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/config.yml.template +0 -0
  41. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
  42. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/example-task.md.template +0 -0
  43. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
  44. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
  45. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
  46. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/__init__.py +0 -0
  47. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/citations.py +0 -0
  48. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/colors.py +0 -0
  49. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/config.py +0 -0
  50. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/file_splitter.py +0 -0
  51. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/validation.py +0 -0
  52. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
  53. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/entry_points.txt +0 -0
  54. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/requires.txt +0 -0
  55. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/top_level.txt +0 -0
  56. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/setup.cfg +0 -0
  57. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/setup.py +0 -0
  58. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_citations.py +0 -0
  59. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_cli.py +0 -0
  60. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_cli_dynamic_commands.py +0 -0
  61. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_config.py +0 -0
  62. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_env_loading.py +0 -0
  63. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluate.py +0 -0
  64. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluator_config.py +0 -0
  65. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_file_splitter.py +0 -0
  66. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_list_evaluators.py +0 -0
  67. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_python_version.py +0 -0
  68. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_scripts_project.py +0 -0
  69. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_split_command.py +0 -0
  70. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_timeout_integration.py +0 -0
  71. {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_utils_validation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: adversarial-workflow
3
- Version: 0.7.0
3
+ Version: 0.9.0
4
4
  Summary: Multi-stage AI evaluation system for task plans, code review, and test validation
5
5
  Author: Fredrik Matheson
6
6
  License: MIT
@@ -57,7 +57,7 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
57
57
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
58
58
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
59
59
 
60
- ## What's New in v0.6.3
60
+ ## What's New in v0.9.0
61
61
 
62
62
  ### Upgrade
63
63
 
@@ -65,6 +65,67 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
65
65
  pip install --upgrade adversarial-workflow
66
66
  ```
67
67
 
68
+ ### v0.9.0 - Run Library Evaluators
69
+
70
+ **Finally run your installed evaluators!** Use the new `--evaluator` flag:
71
+
72
+ ```bash
73
+ # Install an evaluator from the library
74
+ adversarial library install google/gemini-flash
75
+
76
+ # Run it with --evaluator flag
77
+ adversarial evaluate --evaluator gemini-flash task.md
78
+ adversarial evaluate -e gemini-flash task.md # short form
79
+
80
+ # Works with model_requirement for portable evaluators
81
+ # Automatically resolves to best available model
82
+ ```
83
+
84
+ **Key Features:**
85
+ - Run any installed evaluator by name
86
+ - Supports evaluator aliases
87
+ - Automatic model resolution via `model_requirement`
88
+ - Falls back to legacy `model` field if resolution fails
89
+ - Full backward compatibility - no flag uses existing behavior
90
+
91
+ See [Evaluator Library](#evaluator-library) for full documentation.
92
+
93
+ ### v0.8.1 - BugBot Fixes
94
+
95
+ - **CI/CD compatibility**: `--category --dry-run` no longer hangs in non-TTY environments
96
+ - **Proper exit codes**: Dry-run returns 1 when all previews fail
97
+ - **Config robustness**: Non-dict YAML configs no longer crash
98
+
99
+ ### v0.7.0 - Evaluator Library
100
+
101
+ Browse, install, and update evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library):
102
+
103
+ ```bash
104
+ # Browse available evaluators
105
+ adversarial library list
106
+
107
+ # Filter by provider or category
108
+ adversarial library list --provider google
109
+ adversarial library list --category quick-check
110
+
111
+ # Install evaluators
112
+ adversarial library install google/gemini-flash openai/fast-check
113
+
114
+ # Check for updates
115
+ adversarial library check-updates
116
+
117
+ # Update installed evaluators
118
+ adversarial library update --all
119
+ ```
120
+
121
+ **Key Features:**
122
+ - Index caching with 1-hour TTL for faster lookups
123
+ - Offline support with stale cache fallback
124
+ - Provenance tracking via `_meta` block in installed files
125
+ - Diff preview before applying updates
126
+
127
+ See [Evaluator Library](#evaluator-library) for full documentation.
128
+
68
129
  ### v0.6.3 - Configurable Timeouts
69
130
 
70
131
  - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
@@ -429,7 +490,8 @@ adversarial health # Comprehensive system health check
429
490
  adversarial agent onboard # Set up agent coordination system
430
491
 
431
492
  # Workflow
432
- adversarial evaluate task.md # Phase 1: Evaluate plan
493
+ adversarial evaluate task.md # Phase 1: Evaluate plan (uses config.yml)
494
+ adversarial evaluate -e <name> task.md # Phase 1: Evaluate with installed evaluator
433
495
  adversarial split task.md # Split large files into smaller parts
434
496
  adversarial split task.md --dry-run # Preview split without creating files
435
497
  adversarial review # Phase 3: Review implementation
@@ -437,6 +499,99 @@ adversarial validate "pytest" # Phase 4: Validate with tests
437
499
  adversarial list-evaluators # List all available evaluators
438
500
  ```
439
501
 
502
+ ## Evaluator Library
503
+
504
+ Browse and install pre-configured evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library).
505
+
506
+ ### Quick Start
507
+
508
+ ```bash
509
+ # Browse available evaluators
510
+ adversarial library list
511
+
512
+ # Filter by provider or category
513
+ adversarial library list --provider google
514
+ adversarial library list --category quick-check
515
+
516
+ # Install an evaluator
517
+ adversarial library install google/gemini-flash
518
+
519
+ # Run it with --evaluator flag
520
+ adversarial evaluate --evaluator gemini-flash task.md
521
+ adversarial evaluate -e gemini-flash task.md # short form
522
+ ```
523
+
524
+ ### Available Commands
525
+
526
+ | Command | Description |
527
+ |---------|-------------|
528
+ | `adversarial library list` | Browse available evaluators |
529
+ | `adversarial library install <provider>/<name>` | Install evaluator to project |
530
+ | `adversarial library check-updates` | Check for updates to installed evaluators |
531
+ | `adversarial library update <name>` | Update an evaluator (with diff preview) |
532
+
533
+ ### Running Installed Evaluators
534
+
535
+ Use the `--evaluator` flag to run any installed evaluator:
536
+
537
+ ```bash
538
+ # Run by name
539
+ adversarial evaluate --evaluator plan-evaluator task.md
540
+
541
+ # Short form
542
+ adversarial evaluate -e security-reviewer task.md
543
+
544
+ # Evaluators with model_requirement auto-resolve to best available model
545
+ adversarial evaluate -e gemini-flash task.md
546
+ ```
547
+
548
+ **How it works:**
549
+ - Looks up evaluator in `.adversarial/evaluators/*.yml`
550
+ - Uses the evaluator's model, prompt, and output settings
551
+ - Supports evaluator aliases
552
+ - If evaluator has `model_requirement`, resolves to best available model
553
+ - Falls back to legacy `model` field if resolution fails
554
+
555
+ **Without --evaluator flag**: Uses existing shell script behavior (backward compatible)
556
+
557
+ ### Philosophy: Copy, Don't Link
558
+
559
+ Installed evaluators are **copied** to your project, not referenced at runtime:
560
+ - Projects remain self-contained and work offline
561
+ - You can customize your local copies freely
562
+ - Updates are explicit and user-controlled
563
+
564
+ ### Provenance Tracking
565
+
566
+ Installed evaluators include metadata for tracking updates:
567
+
568
+ ```yaml
569
+ _meta:
570
+ source: adversarial-evaluator-library
571
+ source_path: google/gemini-flash
572
+ version: "1.2.0"
573
+ installed: "2026-02-03T10:00:00Z"
574
+
575
+ name: gemini-flash
576
+ # ... rest of evaluator config
577
+ ```
578
+
579
+ ### Options
580
+
581
+ ```bash
582
+ # Bypass cache (1-hour TTL by default)
583
+ adversarial library list --no-cache
584
+
585
+ # Force overwrite existing files
586
+ adversarial library install google/gemini-flash --force
587
+
588
+ # Update all outdated evaluators
589
+ adversarial library update --all
590
+
591
+ # Preview changes without applying
592
+ adversarial library update gemini-flash --diff-only
593
+ ```
594
+
440
595
  ## Custom Evaluators
441
596
 
442
597
  Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
@@ -20,7 +20,7 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
20
20
  - 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
21
21
  - ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
22
22
 
23
- ## What's New in v0.6.3
23
+ ## What's New in v0.9.0
24
24
 
25
25
  ### Upgrade
26
26
 
@@ -28,6 +28,67 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
28
28
  pip install --upgrade adversarial-workflow
29
29
  ```
30
30
 
31
+ ### v0.9.0 - Run Library Evaluators
32
+
33
+ **Finally run your installed evaluators!** Use the new `--evaluator` flag:
34
+
35
+ ```bash
36
+ # Install an evaluator from the library
37
+ adversarial library install google/gemini-flash
38
+
39
+ # Run it with --evaluator flag
40
+ adversarial evaluate --evaluator gemini-flash task.md
41
+ adversarial evaluate -e gemini-flash task.md # short form
42
+
43
+ # Works with model_requirement for portable evaluators
44
+ # Automatically resolves to best available model
45
+ ```
46
+
47
+ **Key Features:**
48
+ - Run any installed evaluator by name
49
+ - Supports evaluator aliases
50
+ - Automatic model resolution via `model_requirement`
51
+ - Falls back to legacy `model` field if resolution fails
52
+ - Full backward compatibility - no flag uses existing behavior
53
+
54
+ See [Evaluator Library](#evaluator-library) for full documentation.
55
+
56
+ ### v0.8.1 - BugBot Fixes
57
+
58
+ - **CI/CD compatibility**: `--category --dry-run` no longer hangs in non-TTY environments
59
+ - **Proper exit codes**: Dry-run returns 1 when all previews fail
60
+ - **Config robustness**: Non-dict YAML configs no longer crash
61
+
62
+ ### v0.7.0 - Evaluator Library
63
+
64
+ Browse, install, and update evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library):
65
+
66
+ ```bash
67
+ # Browse available evaluators
68
+ adversarial library list
69
+
70
+ # Filter by provider or category
71
+ adversarial library list --provider google
72
+ adversarial library list --category quick-check
73
+
74
+ # Install evaluators
75
+ adversarial library install google/gemini-flash openai/fast-check
76
+
77
+ # Check for updates
78
+ adversarial library check-updates
79
+
80
+ # Update installed evaluators
81
+ adversarial library update --all
82
+ ```
83
+
84
+ **Key Features:**
85
+ - Index caching with 1-hour TTL for faster lookups
86
+ - Offline support with stale cache fallback
87
+ - Provenance tracking via `_meta` block in installed files
88
+ - Diff preview before applying updates
89
+
90
+ See [Evaluator Library](#evaluator-library) for full documentation.
91
+
31
92
  ### v0.6.3 - Configurable Timeouts
32
93
 
33
94
  - **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
@@ -392,7 +453,8 @@ adversarial health # Comprehensive system health check
392
453
  adversarial agent onboard # Set up agent coordination system
393
454
 
394
455
  # Workflow
395
- adversarial evaluate task.md # Phase 1: Evaluate plan
456
+ adversarial evaluate task.md # Phase 1: Evaluate plan (uses config.yml)
457
+ adversarial evaluate -e <name> task.md # Phase 1: Evaluate with installed evaluator
396
458
  adversarial split task.md # Split large files into smaller parts
397
459
  adversarial split task.md --dry-run # Preview split without creating files
398
460
  adversarial review # Phase 3: Review implementation
@@ -400,6 +462,99 @@ adversarial validate "pytest" # Phase 4: Validate with tests
400
462
  adversarial list-evaluators # List all available evaluators
401
463
  ```
402
464
 
465
+ ## Evaluator Library
466
+
467
+ Browse and install pre-configured evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library).
468
+
469
+ ### Quick Start
470
+
471
+ ```bash
472
+ # Browse available evaluators
473
+ adversarial library list
474
+
475
+ # Filter by provider or category
476
+ adversarial library list --provider google
477
+ adversarial library list --category quick-check
478
+
479
+ # Install an evaluator
480
+ adversarial library install google/gemini-flash
481
+
482
+ # Run it with --evaluator flag
483
+ adversarial evaluate --evaluator gemini-flash task.md
484
+ adversarial evaluate -e gemini-flash task.md # short form
485
+ ```
486
+
487
+ ### Available Commands
488
+
489
+ | Command | Description |
490
+ |---------|-------------|
491
+ | `adversarial library list` | Browse available evaluators |
492
+ | `adversarial library install <provider>/<name>` | Install evaluator to project |
493
+ | `adversarial library check-updates` | Check for updates to installed evaluators |
494
+ | `adversarial library update <name>` | Update an evaluator (with diff preview) |
495
+
496
+ ### Running Installed Evaluators
497
+
498
+ Use the `--evaluator` flag to run any installed evaluator:
499
+
500
+ ```bash
501
+ # Run by name
502
+ adversarial evaluate --evaluator plan-evaluator task.md
503
+
504
+ # Short form
505
+ adversarial evaluate -e security-reviewer task.md
506
+
507
+ # Evaluators with model_requirement auto-resolve to best available model
508
+ adversarial evaluate -e gemini-flash task.md
509
+ ```
510
+
511
+ **How it works:**
512
+ - Looks up evaluator in `.adversarial/evaluators/*.yml`
513
+ - Uses the evaluator's model, prompt, and output settings
514
+ - Supports evaluator aliases
515
+ - If evaluator has `model_requirement`, resolves to best available model
516
+ - Falls back to legacy `model` field if resolution fails
517
+
518
+ **Without --evaluator flag**: Uses existing shell script behavior (backward compatible)
519
+
520
+ ### Philosophy: Copy, Don't Link
521
+
522
+ Installed evaluators are **copied** to your project, not referenced at runtime:
523
+ - Projects remain self-contained and work offline
524
+ - You can customize your local copies freely
525
+ - Updates are explicit and user-controlled
526
+
527
+ ### Provenance Tracking
528
+
529
+ Installed evaluators include metadata for tracking updates:
530
+
531
+ ```yaml
532
+ _meta:
533
+ source: adversarial-evaluator-library
534
+ source_path: google/gemini-flash
535
+ version: "1.2.0"
536
+ installed: "2026-02-03T10:00:00Z"
537
+
538
+ name: gemini-flash
539
+ # ... rest of evaluator config
540
+ ```
541
+
542
+ ### Options
543
+
544
+ ```bash
545
+ # Bypass cache (1-hour TTL by default)
546
+ adversarial library list --no-cache
547
+
548
+ # Force overwrite existing files
549
+ adversarial library install google/gemini-flash --force
550
+
551
+ # Update all outdated evaluators
552
+ adversarial library update --all
553
+
554
+ # Preview changes without applying
555
+ adversarial library update gemini-flash --diff-only
556
+ ```
557
+
403
558
  ## Custom Evaluators
404
559
 
405
560
  Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
@@ -12,7 +12,7 @@ Usage:
12
12
  adversarial validate "pytest"
13
13
  """
14
14
 
15
- __version__ = "0.7.0"
15
+ __version__ = "0.9.0"
16
16
  __author__ = "Fredrik Matheson"
17
17
  __license__ = "MIT"
18
18
 
@@ -30,7 +30,7 @@ from typing import Dict, List, Optional, Tuple
30
30
  import yaml
31
31
  from dotenv import dotenv_values, load_dotenv
32
32
 
33
- __version__ = "0.7.0"
33
+ __version__ = "0.9.0"
34
34
 
35
35
  # ANSI color codes for better output
36
36
  RESET = "\033[0m"
@@ -2944,6 +2944,7 @@ def main():
2944
2944
 
2945
2945
  from adversarial_workflow.evaluators import (
2946
2946
  BUILTIN_EVALUATORS,
2947
+ discover_local_evaluators,
2947
2948
  get_all_evaluators,
2948
2949
  run_evaluator,
2949
2950
  )
@@ -2959,6 +2960,7 @@ def main():
2959
2960
  "health",
2960
2961
  "quickstart",
2961
2962
  "agent",
2963
+ "library",
2962
2964
  "split",
2963
2965
  "validate",
2964
2966
  "review",
@@ -2982,6 +2984,8 @@ Examples:
2982
2984
  adversarial validate "npm test" # Validate with tests
2983
2985
  adversarial split large-task.md # Split large files
2984
2986
  adversarial check-citations doc.md # Verify URLs in document
2987
+ adversarial library list # Browse available evaluators
2988
+ adversarial library install google/gemini-flash # Install evaluator
2985
2989
 
2986
2990
  For more information: https://github.com/movito/adversarial-workflow
2987
2991
  """,
@@ -3028,6 +3032,98 @@ For more information: https://github.com/movito/adversarial-workflow
3028
3032
  "--path", default=".", help="Project path (default: current directory)"
3029
3033
  )
3030
3034
 
3035
+ # library command (with subcommands)
3036
+ library_parser = subparsers.add_parser(
3037
+ "library", help="Browse and install evaluators from the community library"
3038
+ )
3039
+ library_subparsers = library_parser.add_subparsers(
3040
+ dest="library_subcommand", help="Library subcommand"
3041
+ )
3042
+
3043
+ # library list subcommand
3044
+ library_list_parser = library_subparsers.add_parser(
3045
+ "list", help="List available evaluators from the library"
3046
+ )
3047
+ library_list_parser.add_argument(
3048
+ "--provider", "-p", help="Filter by provider (e.g., google, openai)"
3049
+ )
3050
+ library_list_parser.add_argument(
3051
+ "--category", "-c", help="Filter by category (e.g., quick-check, deep-reasoning)"
3052
+ )
3053
+ library_list_parser.add_argument(
3054
+ "--verbose", "-v", action="store_true", help="Show detailed information"
3055
+ )
3056
+ library_list_parser.add_argument(
3057
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3058
+ )
3059
+
3060
+ # library info subcommand
3061
+ library_info_parser = library_subparsers.add_parser(
3062
+ "info", help="Show detailed information about an evaluator"
3063
+ )
3064
+ library_info_parser.add_argument(
3065
+ "evaluator_spec", help="Evaluator to show info for (format: provider/name)"
3066
+ )
3067
+
3068
+ # library install subcommand
3069
+ library_install_parser = library_subparsers.add_parser(
3070
+ "install", help="Install evaluator(s) from the library"
3071
+ )
3072
+ library_install_parser.add_argument(
3073
+ "evaluators", nargs="*", help="Evaluator(s) to install (format: provider/name)"
3074
+ )
3075
+ library_install_parser.add_argument(
3076
+ "--force", "-f", action="store_true", help="Overwrite existing files"
3077
+ )
3078
+ library_install_parser.add_argument(
3079
+ "--skip-validation", action="store_true", help="Skip YAML validation (advanced)"
3080
+ )
3081
+ library_install_parser.add_argument(
3082
+ "--dry-run", action="store_true", help="Preview without making changes"
3083
+ )
3084
+ library_install_parser.add_argument("--category", help="Install all evaluators in a category")
3085
+ library_install_parser.add_argument(
3086
+ "--yes", "-y", action="store_true", help="Skip confirmation prompts (required for CI/CD)"
3087
+ )
3088
+
3089
+ # library check-updates subcommand
3090
+ library_check_parser = library_subparsers.add_parser(
3091
+ "check-updates", help="Check for updates to installed evaluators"
3092
+ )
3093
+ library_check_parser.add_argument(
3094
+ "name", nargs="?", help="Specific evaluator to check (optional)"
3095
+ )
3096
+ library_check_parser.add_argument(
3097
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3098
+ )
3099
+
3100
+ # library update subcommand
3101
+ library_update_parser = library_subparsers.add_parser(
3102
+ "update", help="Update installed evaluator(s) to newer versions"
3103
+ )
3104
+ library_update_parser.add_argument("name", nargs="?", help="Evaluator name to update")
3105
+ library_update_parser.add_argument(
3106
+ "--all",
3107
+ "-a",
3108
+ action="store_true",
3109
+ dest="all_evaluators",
3110
+ help="Update all outdated evaluators",
3111
+ )
3112
+ library_update_parser.add_argument(
3113
+ "--yes", "-y", action="store_true", help="Skip confirmation prompts"
3114
+ )
3115
+ library_update_parser.add_argument(
3116
+ "--diff-only", action="store_true", help="Show diff without applying changes"
3117
+ )
3118
+ library_update_parser.add_argument(
3119
+ "--dry-run",
3120
+ action="store_true",
3121
+ help="Preview without making changes (same as --diff-only)",
3122
+ )
3123
+ library_update_parser.add_argument(
3124
+ "--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
3125
+ )
3126
+
3031
3127
  # review command (static - reviews git changes, no file argument)
3032
3128
  subparsers.add_parser("review", help="Run Phase 3: Code review")
3033
3129
 
@@ -3149,6 +3245,15 @@ For more information: https://github.com/movito/adversarial-workflow
3149
3245
  action="store_true",
3150
3246
  help="Verify URLs in document before evaluation",
3151
3247
  )
3248
+ # Add --evaluator flag for the "evaluate" command only
3249
+ # This allows selecting a library-installed evaluator
3250
+ if config.name == "evaluate":
3251
+ eval_parser.add_argument(
3252
+ "--evaluator",
3253
+ "-e",
3254
+ metavar="NAME",
3255
+ help="Use a specific evaluator from .adversarial/evaluators/",
3256
+ )
3152
3257
  # Store config for later execution
3153
3258
  eval_parser.set_defaults(evaluator_config=config)
3154
3259
 
@@ -3160,15 +3265,45 @@ For more information: https://github.com/movito/adversarial-workflow
3160
3265
 
3161
3266
  # Check for evaluator command first (has evaluator_config attribute)
3162
3267
  if hasattr(args, "evaluator_config"):
3268
+ # Default to the command's evaluator config
3269
+ config_to_use = args.evaluator_config
3270
+
3271
+ # Check if --evaluator flag was specified (only on evaluate command)
3272
+ evaluator_override = getattr(args, "evaluator", None)
3273
+ if evaluator_override:
3274
+ local_evaluators = discover_local_evaluators()
3275
+
3276
+ if not local_evaluators:
3277
+ print(f"{RED}Error: No evaluators installed.{RESET}")
3278
+ print("Install evaluators with: adversarial library install <name>")
3279
+ return 1
3280
+
3281
+ if evaluator_override not in local_evaluators:
3282
+ print(f"{RED}Error: Evaluator '{evaluator_override}' not found.{RESET}")
3283
+ print()
3284
+ print("Available evaluators:")
3285
+ # Show unique evaluators (avoid duplicates from aliases)
3286
+ seen = set()
3287
+ for _, cfg in sorted(local_evaluators.items()):
3288
+ if id(cfg) not in seen:
3289
+ print(f" {cfg.name}")
3290
+ if cfg.aliases:
3291
+ print(f" aliases: {', '.join(cfg.aliases)}")
3292
+ seen.add(id(cfg))
3293
+ return 1
3294
+
3295
+ config_to_use = local_evaluators[evaluator_override]
3296
+ print(f"Using evaluator: {config_to_use.name}")
3297
+
3163
3298
  # Determine timeout: CLI flag > YAML config > default (180s)
3164
3299
  if args.timeout is not None:
3165
3300
  timeout = args.timeout
3166
3301
  source = "CLI override"
3167
- elif args.evaluator_config.timeout != 180:
3168
- timeout = args.evaluator_config.timeout
3302
+ elif config_to_use.timeout != 180:
3303
+ timeout = config_to_use.timeout
3169
3304
  source = "evaluator config"
3170
3305
  else:
3171
- timeout = args.evaluator_config.timeout # 180 (default)
3306
+ timeout = config_to_use.timeout # 180 (default)
3172
3307
  source = "default"
3173
3308
 
3174
3309
  # Validate CLI timeout (consistent with YAML validation)
@@ -3195,7 +3330,7 @@ For more information: https://github.com/movito/adversarial-workflow
3195
3330
  print()
3196
3331
 
3197
3332
  return run_evaluator(
3198
- args.evaluator_config,
3333
+ config_to_use,
3199
3334
  args.file,
3200
3335
  timeout=timeout,
3201
3336
  )
@@ -3220,6 +3355,59 @@ For more information: https://github.com/movito/adversarial-workflow
3220
3355
  print(f"{RED}Error: agent command requires a subcommand{RESET}")
3221
3356
  print("Usage: adversarial agent onboard")
3222
3357
  return 1
3358
+ elif args.command == "library":
3359
+ from adversarial_workflow.library import (
3360
+ library_check_updates,
3361
+ library_info,
3362
+ library_install,
3363
+ library_list,
3364
+ library_update,
3365
+ )
3366
+
3367
+ if args.library_subcommand == "list":
3368
+ return library_list(
3369
+ provider=args.provider,
3370
+ category=args.category,
3371
+ verbose=args.verbose,
3372
+ no_cache=args.no_cache,
3373
+ )
3374
+ elif args.library_subcommand == "info":
3375
+ return library_info(
3376
+ evaluator_spec=args.evaluator_spec,
3377
+ )
3378
+ elif args.library_subcommand == "install":
3379
+ return library_install(
3380
+ evaluator_specs=args.evaluators,
3381
+ force=args.force,
3382
+ skip_validation=args.skip_validation,
3383
+ dry_run=args.dry_run,
3384
+ category=args.category,
3385
+ yes=args.yes,
3386
+ )
3387
+ elif args.library_subcommand == "check-updates":
3388
+ return library_check_updates(
3389
+ name=args.name,
3390
+ no_cache=args.no_cache,
3391
+ )
3392
+ elif args.library_subcommand == "update":
3393
+ return library_update(
3394
+ name=args.name,
3395
+ all_evaluators=args.all_evaluators,
3396
+ yes=args.yes,
3397
+ diff_only=args.diff_only,
3398
+ no_cache=args.no_cache,
3399
+ dry_run=args.dry_run,
3400
+ )
3401
+ else:
3402
+ # No subcommand provided
3403
+ print(f"{RED}Error: library command requires a subcommand{RESET}")
3404
+ print("Usage:")
3405
+ print(" adversarial library list")
3406
+ print(" adversarial library info <provider>/<name>")
3407
+ print(" adversarial library install <provider>/<name>")
3408
+ print(" adversarial library check-updates")
3409
+ print(" adversarial library update <name>")
3410
+ return 1
3223
3411
  elif args.command == "review":
3224
3412
  return review()
3225
3413
  elif args.command == "validate":
@@ -1,12 +1,18 @@
1
- """Evaluators module for adversarial-workflow plugin architecture."""
1
+ """Evaluators module for adversarial-workflow plugin architecture.
2
+
3
+ Supports dual-field model specification (ADV-0015):
4
+ - Legacy: model + api_key_env fields (backwards compatible)
5
+ - New: model_requirement field (resolved via ModelResolver)
6
+ """
2
7
 
3
8
  from .builtins import BUILTIN_EVALUATORS
4
- from .config import EvaluatorConfig
9
+ from .config import EvaluatorConfig, ModelRequirement
5
10
  from .discovery import (
6
11
  EvaluatorParseError,
7
12
  discover_local_evaluators,
8
13
  parse_evaluator_yaml,
9
14
  )
15
+ from .resolver import ModelResolver, ResolutionError
10
16
  from .runner import run_evaluator
11
17
 
12
18
 
@@ -38,6 +44,9 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
38
44
  __all__ = [
39
45
  "EvaluatorConfig",
40
46
  "EvaluatorParseError",
47
+ "ModelRequirement",
48
+ "ModelResolver",
49
+ "ResolutionError",
41
50
  "run_evaluator",
42
51
  "get_all_evaluators",
43
52
  "discover_local_evaluators",