adversarial-workflow 0.7.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/PKG-INFO +158 -3
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/README.md +157 -2
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/__init__.py +1 -1
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/cli.py +193 -5
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/__init__.py +11 -2
- adversarial_workflow-0.9.0/adversarial_workflow/evaluators/config.py +88 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/discovery.py +97 -9
- adversarial_workflow-0.9.0/adversarial_workflow/evaluators/resolver.py +211 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/runner.py +36 -13
- adversarial_workflow-0.9.0/adversarial_workflow/library/__init__.py +56 -0
- adversarial_workflow-0.9.0/adversarial_workflow/library/cache.py +184 -0
- adversarial_workflow-0.9.0/adversarial_workflow/library/client.py +224 -0
- adversarial_workflow-0.9.0/adversarial_workflow/library/commands.py +849 -0
- adversarial_workflow-0.9.0/adversarial_workflow/library/config.py +81 -0
- adversarial_workflow-0.9.0/adversarial_workflow/library/models.py +129 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/PKG-INFO +158 -3
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/SOURCES.txt +13 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/pyproject.toml +4 -1
- adversarial_workflow-0.9.0/tests/test_evaluate_with_evaluator.py +277 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluator_discovery.py +305 -2
- adversarial_workflow-0.9.0/tests/test_evaluator_runner.py +479 -0
- adversarial_workflow-0.9.0/tests/test_library_client.py +354 -0
- adversarial_workflow-0.9.0/tests/test_library_commands.py +385 -0
- adversarial_workflow-0.9.0/tests/test_library_enhancements.py +746 -0
- adversarial_workflow-0.9.0/tests/test_library_integration.py +118 -0
- adversarial_workflow-0.9.0/tests/test_model_resolver.py +460 -0
- adversarial_workflow-0.7.0/adversarial_workflow/evaluators/config.py +0 -51
- adversarial_workflow-0.7.0/tests/test_evaluator_runner.py +0 -262
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/LICENSE +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/__main__.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/evaluators/builtins.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/.aider.conf.yml.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/.env.example.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/README.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/AGENT-SYSTEM-GUIDE.md +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/README.md.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/agent-handoffs.json.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/agent-context/current-state.json.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/config.yml.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/evaluate_plan.sh.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/example-task.md.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/proofread_content.sh.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/review_implementation.sh.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/templates/validate_tests.sh.template +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/__init__.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/citations.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/colors.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/config.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/file_splitter.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow/utils/validation.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/dependency_links.txt +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/entry_points.txt +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/requires.txt +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/adversarial_workflow.egg-info/top_level.txt +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/setup.cfg +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/setup.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_citations.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_cli.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_cli_dynamic_commands.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_config.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_env_loading.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluate.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_evaluator_config.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_file_splitter.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_list_evaluators.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_python_version.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_scripts_project.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_split_command.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_timeout_integration.py +0 -0
- {adversarial_workflow-0.7.0 → adversarial_workflow-0.9.0}/tests/test_utils_validation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: adversarial-workflow
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Multi-stage AI evaluation system for task plans, code review, and test validation
|
|
5
5
|
Author: Fredrik Matheson
|
|
6
6
|
License: MIT
|
|
@@ -57,7 +57,7 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
57
57
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
58
58
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
59
59
|
|
|
60
|
-
## What's New in v0.
|
|
60
|
+
## What's New in v0.9.0
|
|
61
61
|
|
|
62
62
|
### Upgrade
|
|
63
63
|
|
|
@@ -65,6 +65,67 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
65
65
|
pip install --upgrade adversarial-workflow
|
|
66
66
|
```
|
|
67
67
|
|
|
68
|
+
### v0.9.0 - Run Library Evaluators
|
|
69
|
+
|
|
70
|
+
**Finally run your installed evaluators!** Use the new `--evaluator` flag:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Install an evaluator from the library
|
|
74
|
+
adversarial library install google/gemini-flash
|
|
75
|
+
|
|
76
|
+
# Run it with --evaluator flag
|
|
77
|
+
adversarial evaluate --evaluator gemini-flash task.md
|
|
78
|
+
adversarial evaluate -e gemini-flash task.md # short form
|
|
79
|
+
|
|
80
|
+
# Works with model_requirement for portable evaluators
|
|
81
|
+
# Automatically resolves to best available model
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Key Features:**
|
|
85
|
+
- Run any installed evaluator by name
|
|
86
|
+
- Supports evaluator aliases
|
|
87
|
+
- Automatic model resolution via `model_requirement`
|
|
88
|
+
- Falls back to legacy `model` field if resolution fails
|
|
89
|
+
- Full backward compatibility - no flag uses existing behavior
|
|
90
|
+
|
|
91
|
+
See [Evaluator Library](#evaluator-library) for full documentation.
|
|
92
|
+
|
|
93
|
+
### v0.8.1 - BugBot Fixes
|
|
94
|
+
|
|
95
|
+
- **CI/CD compatibility**: `--category --dry-run` no longer hangs in non-TTY environments
|
|
96
|
+
- **Proper exit codes**: Dry-run returns 1 when all previews fail
|
|
97
|
+
- **Config robustness**: Non-dict YAML configs no longer crash
|
|
98
|
+
|
|
99
|
+
### v0.7.0 - Evaluator Library
|
|
100
|
+
|
|
101
|
+
Browse, install, and update evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Browse available evaluators
|
|
105
|
+
adversarial library list
|
|
106
|
+
|
|
107
|
+
# Filter by provider or category
|
|
108
|
+
adversarial library list --provider google
|
|
109
|
+
adversarial library list --category quick-check
|
|
110
|
+
|
|
111
|
+
# Install evaluators
|
|
112
|
+
adversarial library install google/gemini-flash openai/fast-check
|
|
113
|
+
|
|
114
|
+
# Check for updates
|
|
115
|
+
adversarial library check-updates
|
|
116
|
+
|
|
117
|
+
# Update installed evaluators
|
|
118
|
+
adversarial library update --all
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Key Features:**
|
|
122
|
+
- Index caching with 1-hour TTL for faster lookups
|
|
123
|
+
- Offline support with stale cache fallback
|
|
124
|
+
- Provenance tracking via `_meta` block in installed files
|
|
125
|
+
- Diff preview before applying updates
|
|
126
|
+
|
|
127
|
+
See [Evaluator Library](#evaluator-library) for full documentation.
|
|
128
|
+
|
|
68
129
|
### v0.6.3 - Configurable Timeouts
|
|
69
130
|
|
|
70
131
|
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
@@ -429,7 +490,8 @@ adversarial health # Comprehensive system health check
|
|
|
429
490
|
adversarial agent onboard # Set up agent coordination system
|
|
430
491
|
|
|
431
492
|
# Workflow
|
|
432
|
-
adversarial evaluate task.md # Phase 1: Evaluate plan
|
|
493
|
+
adversarial evaluate task.md # Phase 1: Evaluate plan (uses config.yml)
|
|
494
|
+
adversarial evaluate -e <name> task.md # Phase 1: Evaluate with installed evaluator
|
|
433
495
|
adversarial split task.md # Split large files into smaller parts
|
|
434
496
|
adversarial split task.md --dry-run # Preview split without creating files
|
|
435
497
|
adversarial review # Phase 3: Review implementation
|
|
@@ -437,6 +499,99 @@ adversarial validate "pytest" # Phase 4: Validate with tests
|
|
|
437
499
|
adversarial list-evaluators # List all available evaluators
|
|
438
500
|
```
|
|
439
501
|
|
|
502
|
+
## Evaluator Library
|
|
503
|
+
|
|
504
|
+
Browse and install pre-configured evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library).
|
|
505
|
+
|
|
506
|
+
### Quick Start
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
# Browse available evaluators
|
|
510
|
+
adversarial library list
|
|
511
|
+
|
|
512
|
+
# Filter by provider or category
|
|
513
|
+
adversarial library list --provider google
|
|
514
|
+
adversarial library list --category quick-check
|
|
515
|
+
|
|
516
|
+
# Install an evaluator
|
|
517
|
+
adversarial library install google/gemini-flash
|
|
518
|
+
|
|
519
|
+
# Run it with --evaluator flag
|
|
520
|
+
adversarial evaluate --evaluator gemini-flash task.md
|
|
521
|
+
adversarial evaluate -e gemini-flash task.md # short form
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
### Available Commands
|
|
525
|
+
|
|
526
|
+
| Command | Description |
|
|
527
|
+
|---------|-------------|
|
|
528
|
+
| `adversarial library list` | Browse available evaluators |
|
|
529
|
+
| `adversarial library install <provider>/<name>` | Install evaluator to project |
|
|
530
|
+
| `adversarial library check-updates` | Check for updates to installed evaluators |
|
|
531
|
+
| `adversarial library update <name>` | Update an evaluator (with diff preview) |
|
|
532
|
+
|
|
533
|
+
### Running Installed Evaluators
|
|
534
|
+
|
|
535
|
+
Use the `--evaluator` flag to run any installed evaluator:
|
|
536
|
+
|
|
537
|
+
```bash
|
|
538
|
+
# Run by name
|
|
539
|
+
adversarial evaluate --evaluator plan-evaluator task.md
|
|
540
|
+
|
|
541
|
+
# Short form
|
|
542
|
+
adversarial evaluate -e security-reviewer task.md
|
|
543
|
+
|
|
544
|
+
# Evaluators with model_requirement auto-resolve to best available model
|
|
545
|
+
adversarial evaluate -e gemini-flash task.md
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
**How it works:**
|
|
549
|
+
- Looks up evaluator in `.adversarial/evaluators/*.yml`
|
|
550
|
+
- Uses the evaluator's model, prompt, and output settings
|
|
551
|
+
- Supports evaluator aliases
|
|
552
|
+
- If evaluator has `model_requirement`, resolves to best available model
|
|
553
|
+
- Falls back to legacy `model` field if resolution fails
|
|
554
|
+
|
|
555
|
+
**Without --evaluator flag**: Uses existing shell script behavior (backward compatible)
|
|
556
|
+
|
|
557
|
+
### Philosophy: Copy, Don't Link
|
|
558
|
+
|
|
559
|
+
Installed evaluators are **copied** to your project, not referenced at runtime:
|
|
560
|
+
- Projects remain self-contained and work offline
|
|
561
|
+
- You can customize your local copies freely
|
|
562
|
+
- Updates are explicit and user-controlled
|
|
563
|
+
|
|
564
|
+
### Provenance Tracking
|
|
565
|
+
|
|
566
|
+
Installed evaluators include metadata for tracking updates:
|
|
567
|
+
|
|
568
|
+
```yaml
|
|
569
|
+
_meta:
|
|
570
|
+
source: adversarial-evaluator-library
|
|
571
|
+
source_path: google/gemini-flash
|
|
572
|
+
version: "1.2.0"
|
|
573
|
+
installed: "2026-02-03T10:00:00Z"
|
|
574
|
+
|
|
575
|
+
name: gemini-flash
|
|
576
|
+
# ... rest of evaluator config
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
### Options
|
|
580
|
+
|
|
581
|
+
```bash
|
|
582
|
+
# Bypass cache (1-hour TTL by default)
|
|
583
|
+
adversarial library list --no-cache
|
|
584
|
+
|
|
585
|
+
# Force overwrite existing files
|
|
586
|
+
adversarial library install google/gemini-flash --force
|
|
587
|
+
|
|
588
|
+
# Update all outdated evaluators
|
|
589
|
+
adversarial library update --all
|
|
590
|
+
|
|
591
|
+
# Preview changes without applying
|
|
592
|
+
adversarial library update gemini-flash --diff-only
|
|
593
|
+
```
|
|
594
|
+
|
|
440
595
|
## Custom Evaluators
|
|
441
596
|
|
|
442
597
|
Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
|
|
@@ -20,7 +20,7 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
20
20
|
- 🎯 **Tool-agnostic**: Use with Claude Code, Cursor, Aider, manual coding, or any workflow
|
|
21
21
|
- ✨ **Interactive onboarding**: Guided setup wizard gets you started in <5 minutes
|
|
22
22
|
|
|
23
|
-
## What's New in v0.
|
|
23
|
+
## What's New in v0.9.0
|
|
24
24
|
|
|
25
25
|
### Upgrade
|
|
26
26
|
|
|
@@ -28,6 +28,67 @@ Evaluate proposals, sort out ideas, and prevent "phantom work" (AI claiming to i
|
|
|
28
28
|
pip install --upgrade adversarial-workflow
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
### v0.9.0 - Run Library Evaluators
|
|
32
|
+
|
|
33
|
+
**Finally run your installed evaluators!** Use the new `--evaluator` flag:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Install an evaluator from the library
|
|
37
|
+
adversarial library install google/gemini-flash
|
|
38
|
+
|
|
39
|
+
# Run it with --evaluator flag
|
|
40
|
+
adversarial evaluate --evaluator gemini-flash task.md
|
|
41
|
+
adversarial evaluate -e gemini-flash task.md # short form
|
|
42
|
+
|
|
43
|
+
# Works with model_requirement for portable evaluators
|
|
44
|
+
# Automatically resolves to best available model
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**Key Features:**
|
|
48
|
+
- Run any installed evaluator by name
|
|
49
|
+
- Supports evaluator aliases
|
|
50
|
+
- Automatic model resolution via `model_requirement`
|
|
51
|
+
- Falls back to legacy `model` field if resolution fails
|
|
52
|
+
- Full backward compatibility - no flag uses existing behavior
|
|
53
|
+
|
|
54
|
+
See [Evaluator Library](#evaluator-library) for full documentation.
|
|
55
|
+
|
|
56
|
+
### v0.8.1 - BugBot Fixes
|
|
57
|
+
|
|
58
|
+
- **CI/CD compatibility**: `--category --dry-run` no longer hangs in non-TTY environments
|
|
59
|
+
- **Proper exit codes**: Dry-run returns 1 when all previews fail
|
|
60
|
+
- **Config robustness**: Non-dict YAML configs no longer crash
|
|
61
|
+
|
|
62
|
+
### v0.7.0 - Evaluator Library
|
|
63
|
+
|
|
64
|
+
Browse, install, and update evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library):
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Browse available evaluators
|
|
68
|
+
adversarial library list
|
|
69
|
+
|
|
70
|
+
# Filter by provider or category
|
|
71
|
+
adversarial library list --provider google
|
|
72
|
+
adversarial library list --category quick-check
|
|
73
|
+
|
|
74
|
+
# Install evaluators
|
|
75
|
+
adversarial library install google/gemini-flash openai/fast-check
|
|
76
|
+
|
|
77
|
+
# Check for updates
|
|
78
|
+
adversarial library check-updates
|
|
79
|
+
|
|
80
|
+
# Update installed evaluators
|
|
81
|
+
adversarial library update --all
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Key Features:**
|
|
85
|
+
- Index caching with 1-hour TTL for faster lookups
|
|
86
|
+
- Offline support with stale cache fallback
|
|
87
|
+
- Provenance tracking via `_meta` block in installed files
|
|
88
|
+
- Diff preview before applying updates
|
|
89
|
+
|
|
90
|
+
See [Evaluator Library](#evaluator-library) for full documentation.
|
|
91
|
+
|
|
31
92
|
### v0.6.3 - Configurable Timeouts
|
|
32
93
|
|
|
33
94
|
- **Per-evaluator timeout**: Add `timeout: 300` to evaluator YAML for slow models like Mistral Large
|
|
@@ -392,7 +453,8 @@ adversarial health # Comprehensive system health check
|
|
|
392
453
|
adversarial agent onboard # Set up agent coordination system
|
|
393
454
|
|
|
394
455
|
# Workflow
|
|
395
|
-
adversarial evaluate task.md # Phase 1: Evaluate plan
|
|
456
|
+
adversarial evaluate task.md # Phase 1: Evaluate plan (uses config.yml)
|
|
457
|
+
adversarial evaluate -e <name> task.md # Phase 1: Evaluate with installed evaluator
|
|
396
458
|
adversarial split task.md # Split large files into smaller parts
|
|
397
459
|
adversarial split task.md --dry-run # Preview split without creating files
|
|
398
460
|
adversarial review # Phase 3: Review implementation
|
|
@@ -400,6 +462,99 @@ adversarial validate "pytest" # Phase 4: Validate with tests
|
|
|
400
462
|
adversarial list-evaluators # List all available evaluators
|
|
401
463
|
```
|
|
402
464
|
|
|
465
|
+
## Evaluator Library
|
|
466
|
+
|
|
467
|
+
Browse and install pre-configured evaluators from the community [adversarial-evaluator-library](https://github.com/movito/adversarial-evaluator-library).
|
|
468
|
+
|
|
469
|
+
### Quick Start
|
|
470
|
+
|
|
471
|
+
```bash
|
|
472
|
+
# Browse available evaluators
|
|
473
|
+
adversarial library list
|
|
474
|
+
|
|
475
|
+
# Filter by provider or category
|
|
476
|
+
adversarial library list --provider google
|
|
477
|
+
adversarial library list --category quick-check
|
|
478
|
+
|
|
479
|
+
# Install an evaluator
|
|
480
|
+
adversarial library install google/gemini-flash
|
|
481
|
+
|
|
482
|
+
# Run it with --evaluator flag
|
|
483
|
+
adversarial evaluate --evaluator gemini-flash task.md
|
|
484
|
+
adversarial evaluate -e gemini-flash task.md # short form
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
### Available Commands
|
|
488
|
+
|
|
489
|
+
| Command | Description |
|
|
490
|
+
|---------|-------------|
|
|
491
|
+
| `adversarial library list` | Browse available evaluators |
|
|
492
|
+
| `adversarial library install <provider>/<name>` | Install evaluator to project |
|
|
493
|
+
| `adversarial library check-updates` | Check for updates to installed evaluators |
|
|
494
|
+
| `adversarial library update <name>` | Update an evaluator (with diff preview) |
|
|
495
|
+
|
|
496
|
+
### Running Installed Evaluators
|
|
497
|
+
|
|
498
|
+
Use the `--evaluator` flag to run any installed evaluator:
|
|
499
|
+
|
|
500
|
+
```bash
|
|
501
|
+
# Run by name
|
|
502
|
+
adversarial evaluate --evaluator plan-evaluator task.md
|
|
503
|
+
|
|
504
|
+
# Short form
|
|
505
|
+
adversarial evaluate -e security-reviewer task.md
|
|
506
|
+
|
|
507
|
+
# Evaluators with model_requirement auto-resolve to best available model
|
|
508
|
+
adversarial evaluate -e gemini-flash task.md
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
**How it works:**
|
|
512
|
+
- Looks up evaluator in `.adversarial/evaluators/*.yml`
|
|
513
|
+
- Uses the evaluator's model, prompt, and output settings
|
|
514
|
+
- Supports evaluator aliases
|
|
515
|
+
- If evaluator has `model_requirement`, resolves to best available model
|
|
516
|
+
- Falls back to legacy `model` field if resolution fails
|
|
517
|
+
|
|
518
|
+
**Without --evaluator flag**: Uses existing shell script behavior (backward compatible)
|
|
519
|
+
|
|
520
|
+
### Philosophy: Copy, Don't Link
|
|
521
|
+
|
|
522
|
+
Installed evaluators are **copied** to your project, not referenced at runtime:
|
|
523
|
+
- Projects remain self-contained and work offline
|
|
524
|
+
- You can customize your local copies freely
|
|
525
|
+
- Updates are explicit and user-controlled
|
|
526
|
+
|
|
527
|
+
### Provenance Tracking
|
|
528
|
+
|
|
529
|
+
Installed evaluators include metadata for tracking updates:
|
|
530
|
+
|
|
531
|
+
```yaml
|
|
532
|
+
_meta:
|
|
533
|
+
source: adversarial-evaluator-library
|
|
534
|
+
source_path: google/gemini-flash
|
|
535
|
+
version: "1.2.0"
|
|
536
|
+
installed: "2026-02-03T10:00:00Z"
|
|
537
|
+
|
|
538
|
+
name: gemini-flash
|
|
539
|
+
# ... rest of evaluator config
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
### Options
|
|
543
|
+
|
|
544
|
+
```bash
|
|
545
|
+
# Bypass cache (1-hour TTL by default)
|
|
546
|
+
adversarial library list --no-cache
|
|
547
|
+
|
|
548
|
+
# Force overwrite existing files
|
|
549
|
+
adversarial library install google/gemini-flash --force
|
|
550
|
+
|
|
551
|
+
# Update all outdated evaluators
|
|
552
|
+
adversarial library update --all
|
|
553
|
+
|
|
554
|
+
# Preview changes without applying
|
|
555
|
+
adversarial library update gemini-flash --diff-only
|
|
556
|
+
```
|
|
557
|
+
|
|
403
558
|
## Custom Evaluators
|
|
404
559
|
|
|
405
560
|
Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
|
|
@@ -30,7 +30,7 @@ from typing import Dict, List, Optional, Tuple
|
|
|
30
30
|
import yaml
|
|
31
31
|
from dotenv import dotenv_values, load_dotenv
|
|
32
32
|
|
|
33
|
-
__version__ = "0.
|
|
33
|
+
__version__ = "0.9.0"
|
|
34
34
|
|
|
35
35
|
# ANSI color codes for better output
|
|
36
36
|
RESET = "\033[0m"
|
|
@@ -2944,6 +2944,7 @@ def main():
|
|
|
2944
2944
|
|
|
2945
2945
|
from adversarial_workflow.evaluators import (
|
|
2946
2946
|
BUILTIN_EVALUATORS,
|
|
2947
|
+
discover_local_evaluators,
|
|
2947
2948
|
get_all_evaluators,
|
|
2948
2949
|
run_evaluator,
|
|
2949
2950
|
)
|
|
@@ -2959,6 +2960,7 @@ def main():
|
|
|
2959
2960
|
"health",
|
|
2960
2961
|
"quickstart",
|
|
2961
2962
|
"agent",
|
|
2963
|
+
"library",
|
|
2962
2964
|
"split",
|
|
2963
2965
|
"validate",
|
|
2964
2966
|
"review",
|
|
@@ -2982,6 +2984,8 @@ Examples:
|
|
|
2982
2984
|
adversarial validate "npm test" # Validate with tests
|
|
2983
2985
|
adversarial split large-task.md # Split large files
|
|
2984
2986
|
adversarial check-citations doc.md # Verify URLs in document
|
|
2987
|
+
adversarial library list # Browse available evaluators
|
|
2988
|
+
adversarial library install google/gemini-flash # Install evaluator
|
|
2985
2989
|
|
|
2986
2990
|
For more information: https://github.com/movito/adversarial-workflow
|
|
2987
2991
|
""",
|
|
@@ -3028,6 +3032,98 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3028
3032
|
"--path", default=".", help="Project path (default: current directory)"
|
|
3029
3033
|
)
|
|
3030
3034
|
|
|
3035
|
+
# library command (with subcommands)
|
|
3036
|
+
library_parser = subparsers.add_parser(
|
|
3037
|
+
"library", help="Browse and install evaluators from the community library"
|
|
3038
|
+
)
|
|
3039
|
+
library_subparsers = library_parser.add_subparsers(
|
|
3040
|
+
dest="library_subcommand", help="Library subcommand"
|
|
3041
|
+
)
|
|
3042
|
+
|
|
3043
|
+
# library list subcommand
|
|
3044
|
+
library_list_parser = library_subparsers.add_parser(
|
|
3045
|
+
"list", help="List available evaluators from the library"
|
|
3046
|
+
)
|
|
3047
|
+
library_list_parser.add_argument(
|
|
3048
|
+
"--provider", "-p", help="Filter by provider (e.g., google, openai)"
|
|
3049
|
+
)
|
|
3050
|
+
library_list_parser.add_argument(
|
|
3051
|
+
"--category", "-c", help="Filter by category (e.g., quick-check, deep-reasoning)"
|
|
3052
|
+
)
|
|
3053
|
+
library_list_parser.add_argument(
|
|
3054
|
+
"--verbose", "-v", action="store_true", help="Show detailed information"
|
|
3055
|
+
)
|
|
3056
|
+
library_list_parser.add_argument(
|
|
3057
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3058
|
+
)
|
|
3059
|
+
|
|
3060
|
+
# library info subcommand
|
|
3061
|
+
library_info_parser = library_subparsers.add_parser(
|
|
3062
|
+
"info", help="Show detailed information about an evaluator"
|
|
3063
|
+
)
|
|
3064
|
+
library_info_parser.add_argument(
|
|
3065
|
+
"evaluator_spec", help="Evaluator to show info for (format: provider/name)"
|
|
3066
|
+
)
|
|
3067
|
+
|
|
3068
|
+
# library install subcommand
|
|
3069
|
+
library_install_parser = library_subparsers.add_parser(
|
|
3070
|
+
"install", help="Install evaluator(s) from the library"
|
|
3071
|
+
)
|
|
3072
|
+
library_install_parser.add_argument(
|
|
3073
|
+
"evaluators", nargs="*", help="Evaluator(s) to install (format: provider/name)"
|
|
3074
|
+
)
|
|
3075
|
+
library_install_parser.add_argument(
|
|
3076
|
+
"--force", "-f", action="store_true", help="Overwrite existing files"
|
|
3077
|
+
)
|
|
3078
|
+
library_install_parser.add_argument(
|
|
3079
|
+
"--skip-validation", action="store_true", help="Skip YAML validation (advanced)"
|
|
3080
|
+
)
|
|
3081
|
+
library_install_parser.add_argument(
|
|
3082
|
+
"--dry-run", action="store_true", help="Preview without making changes"
|
|
3083
|
+
)
|
|
3084
|
+
library_install_parser.add_argument("--category", help="Install all evaluators in a category")
|
|
3085
|
+
library_install_parser.add_argument(
|
|
3086
|
+
"--yes", "-y", action="store_true", help="Skip confirmation prompts (required for CI/CD)"
|
|
3087
|
+
)
|
|
3088
|
+
|
|
3089
|
+
# library check-updates subcommand
|
|
3090
|
+
library_check_parser = library_subparsers.add_parser(
|
|
3091
|
+
"check-updates", help="Check for updates to installed evaluators"
|
|
3092
|
+
)
|
|
3093
|
+
library_check_parser.add_argument(
|
|
3094
|
+
"name", nargs="?", help="Specific evaluator to check (optional)"
|
|
3095
|
+
)
|
|
3096
|
+
library_check_parser.add_argument(
|
|
3097
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3098
|
+
)
|
|
3099
|
+
|
|
3100
|
+
# library update subcommand
|
|
3101
|
+
library_update_parser = library_subparsers.add_parser(
|
|
3102
|
+
"update", help="Update installed evaluator(s) to newer versions"
|
|
3103
|
+
)
|
|
3104
|
+
library_update_parser.add_argument("name", nargs="?", help="Evaluator name to update")
|
|
3105
|
+
library_update_parser.add_argument(
|
|
3106
|
+
"--all",
|
|
3107
|
+
"-a",
|
|
3108
|
+
action="store_true",
|
|
3109
|
+
dest="all_evaluators",
|
|
3110
|
+
help="Update all outdated evaluators",
|
|
3111
|
+
)
|
|
3112
|
+
library_update_parser.add_argument(
|
|
3113
|
+
"--yes", "-y", action="store_true", help="Skip confirmation prompts"
|
|
3114
|
+
)
|
|
3115
|
+
library_update_parser.add_argument(
|
|
3116
|
+
"--diff-only", action="store_true", help="Show diff without applying changes"
|
|
3117
|
+
)
|
|
3118
|
+
library_update_parser.add_argument(
|
|
3119
|
+
"--dry-run",
|
|
3120
|
+
action="store_true",
|
|
3121
|
+
help="Preview without making changes (same as --diff-only)",
|
|
3122
|
+
)
|
|
3123
|
+
library_update_parser.add_argument(
|
|
3124
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3125
|
+
)
|
|
3126
|
+
|
|
3031
3127
|
# review command (static - reviews git changes, no file argument)
|
|
3032
3128
|
subparsers.add_parser("review", help="Run Phase 3: Code review")
|
|
3033
3129
|
|
|
@@ -3149,6 +3245,15 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3149
3245
|
action="store_true",
|
|
3150
3246
|
help="Verify URLs in document before evaluation",
|
|
3151
3247
|
)
|
|
3248
|
+
# Add --evaluator flag for the "evaluate" command only
|
|
3249
|
+
# This allows selecting a library-installed evaluator
|
|
3250
|
+
if config.name == "evaluate":
|
|
3251
|
+
eval_parser.add_argument(
|
|
3252
|
+
"--evaluator",
|
|
3253
|
+
"-e",
|
|
3254
|
+
metavar="NAME",
|
|
3255
|
+
help="Use a specific evaluator from .adversarial/evaluators/",
|
|
3256
|
+
)
|
|
3152
3257
|
# Store config for later execution
|
|
3153
3258
|
eval_parser.set_defaults(evaluator_config=config)
|
|
3154
3259
|
|
|
@@ -3160,15 +3265,45 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3160
3265
|
|
|
3161
3266
|
# Check for evaluator command first (has evaluator_config attribute)
|
|
3162
3267
|
if hasattr(args, "evaluator_config"):
|
|
3268
|
+
# Default to the command's evaluator config
|
|
3269
|
+
config_to_use = args.evaluator_config
|
|
3270
|
+
|
|
3271
|
+
# Check if --evaluator flag was specified (only on evaluate command)
|
|
3272
|
+
evaluator_override = getattr(args, "evaluator", None)
|
|
3273
|
+
if evaluator_override:
|
|
3274
|
+
local_evaluators = discover_local_evaluators()
|
|
3275
|
+
|
|
3276
|
+
if not local_evaluators:
|
|
3277
|
+
print(f"{RED}Error: No evaluators installed.{RESET}")
|
|
3278
|
+
print("Install evaluators with: adversarial library install <name>")
|
|
3279
|
+
return 1
|
|
3280
|
+
|
|
3281
|
+
if evaluator_override not in local_evaluators:
|
|
3282
|
+
print(f"{RED}Error: Evaluator '{evaluator_override}' not found.{RESET}")
|
|
3283
|
+
print()
|
|
3284
|
+
print("Available evaluators:")
|
|
3285
|
+
# Show unique evaluators (avoid duplicates from aliases)
|
|
3286
|
+
seen = set()
|
|
3287
|
+
for _, cfg in sorted(local_evaluators.items()):
|
|
3288
|
+
if id(cfg) not in seen:
|
|
3289
|
+
print(f" {cfg.name}")
|
|
3290
|
+
if cfg.aliases:
|
|
3291
|
+
print(f" aliases: {', '.join(cfg.aliases)}")
|
|
3292
|
+
seen.add(id(cfg))
|
|
3293
|
+
return 1
|
|
3294
|
+
|
|
3295
|
+
config_to_use = local_evaluators[evaluator_override]
|
|
3296
|
+
print(f"Using evaluator: {config_to_use.name}")
|
|
3297
|
+
|
|
3163
3298
|
# Determine timeout: CLI flag > YAML config > default (180s)
|
|
3164
3299
|
if args.timeout is not None:
|
|
3165
3300
|
timeout = args.timeout
|
|
3166
3301
|
source = "CLI override"
|
|
3167
|
-
elif args.evaluator_config.timeout != 180:
|
|
3168
|
-
timeout = args.evaluator_config.timeout
|
|
3302
|
+
elif config_to_use.timeout != 180:
|
|
3303
|
+
timeout = config_to_use.timeout
|
|
3169
3304
|
source = "evaluator config"
|
|
3170
3305
|
else:
|
|
3171
|
-
timeout = args.evaluator_config.timeout # 180 (default)
|
|
3306
|
+
timeout = config_to_use.timeout # 180 (default)
|
|
3172
3307
|
source = "default"
|
|
3173
3308
|
|
|
3174
3309
|
# Validate CLI timeout (consistent with YAML validation)
|
|
@@ -3195,7 +3330,7 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3195
3330
|
print()
|
|
3196
3331
|
|
|
3197
3332
|
return run_evaluator(
|
|
3198
|
-
|
|
3333
|
+
config_to_use,
|
|
3199
3334
|
args.file,
|
|
3200
3335
|
timeout=timeout,
|
|
3201
3336
|
)
|
|
@@ -3220,6 +3355,59 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3220
3355
|
print(f"{RED}Error: agent command requires a subcommand{RESET}")
|
|
3221
3356
|
print("Usage: adversarial agent onboard")
|
|
3222
3357
|
return 1
|
|
3358
|
+
elif args.command == "library":
|
|
3359
|
+
from adversarial_workflow.library import (
|
|
3360
|
+
library_check_updates,
|
|
3361
|
+
library_info,
|
|
3362
|
+
library_install,
|
|
3363
|
+
library_list,
|
|
3364
|
+
library_update,
|
|
3365
|
+
)
|
|
3366
|
+
|
|
3367
|
+
if args.library_subcommand == "list":
|
|
3368
|
+
return library_list(
|
|
3369
|
+
provider=args.provider,
|
|
3370
|
+
category=args.category,
|
|
3371
|
+
verbose=args.verbose,
|
|
3372
|
+
no_cache=args.no_cache,
|
|
3373
|
+
)
|
|
3374
|
+
elif args.library_subcommand == "info":
|
|
3375
|
+
return library_info(
|
|
3376
|
+
evaluator_spec=args.evaluator_spec,
|
|
3377
|
+
)
|
|
3378
|
+
elif args.library_subcommand == "install":
|
|
3379
|
+
return library_install(
|
|
3380
|
+
evaluator_specs=args.evaluators,
|
|
3381
|
+
force=args.force,
|
|
3382
|
+
skip_validation=args.skip_validation,
|
|
3383
|
+
dry_run=args.dry_run,
|
|
3384
|
+
category=args.category,
|
|
3385
|
+
yes=args.yes,
|
|
3386
|
+
)
|
|
3387
|
+
elif args.library_subcommand == "check-updates":
|
|
3388
|
+
return library_check_updates(
|
|
3389
|
+
name=args.name,
|
|
3390
|
+
no_cache=args.no_cache,
|
|
3391
|
+
)
|
|
3392
|
+
elif args.library_subcommand == "update":
|
|
3393
|
+
return library_update(
|
|
3394
|
+
name=args.name,
|
|
3395
|
+
all_evaluators=args.all_evaluators,
|
|
3396
|
+
yes=args.yes,
|
|
3397
|
+
diff_only=args.diff_only,
|
|
3398
|
+
no_cache=args.no_cache,
|
|
3399
|
+
dry_run=args.dry_run,
|
|
3400
|
+
)
|
|
3401
|
+
else:
|
|
3402
|
+
# No subcommand provided
|
|
3403
|
+
print(f"{RED}Error: library command requires a subcommand{RESET}")
|
|
3404
|
+
print("Usage:")
|
|
3405
|
+
print(" adversarial library list")
|
|
3406
|
+
print(" adversarial library info <provider>/<name>")
|
|
3407
|
+
print(" adversarial library install <provider>/<name>")
|
|
3408
|
+
print(" adversarial library check-updates")
|
|
3409
|
+
print(" adversarial library update <name>")
|
|
3410
|
+
return 1
|
|
3223
3411
|
elif args.command == "review":
|
|
3224
3412
|
return review()
|
|
3225
3413
|
elif args.command == "validate":
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
-
"""Evaluators module for adversarial-workflow plugin architecture.
|
|
1
|
+
"""Evaluators module for adversarial-workflow plugin architecture.
|
|
2
|
+
|
|
3
|
+
Supports dual-field model specification (ADV-0015):
|
|
4
|
+
- Legacy: model + api_key_env fields (backwards compatible)
|
|
5
|
+
- New: model_requirement field (resolved via ModelResolver)
|
|
6
|
+
"""
|
|
2
7
|
|
|
3
8
|
from .builtins import BUILTIN_EVALUATORS
|
|
4
|
-
from .config import EvaluatorConfig
|
|
9
|
+
from .config import EvaluatorConfig, ModelRequirement
|
|
5
10
|
from .discovery import (
|
|
6
11
|
EvaluatorParseError,
|
|
7
12
|
discover_local_evaluators,
|
|
8
13
|
parse_evaluator_yaml,
|
|
9
14
|
)
|
|
15
|
+
from .resolver import ModelResolver, ResolutionError
|
|
10
16
|
from .runner import run_evaluator
|
|
11
17
|
|
|
12
18
|
|
|
@@ -38,6 +44,9 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
|
|
|
38
44
|
__all__ = [
|
|
39
45
|
"EvaluatorConfig",
|
|
40
46
|
"EvaluatorParseError",
|
|
47
|
+
"ModelRequirement",
|
|
48
|
+
"ModelResolver",
|
|
49
|
+
"ResolutionError",
|
|
41
50
|
"run_evaluator",
|
|
42
51
|
"get_all_evaluators",
|
|
43
52
|
"discover_local_evaluators",
|