adversarial-workflow 0.6.6__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/cli.py +351 -5
- adversarial_workflow/evaluators/__init__.py +11 -2
- adversarial_workflow/evaluators/config.py +39 -2
- adversarial_workflow/evaluators/discovery.py +97 -9
- adversarial_workflow/evaluators/resolver.py +211 -0
- adversarial_workflow/evaluators/runner.py +36 -13
- adversarial_workflow/library/__init__.py +56 -0
- adversarial_workflow/library/cache.py +184 -0
- adversarial_workflow/library/client.py +224 -0
- adversarial_workflow/library/commands.py +849 -0
- adversarial_workflow/library/config.py +81 -0
- adversarial_workflow/library/models.py +129 -0
- adversarial_workflow/utils/citations.py +643 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/METADATA +160 -3
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/RECORD +20 -12
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/WHEEL +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.6.6.dist-info → adversarial_workflow-0.9.0.dist-info}/top_level.txt +0 -0
adversarial_workflow/__init__.py
CHANGED
adversarial_workflow/cli.py
CHANGED
|
@@ -13,6 +13,7 @@ Commands:
|
|
|
13
13
|
review - Run Phase 3: Code review
|
|
14
14
|
validate - Run Phase 4: Test validation
|
|
15
15
|
split - Split large task files into smaller evaluable chunks
|
|
16
|
+
check-citations - Verify URLs in documents before evaluation
|
|
16
17
|
"""
|
|
17
18
|
|
|
18
19
|
import argparse
|
|
@@ -29,7 +30,7 @@ from typing import Dict, List, Optional, Tuple
|
|
|
29
30
|
import yaml
|
|
30
31
|
from dotenv import dotenv_values, load_dotenv
|
|
31
32
|
|
|
32
|
-
__version__ = "0.6.6"
|
|
33
|
+
__version__ = "0.9.0"
|
|
33
34
|
|
|
34
35
|
# ANSI color codes for better output
|
|
35
36
|
RESET = "\033[0m"
|
|
@@ -2819,6 +2820,106 @@ def list_evaluators() -> int:
|
|
|
2819
2820
|
return 0
|
|
2820
2821
|
|
|
2821
2822
|
|
|
2823
|
+
def check_citations(
|
|
2824
|
+
file_path: str,
|
|
2825
|
+
output_tasks: Optional[str] = None,
|
|
2826
|
+
mark_inline: bool = False,
|
|
2827
|
+
concurrency: int = 10,
|
|
2828
|
+
timeout: int = 10,
|
|
2829
|
+
) -> int:
|
|
2830
|
+
"""
|
|
2831
|
+
Check citations (URLs) in a document.
|
|
2832
|
+
|
|
2833
|
+
Args:
|
|
2834
|
+
file_path: Path to document to check
|
|
2835
|
+
output_tasks: Optional path to write blocked URL tasks
|
|
2836
|
+
mark_inline: Whether to mark URLs inline with status badges
|
|
2837
|
+
concurrency: Maximum concurrent URL checks
|
|
2838
|
+
timeout: Timeout per URL in seconds
|
|
2839
|
+
|
|
2840
|
+
Returns:
|
|
2841
|
+
0 on success, 1 on error
|
|
2842
|
+
"""
|
|
2843
|
+
from adversarial_workflow.utils.citations import (
|
|
2844
|
+
URLStatus,
|
|
2845
|
+
check_urls,
|
|
2846
|
+
extract_urls,
|
|
2847
|
+
generate_blocked_tasks,
|
|
2848
|
+
mark_urls_inline,
|
|
2849
|
+
print_verification_summary,
|
|
2850
|
+
)
|
|
2851
|
+
|
|
2852
|
+
# Check file exists
|
|
2853
|
+
if not os.path.exists(file_path):
|
|
2854
|
+
print(f"{RED}Error: File not found: {file_path}{RESET}")
|
|
2855
|
+
return 1
|
|
2856
|
+
|
|
2857
|
+
# Validate parameters
|
|
2858
|
+
if concurrency < 1:
|
|
2859
|
+
print(f"{RED}Error: Concurrency must be at least 1, got {concurrency}{RESET}")
|
|
2860
|
+
return 1
|
|
2861
|
+
if timeout < 1:
|
|
2862
|
+
print(f"{RED}Error: Timeout must be at least 1 second, got {timeout}{RESET}")
|
|
2863
|
+
return 1
|
|
2864
|
+
|
|
2865
|
+
print(f"🔗 Checking citations in: {file_path}")
|
|
2866
|
+
print()
|
|
2867
|
+
|
|
2868
|
+
# Read document
|
|
2869
|
+
with open(file_path, encoding="utf-8") as f:
|
|
2870
|
+
document = f.read()
|
|
2871
|
+
|
|
2872
|
+
# Extract URLs
|
|
2873
|
+
extracted = extract_urls(document)
|
|
2874
|
+
urls = [e.url for e in extracted]
|
|
2875
|
+
|
|
2876
|
+
if not urls:
|
|
2877
|
+
print(f"{YELLOW}No URLs found in document.{RESET}")
|
|
2878
|
+
return 0
|
|
2879
|
+
|
|
2880
|
+
print(f" Found {len(urls)} URLs to check")
|
|
2881
|
+
print(f" Checking with concurrency={concurrency}, timeout={timeout}s...")
|
|
2882
|
+
print()
|
|
2883
|
+
|
|
2884
|
+
# Check URLs
|
|
2885
|
+
results = check_urls(
|
|
2886
|
+
urls,
|
|
2887
|
+
concurrency=concurrency,
|
|
2888
|
+
timeout=timeout,
|
|
2889
|
+
)
|
|
2890
|
+
|
|
2891
|
+
# Print summary
|
|
2892
|
+
print_verification_summary(results)
|
|
2893
|
+
|
|
2894
|
+
# Count blocked/broken
|
|
2895
|
+
blocked_count = sum(1 for r in results if r.status in (URLStatus.BLOCKED, URLStatus.BROKEN))
|
|
2896
|
+
|
|
2897
|
+
# Mark document inline if requested
|
|
2898
|
+
if mark_inline and results:
|
|
2899
|
+
marked_document = mark_urls_inline(document, results)
|
|
2900
|
+
if marked_document != document:
|
|
2901
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
2902
|
+
f.write(marked_document)
|
|
2903
|
+
print("\n ✅ Updated document with status badges")
|
|
2904
|
+
|
|
2905
|
+
# Generate blocked tasks if requested or if there are blocked URLs
|
|
2906
|
+
if blocked_count > 0:
|
|
2907
|
+
if output_tasks:
|
|
2908
|
+
output_path = Path(output_tasks)
|
|
2909
|
+
else:
|
|
2910
|
+
# Default to .adversarial/blocked-citations/
|
|
2911
|
+
output_dir = Path.cwd() / ".adversarial" / "blocked-citations"
|
|
2912
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
2913
|
+
base_name = Path(file_path).stem
|
|
2914
|
+
output_path = output_dir / f"{base_name}-blocked-urls.md"
|
|
2915
|
+
|
|
2916
|
+
task_content = generate_blocked_tasks(results, file_path, output_path)
|
|
2917
|
+
if task_content:
|
|
2918
|
+
print(f" 📋 Blocked URL tasks: {output_path}")
|
|
2919
|
+
|
|
2920
|
+
return 0
|
|
2921
|
+
|
|
2922
|
+
|
|
2822
2923
|
def main():
|
|
2823
2924
|
"""Main CLI entry point."""
|
|
2824
2925
|
import logging
|
|
@@ -2843,6 +2944,7 @@ def main():
|
|
|
2843
2944
|
|
|
2844
2945
|
from adversarial_workflow.evaluators import (
|
|
2845
2946
|
BUILTIN_EVALUATORS,
|
|
2947
|
+
discover_local_evaluators,
|
|
2846
2948
|
get_all_evaluators,
|
|
2847
2949
|
run_evaluator,
|
|
2848
2950
|
)
|
|
@@ -2858,10 +2960,12 @@ def main():
|
|
|
2858
2960
|
"health",
|
|
2859
2961
|
"quickstart",
|
|
2860
2962
|
"agent",
|
|
2963
|
+
"library",
|
|
2861
2964
|
"split",
|
|
2862
2965
|
"validate",
|
|
2863
2966
|
"review",
|
|
2864
2967
|
"list-evaluators",
|
|
2968
|
+
"check-citations",
|
|
2865
2969
|
}
|
|
2866
2970
|
|
|
2867
2971
|
parser = argparse.ArgumentParser(
|
|
@@ -2879,6 +2983,9 @@ Examples:
|
|
|
2879
2983
|
adversarial review # Review implementation
|
|
2880
2984
|
adversarial validate "npm test" # Validate with tests
|
|
2881
2985
|
adversarial split large-task.md # Split large files
|
|
2986
|
+
adversarial check-citations doc.md # Verify URLs in document
|
|
2987
|
+
adversarial library list # Browse available evaluators
|
|
2988
|
+
adversarial library install google/gemini-flash # Install evaluator
|
|
2882
2989
|
|
|
2883
2990
|
For more information: https://github.com/movito/adversarial-workflow
|
|
2884
2991
|
""",
|
|
@@ -2925,6 +3032,98 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
2925
3032
|
"--path", default=".", help="Project path (default: current directory)"
|
|
2926
3033
|
)
|
|
2927
3034
|
|
|
3035
|
+
# library command (with subcommands)
|
|
3036
|
+
library_parser = subparsers.add_parser(
|
|
3037
|
+
"library", help="Browse and install evaluators from the community library"
|
|
3038
|
+
)
|
|
3039
|
+
library_subparsers = library_parser.add_subparsers(
|
|
3040
|
+
dest="library_subcommand", help="Library subcommand"
|
|
3041
|
+
)
|
|
3042
|
+
|
|
3043
|
+
# library list subcommand
|
|
3044
|
+
library_list_parser = library_subparsers.add_parser(
|
|
3045
|
+
"list", help="List available evaluators from the library"
|
|
3046
|
+
)
|
|
3047
|
+
library_list_parser.add_argument(
|
|
3048
|
+
"--provider", "-p", help="Filter by provider (e.g., google, openai)"
|
|
3049
|
+
)
|
|
3050
|
+
library_list_parser.add_argument(
|
|
3051
|
+
"--category", "-c", help="Filter by category (e.g., quick-check, deep-reasoning)"
|
|
3052
|
+
)
|
|
3053
|
+
library_list_parser.add_argument(
|
|
3054
|
+
"--verbose", "-v", action="store_true", help="Show detailed information"
|
|
3055
|
+
)
|
|
3056
|
+
library_list_parser.add_argument(
|
|
3057
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3058
|
+
)
|
|
3059
|
+
|
|
3060
|
+
# library info subcommand
|
|
3061
|
+
library_info_parser = library_subparsers.add_parser(
|
|
3062
|
+
"info", help="Show detailed information about an evaluator"
|
|
3063
|
+
)
|
|
3064
|
+
library_info_parser.add_argument(
|
|
3065
|
+
"evaluator_spec", help="Evaluator to show info for (format: provider/name)"
|
|
3066
|
+
)
|
|
3067
|
+
|
|
3068
|
+
# library install subcommand
|
|
3069
|
+
library_install_parser = library_subparsers.add_parser(
|
|
3070
|
+
"install", help="Install evaluator(s) from the library"
|
|
3071
|
+
)
|
|
3072
|
+
library_install_parser.add_argument(
|
|
3073
|
+
"evaluators", nargs="*", help="Evaluator(s) to install (format: provider/name)"
|
|
3074
|
+
)
|
|
3075
|
+
library_install_parser.add_argument(
|
|
3076
|
+
"--force", "-f", action="store_true", help="Overwrite existing files"
|
|
3077
|
+
)
|
|
3078
|
+
library_install_parser.add_argument(
|
|
3079
|
+
"--skip-validation", action="store_true", help="Skip YAML validation (advanced)"
|
|
3080
|
+
)
|
|
3081
|
+
library_install_parser.add_argument(
|
|
3082
|
+
"--dry-run", action="store_true", help="Preview without making changes"
|
|
3083
|
+
)
|
|
3084
|
+
library_install_parser.add_argument("--category", help="Install all evaluators in a category")
|
|
3085
|
+
library_install_parser.add_argument(
|
|
3086
|
+
"--yes", "-y", action="store_true", help="Skip confirmation prompts (required for CI/CD)"
|
|
3087
|
+
)
|
|
3088
|
+
|
|
3089
|
+
# library check-updates subcommand
|
|
3090
|
+
library_check_parser = library_subparsers.add_parser(
|
|
3091
|
+
"check-updates", help="Check for updates to installed evaluators"
|
|
3092
|
+
)
|
|
3093
|
+
library_check_parser.add_argument(
|
|
3094
|
+
"name", nargs="?", help="Specific evaluator to check (optional)"
|
|
3095
|
+
)
|
|
3096
|
+
library_check_parser.add_argument(
|
|
3097
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3098
|
+
)
|
|
3099
|
+
|
|
3100
|
+
# library update subcommand
|
|
3101
|
+
library_update_parser = library_subparsers.add_parser(
|
|
3102
|
+
"update", help="Update installed evaluator(s) to newer versions"
|
|
3103
|
+
)
|
|
3104
|
+
library_update_parser.add_argument("name", nargs="?", help="Evaluator name to update")
|
|
3105
|
+
library_update_parser.add_argument(
|
|
3106
|
+
"--all",
|
|
3107
|
+
"-a",
|
|
3108
|
+
action="store_true",
|
|
3109
|
+
dest="all_evaluators",
|
|
3110
|
+
help="Update all outdated evaluators",
|
|
3111
|
+
)
|
|
3112
|
+
library_update_parser.add_argument(
|
|
3113
|
+
"--yes", "-y", action="store_true", help="Skip confirmation prompts"
|
|
3114
|
+
)
|
|
3115
|
+
library_update_parser.add_argument(
|
|
3116
|
+
"--diff-only", action="store_true", help="Show diff without applying changes"
|
|
3117
|
+
)
|
|
3118
|
+
library_update_parser.add_argument(
|
|
3119
|
+
"--dry-run",
|
|
3120
|
+
action="store_true",
|
|
3121
|
+
help="Preview without making changes (same as --diff-only)",
|
|
3122
|
+
)
|
|
3123
|
+
library_update_parser.add_argument(
|
|
3124
|
+
"--no-cache", action="store_true", help="Bypass cache and fetch fresh data"
|
|
3125
|
+
)
|
|
3126
|
+
|
|
2928
3127
|
# review command (static - reviews git changes, no file argument)
|
|
2929
3128
|
subparsers.add_parser("review", help="Run Phase 3: Code review")
|
|
2930
3129
|
|
|
@@ -2961,6 +3160,38 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
2961
3160
|
help="List all available evaluators (built-in and local)",
|
|
2962
3161
|
)
|
|
2963
3162
|
|
|
3163
|
+
# check-citations command
|
|
3164
|
+
citations_parser = subparsers.add_parser(
|
|
3165
|
+
"check-citations",
|
|
3166
|
+
help="Verify URLs in a document before evaluation",
|
|
3167
|
+
)
|
|
3168
|
+
citations_parser.add_argument("file", help="Document to check citations in")
|
|
3169
|
+
citations_parser.add_argument(
|
|
3170
|
+
"--output-tasks",
|
|
3171
|
+
"-o",
|
|
3172
|
+
help="Output file for blocked URL tasks (markdown)",
|
|
3173
|
+
)
|
|
3174
|
+
citations_parser.add_argument(
|
|
3175
|
+
"--mark-inline",
|
|
3176
|
+
action="store_true",
|
|
3177
|
+
default=False,
|
|
3178
|
+
help="Mark URLs inline with status badges (modifies document)",
|
|
3179
|
+
)
|
|
3180
|
+
citations_parser.add_argument(
|
|
3181
|
+
"--concurrency",
|
|
3182
|
+
"-c",
|
|
3183
|
+
type=int,
|
|
3184
|
+
default=10,
|
|
3185
|
+
help="Maximum concurrent URL checks (default: 10)",
|
|
3186
|
+
)
|
|
3187
|
+
citations_parser.add_argument(
|
|
3188
|
+
"--timeout",
|
|
3189
|
+
"-t",
|
|
3190
|
+
type=int,
|
|
3191
|
+
default=10,
|
|
3192
|
+
help="Timeout per URL in seconds (default: 10)",
|
|
3193
|
+
)
|
|
3194
|
+
|
|
2964
3195
|
# Dynamic evaluator registration
|
|
2965
3196
|
try:
|
|
2966
3197
|
evaluators = get_all_evaluators()
|
|
@@ -3009,6 +3240,20 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3009
3240
|
default=None,
|
|
3010
3241
|
help="Timeout in seconds (default: from evaluator config or 180, max: 600)",
|
|
3011
3242
|
)
|
|
3243
|
+
eval_parser.add_argument(
|
|
3244
|
+
"--check-citations",
|
|
3245
|
+
action="store_true",
|
|
3246
|
+
help="Verify URLs in document before evaluation",
|
|
3247
|
+
)
|
|
3248
|
+
# Add --evaluator flag for the "evaluate" command only
|
|
3249
|
+
# This allows selecting a library-installed evaluator
|
|
3250
|
+
if config.name == "evaluate":
|
|
3251
|
+
eval_parser.add_argument(
|
|
3252
|
+
"--evaluator",
|
|
3253
|
+
"-e",
|
|
3254
|
+
metavar="NAME",
|
|
3255
|
+
help="Use a specific evaluator from .adversarial/evaluators/",
|
|
3256
|
+
)
|
|
3012
3257
|
# Store config for later execution
|
|
3013
3258
|
eval_parser.set_defaults(evaluator_config=config)
|
|
3014
3259
|
|
|
@@ -3020,15 +3265,45 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3020
3265
|
|
|
3021
3266
|
# Check for evaluator command first (has evaluator_config attribute)
|
|
3022
3267
|
if hasattr(args, "evaluator_config"):
|
|
3268
|
+
# Default to the command's evaluator config
|
|
3269
|
+
config_to_use = args.evaluator_config
|
|
3270
|
+
|
|
3271
|
+
# Check if --evaluator flag was specified (only on evaluate command)
|
|
3272
|
+
evaluator_override = getattr(args, "evaluator", None)
|
|
3273
|
+
if evaluator_override:
|
|
3274
|
+
local_evaluators = discover_local_evaluators()
|
|
3275
|
+
|
|
3276
|
+
if not local_evaluators:
|
|
3277
|
+
print(f"{RED}Error: No evaluators installed.{RESET}")
|
|
3278
|
+
print("Install evaluators with: adversarial library install <name>")
|
|
3279
|
+
return 1
|
|
3280
|
+
|
|
3281
|
+
if evaluator_override not in local_evaluators:
|
|
3282
|
+
print(f"{RED}Error: Evaluator '{evaluator_override}' not found.{RESET}")
|
|
3283
|
+
print()
|
|
3284
|
+
print("Available evaluators:")
|
|
3285
|
+
# Show unique evaluators (avoid duplicates from aliases)
|
|
3286
|
+
seen = set()
|
|
3287
|
+
for _, cfg in sorted(local_evaluators.items()):
|
|
3288
|
+
if id(cfg) not in seen:
|
|
3289
|
+
print(f" {cfg.name}")
|
|
3290
|
+
if cfg.aliases:
|
|
3291
|
+
print(f" aliases: {', '.join(cfg.aliases)}")
|
|
3292
|
+
seen.add(id(cfg))
|
|
3293
|
+
return 1
|
|
3294
|
+
|
|
3295
|
+
config_to_use = local_evaluators[evaluator_override]
|
|
3296
|
+
print(f"Using evaluator: {config_to_use.name}")
|
|
3297
|
+
|
|
3023
3298
|
# Determine timeout: CLI flag > YAML config > default (180s)
|
|
3024
3299
|
if args.timeout is not None:
|
|
3025
3300
|
timeout = args.timeout
|
|
3026
3301
|
source = "CLI override"
|
|
3027
|
-
elif args.evaluator_config.timeout != 180:
|
|
3028
|
-
timeout = args.evaluator_config.timeout
|
|
3302
|
+
elif config_to_use.timeout != 180:
|
|
3303
|
+
timeout = config_to_use.timeout
|
|
3029
3304
|
source = "evaluator config"
|
|
3030
3305
|
else:
|
|
3031
|
-
timeout = args.evaluator_config.timeout # 180 (default)
|
|
3306
|
+
timeout = config_to_use.timeout # 180 (default)
|
|
3032
3307
|
source = "default"
|
|
3033
3308
|
|
|
3034
3309
|
# Validate CLI timeout (consistent with YAML validation)
|
|
@@ -3044,8 +3319,18 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3044
3319
|
# Log actual timeout and source
|
|
3045
3320
|
print(f"Using timeout: {timeout}s ({source})")
|
|
3046
3321
|
|
|
3322
|
+
# Check citations first if requested (read-only, doesn't modify file)
|
|
3323
|
+
if getattr(args, "check_citations", False):
|
|
3324
|
+
print()
|
|
3325
|
+
result = check_citations(args.file, mark_inline=False)
|
|
3326
|
+
if result != 0:
|
|
3327
|
+
print(
|
|
3328
|
+
f"{YELLOW}Warning: Citation check had issues, continuing with evaluation...{RESET}"
|
|
3329
|
+
)
|
|
3330
|
+
print()
|
|
3331
|
+
|
|
3047
3332
|
return run_evaluator(
|
|
3048
|
-
|
|
3333
|
+
config_to_use,
|
|
3049
3334
|
args.file,
|
|
3050
3335
|
timeout=timeout,
|
|
3051
3336
|
)
|
|
@@ -3070,6 +3355,59 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3070
3355
|
print(f"{RED}Error: agent command requires a subcommand{RESET}")
|
|
3071
3356
|
print("Usage: adversarial agent onboard")
|
|
3072
3357
|
return 1
|
|
3358
|
+
elif args.command == "library":
|
|
3359
|
+
from adversarial_workflow.library import (
|
|
3360
|
+
library_check_updates,
|
|
3361
|
+
library_info,
|
|
3362
|
+
library_install,
|
|
3363
|
+
library_list,
|
|
3364
|
+
library_update,
|
|
3365
|
+
)
|
|
3366
|
+
|
|
3367
|
+
if args.library_subcommand == "list":
|
|
3368
|
+
return library_list(
|
|
3369
|
+
provider=args.provider,
|
|
3370
|
+
category=args.category,
|
|
3371
|
+
verbose=args.verbose,
|
|
3372
|
+
no_cache=args.no_cache,
|
|
3373
|
+
)
|
|
3374
|
+
elif args.library_subcommand == "info":
|
|
3375
|
+
return library_info(
|
|
3376
|
+
evaluator_spec=args.evaluator_spec,
|
|
3377
|
+
)
|
|
3378
|
+
elif args.library_subcommand == "install":
|
|
3379
|
+
return library_install(
|
|
3380
|
+
evaluator_specs=args.evaluators,
|
|
3381
|
+
force=args.force,
|
|
3382
|
+
skip_validation=args.skip_validation,
|
|
3383
|
+
dry_run=args.dry_run,
|
|
3384
|
+
category=args.category,
|
|
3385
|
+
yes=args.yes,
|
|
3386
|
+
)
|
|
3387
|
+
elif args.library_subcommand == "check-updates":
|
|
3388
|
+
return library_check_updates(
|
|
3389
|
+
name=args.name,
|
|
3390
|
+
no_cache=args.no_cache,
|
|
3391
|
+
)
|
|
3392
|
+
elif args.library_subcommand == "update":
|
|
3393
|
+
return library_update(
|
|
3394
|
+
name=args.name,
|
|
3395
|
+
all_evaluators=args.all_evaluators,
|
|
3396
|
+
yes=args.yes,
|
|
3397
|
+
diff_only=args.diff_only,
|
|
3398
|
+
no_cache=args.no_cache,
|
|
3399
|
+
dry_run=args.dry_run,
|
|
3400
|
+
)
|
|
3401
|
+
else:
|
|
3402
|
+
# No subcommand provided
|
|
3403
|
+
print(f"{RED}Error: library command requires a subcommand{RESET}")
|
|
3404
|
+
print("Usage:")
|
|
3405
|
+
print(" adversarial library list")
|
|
3406
|
+
print(" adversarial library info <provider>/<name>")
|
|
3407
|
+
print(" adversarial library install <provider>/<name>")
|
|
3408
|
+
print(" adversarial library check-updates")
|
|
3409
|
+
print(" adversarial library update <name>")
|
|
3410
|
+
return 1
|
|
3073
3411
|
elif args.command == "review":
|
|
3074
3412
|
return review()
|
|
3075
3413
|
elif args.command == "validate":
|
|
@@ -3083,6 +3421,14 @@ For more information: https://github.com/movito/adversarial-workflow
|
|
|
3083
3421
|
)
|
|
3084
3422
|
elif args.command == "list-evaluators":
|
|
3085
3423
|
return list_evaluators()
|
|
3424
|
+
elif args.command == "check-citations":
|
|
3425
|
+
return check_citations(
|
|
3426
|
+
args.file,
|
|
3427
|
+
output_tasks=args.output_tasks,
|
|
3428
|
+
mark_inline=args.mark_inline,
|
|
3429
|
+
concurrency=args.concurrency,
|
|
3430
|
+
timeout=args.timeout,
|
|
3431
|
+
)
|
|
3086
3432
|
else:
|
|
3087
3433
|
parser.print_help()
|
|
3088
3434
|
return 1
|
|
adversarial_workflow/evaluators/__init__.py
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
|
-
"""Evaluators module for adversarial-workflow plugin architecture."""
|
|
1
|
+
"""Evaluators module for adversarial-workflow plugin architecture.
|
|
2
|
+
|
|
3
|
+
Supports dual-field model specification (ADV-0015):
|
|
4
|
+
- Legacy: model + api_key_env fields (backwards compatible)
|
|
5
|
+
- New: model_requirement field (resolved via ModelResolver)
|
|
6
|
+
"""
|
|
2
7
|
|
|
3
8
|
from .builtins import BUILTIN_EVALUATORS
|
|
4
|
-
from .config import EvaluatorConfig
|
|
9
|
+
from .config import EvaluatorConfig, ModelRequirement
|
|
5
10
|
from .discovery import (
|
|
6
11
|
EvaluatorParseError,
|
|
7
12
|
discover_local_evaluators,
|
|
8
13
|
parse_evaluator_yaml,
|
|
9
14
|
)
|
|
15
|
+
from .resolver import ModelResolver, ResolutionError
|
|
10
16
|
from .runner import run_evaluator
|
|
11
17
|
|
|
12
18
|
|
|
@@ -38,6 +44,9 @@ def get_all_evaluators() -> dict[str, EvaluatorConfig]:
|
|
|
38
44
|
__all__ = [
|
|
39
45
|
"EvaluatorConfig",
|
|
40
46
|
"EvaluatorParseError",
|
|
47
|
+
"ModelRequirement",
|
|
48
|
+
"ModelResolver",
|
|
49
|
+
"ResolutionError",
|
|
41
50
|
"run_evaluator",
|
|
42
51
|
"get_all_evaluators",
|
|
43
52
|
"discover_local_evaluators",
|
|
adversarial_workflow/evaluators/config.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
"""
|
|
2
2
|
EvaluatorConfig dataclass for evaluator definitions.
|
|
3
|
+
|
|
4
|
+
Supports dual-field model specification (ADV-0015):
|
|
5
|
+
- Legacy: model + api_key_env fields (backwards compatible)
|
|
6
|
+
- New: model_requirement field (structured capability requirements)
|
|
3
7
|
"""
|
|
4
8
|
|
|
5
9
|
from __future__ import annotations
|
|
@@ -7,6 +11,27 @@ from __future__ import annotations
|
|
|
7
11
|
from dataclasses import dataclass, field
|
|
8
12
|
|
|
9
13
|
|
|
14
|
+
@dataclass
|
|
15
|
+
class ModelRequirement:
|
|
16
|
+
"""Model capability requirements (from library).
|
|
17
|
+
|
|
18
|
+
This dataclass represents structured model requirements that can be
|
|
19
|
+
resolved to actual model IDs via the ModelResolver. It separates
|
|
20
|
+
WHAT capability is needed from HOW to access it.
|
|
21
|
+
|
|
22
|
+
Attributes:
|
|
23
|
+
family: Model family (e.g., "claude", "gpt", "o", "gemini", "mistral", "codestral", "llama")
|
|
24
|
+
tier: Performance tier (e.g., "opus", "sonnet", "haiku", "flagship", "mini", "latest")
|
|
25
|
+
min_version: Optional minimum model generation (e.g., "4" for Claude 4+)
|
|
26
|
+
min_context: Optional minimum context window in tokens (e.g., 128000)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
family: str
|
|
30
|
+
tier: str
|
|
31
|
+
min_version: str = ""
|
|
32
|
+
min_context: int = 0
|
|
33
|
+
|
|
34
|
+
|
|
10
35
|
@dataclass
|
|
11
36
|
class EvaluatorConfig:
|
|
12
37
|
"""Configuration for an evaluator (built-in or custom).
|
|
@@ -15,11 +40,18 @@ class EvaluatorConfig:
|
|
|
15
40
|
whether built-in (evaluate, proofread, review) or custom
|
|
16
41
|
(defined in .adversarial/evaluators/*.yml).
|
|
17
42
|
|
|
43
|
+
Supports dual-field model specification (ADV-0015):
|
|
44
|
+
- Legacy: model + api_key_env fields (always backwards compatible)
|
|
45
|
+
- New: model_requirement field (resolved via ModelResolver)
|
|
46
|
+
|
|
47
|
+
When both are present, model_requirement takes precedence. If resolution
|
|
48
|
+
fails, falls back to legacy model field with a warning.
|
|
49
|
+
|
|
18
50
|
Attributes:
|
|
19
51
|
name: Command name (e.g., "evaluate", "athena")
|
|
20
52
|
description: Help text shown in CLI
|
|
21
|
-
model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro")
|
|
22
|
-
api_key_env: Environment variable name for API key
|
|
53
|
+
model: Model to use (e.g., "gpt-4o", "gemini-2.5-pro") - legacy field
|
|
54
|
+
api_key_env: Environment variable name for API key - legacy field
|
|
23
55
|
prompt: The evaluation prompt template
|
|
24
56
|
output_suffix: Log file suffix (e.g., "PLAN-EVALUATION")
|
|
25
57
|
log_prefix: CLI output prefix (e.g., "ATHENA")
|
|
@@ -27,6 +59,7 @@ class EvaluatorConfig:
|
|
|
27
59
|
aliases: Alternative command names
|
|
28
60
|
version: Evaluator version
|
|
29
61
|
timeout: Timeout in seconds (default: 180, max: 600)
|
|
62
|
+
model_requirement: Structured model requirement (resolved via ModelResolver)
|
|
30
63
|
source: "builtin" or "local" (set internally)
|
|
31
64
|
config_file: Path to YAML file if local (set internally)
|
|
32
65
|
"""
|
|
@@ -46,6 +79,10 @@ class EvaluatorConfig:
|
|
|
46
79
|
version: str = "1.0.0"
|
|
47
80
|
timeout: int = 180 # Timeout in seconds (default: 180, max: 600)
|
|
48
81
|
|
|
82
|
+
# NEW: Structured model requirement (Phase 1 - ADV-0015)
|
|
83
|
+
# When present, resolved via ModelResolver to actual model ID
|
|
84
|
+
model_requirement: ModelRequirement | None = None
|
|
85
|
+
|
|
49
86
|
# Metadata (set internally during discovery, not from YAML)
|
|
50
87
|
source: str = "builtin"
|
|
51
88
|
config_file: str | None = None
|