adversarial-workflow 0.5.0-py3-none-any.whl → 0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/cli.py +127 -237
- adversarial_workflow/evaluators/__init__.py +45 -0
- adversarial_workflow/evaluators/builtins.py +36 -0
- adversarial_workflow/evaluators/config.py +49 -0
- adversarial_workflow/evaluators/discovery.py +212 -0
- adversarial_workflow/evaluators/runner.py +313 -0
- adversarial_workflow/utils/__init__.py +17 -0
- adversarial_workflow/utils/colors.py +9 -0
- adversarial_workflow/utils/config.py +44 -0
- adversarial_workflow/utils/file_splitter.py +378 -0
- adversarial_workflow/utils/validation.py +76 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/METADATA +61 -1
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/RECORD +18 -8
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/WHEEL +1 -1
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/top_level.txt +0 -0
adversarial_workflow/evaluators/discovery.py
@@ -0,0 +1,212 @@
```python
"""
YAML parsing and discovery for custom evaluators.

This module handles discovering evaluator definitions from
.adversarial/evaluators/*.yml files and parsing them into
EvaluatorConfig objects.
"""

from __future__ import annotations

import logging
import re
from pathlib import Path

import yaml

from .config import EvaluatorConfig

logger = logging.getLogger(__name__)


class EvaluatorParseError(Exception):
    """Raised when evaluator YAML is invalid."""


def parse_evaluator_yaml(yml_file: Path) -> EvaluatorConfig:
    """Parse a YAML file into an EvaluatorConfig.

    Args:
        yml_file: Path to the YAML file

    Returns:
        EvaluatorConfig instance

    Raises:
        EvaluatorParseError: If YAML is invalid or missing required fields
        yaml.YAMLError: If YAML syntax is invalid
    """
    # Read file with explicit UTF-8 encoding
    try:
        content = yml_file.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise EvaluatorParseError(
            f"File encoding error (not UTF-8): {yml_file}"
        ) from e

    # Parse YAML
    data = yaml.safe_load(content)

    # Check for empty YAML
    if data is None or (isinstance(data, str) and not data.strip()):
        raise EvaluatorParseError(f"Empty or invalid YAML file: {yml_file}")

    # Ensure parsed data is a dict (YAML can parse scalars, lists, etc.)
    if not isinstance(data, dict):
        raise EvaluatorParseError(
            f"YAML must be a mapping, got {type(data).__name__}: {yml_file}"
        )

    # Validate required fields exist
    required = ["name", "description", "model", "api_key_env", "prompt", "output_suffix"]
    missing = [f for f in required if f not in data]
    if missing:
        raise EvaluatorParseError(f"Missing required fields: {', '.join(missing)}")

    # Validate required fields are strings (YAML can parse 'yes' as bool, '123' as int)
    for field in required:
        value = data[field]
        if not isinstance(value, str):
            raise EvaluatorParseError(
                f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
            )

    # Validate name format (valid CLI command name)
    name = data["name"]
    if not re.match(r"^[a-zA-Z][a-zA-Z0-9_-]*$", name):
        raise EvaluatorParseError(
            f"Invalid evaluator name '{name}': must start with letter, "
            "contain only letters, numbers, hyphens, underscores"
        )

    # Normalize aliases (handle None, string, or list)
    aliases = data.get("aliases")
    if aliases is None:
        data["aliases"] = []
    elif isinstance(aliases, str):
        data["aliases"] = [aliases]
    elif not isinstance(aliases, list):
        raise EvaluatorParseError(
            f"aliases must be string or list, got {type(aliases).__name__}"
        )

    # Validate alias names - must be strings with valid format
    for alias in data.get("aliases", []):
        if not isinstance(alias, str):
            raise EvaluatorParseError(
                f"Alias must be a string, got {type(alias).__name__}: {alias!r}"
            )
        if not re.match(r"^[a-zA-Z][a-zA-Z0-9_-]*$", alias):
            raise EvaluatorParseError(
                f"Invalid alias '{alias}': must start with letter, "
                "contain only letters, numbers, hyphens, underscores"
            )

    # Validate prompt is non-empty
    prompt = data.get("prompt", "")
    if not prompt or not prompt.strip():
        raise EvaluatorParseError("prompt cannot be empty")

    # Validate optional string fields if present (YAML can parse '2' as int, 'yes' as bool)
    optional_string_fields = ["log_prefix", "fallback_model", "version"]
    for field in optional_string_fields:
        if field in data and data[field] is not None:
            value = data[field]
            if not isinstance(value, str):
                raise EvaluatorParseError(
                    f"Field '{field}' must be a string, got {type(value).__name__}: {value!r}"
                )

    # Filter to known fields only (log unknown fields)
    known_fields = {
        "name",
        "description",
        "model",
        "api_key_env",
        "prompt",
        "output_suffix",
        "log_prefix",
        "fallback_model",
        "aliases",
        "version",
    }
    unknown = set(data.keys()) - known_fields
    if unknown:
        logger.warning(
            "Unknown fields in %s: %s", yml_file.name, ", ".join(sorted(unknown))
        )

    # Build filtered data dict
    filtered_data = {k: v for k, v in data.items() if k in known_fields}

    # Create config with metadata
    config = EvaluatorConfig(
        **filtered_data,
        source="local",
        config_file=str(yml_file),
    )

    return config


def discover_local_evaluators(
    base_path: Path | None = None,
) -> dict[str, EvaluatorConfig]:
    """Discover evaluators from .adversarial/evaluators/*.yml

    Args:
        base_path: Project root (default: current directory)

    Returns:
        Dict mapping evaluator name (and aliases) to EvaluatorConfig
    """
    if base_path is None:
        base_path = Path.cwd()

    evaluators: dict[str, EvaluatorConfig] = {}
    local_dir = base_path / ".adversarial" / "evaluators"

    if not local_dir.exists():
        return evaluators

    # Get yml files with error handling for permission/access issues
    try:
        yml_files = sorted(local_dir.glob("*.yml"))
    except OSError as e:
        logger.warning("Could not read evaluators directory: %s", e)
        return evaluators

    for yml_file in yml_files:
        try:
            config = parse_evaluator_yaml(yml_file)

            # Check for name conflicts
            if config.name in evaluators:
                logger.warning(
                    "Evaluator '%s' in %s conflicts with existing; skipping",
                    config.name,
                    yml_file.name,
                )
                continue

            # Register primary name
            evaluators[config.name] = config

            # Register aliases (point to same config object)
            for alias in config.aliases:
                if alias in evaluators:
                    logger.warning(
                        "Alias '%s' conflicts with existing evaluator; skipping alias",
                        alias,
                    )
                    continue
                evaluators[alias] = config

        except EvaluatorParseError as e:
            logger.warning("Skipping %s: %s", yml_file.name, e)
        except yaml.YAMLError as e:
            logger.warning("Skipping %s: YAML syntax error: %s", yml_file.name, e)
        except OSError as e:
            logger.warning("Could not load %s: %s", yml_file.name, e)

    return evaluators
```
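Taken together, a custom evaluator is just a YAML file dropped into `.adversarial/evaluators/`. The sketch below is illustrative only: the `tone-check` evaluator, its prompt text, and the alias are made up, while the required keys mirror the `required` list in `parse_evaluator_yaml()` above.

```python
# Sketch: define a hypothetical custom evaluator and let discovery pick it up.
from pathlib import Path

from adversarial_workflow.evaluators.discovery import discover_local_evaluators

evaluator_dir = Path(".adversarial/evaluators")
evaluator_dir.mkdir(parents=True, exist_ok=True)

# "tone-check" and its field values are invented for this example.
(evaluator_dir / "tone-check.yml").write_text(
    """\
name: tone-check
description: Review prose for tone and clarity
model: gpt-4o
api_key_env: OPENAI_API_KEY
output_suffix: tone-review
aliases: [tone]
prompt: |
  You are a strict editor. Flag unclear wording and inconsistent tone,
  then end with a verdict: APPROVED, NEEDS_REVISION, or REJECTED.
""",
    encoding="utf-8",
)

evaluators = discover_local_evaluators(Path.cwd())
print(sorted(evaluators))          # ['tone', 'tone-check']
print(evaluators["tone"].model)    # gpt-4o (alias points at the same config)
```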
adversarial_workflow/evaluators/runner.py
@@ -0,0 +1,313 @@
```python
"""Generic evaluator runner."""

from __future__ import annotations

import os
import platform
import shutil
import subprocess
import tempfile
from datetime import datetime, timezone
from pathlib import Path

from .config import EvaluatorConfig
from ..utils.colors import RESET, BOLD, GREEN, YELLOW, RED
from ..utils.config import load_config
from ..utils.validation import validate_evaluation_output


def run_evaluator(config: EvaluatorConfig, file_path: str, timeout: int = 180) -> int:
    """Run an evaluator on a file.

    Args:
        config: Evaluator configuration
        file_path: Path to file to evaluate
        timeout: Timeout in seconds (default: 180)

    Returns:
        0 on success, non-zero on failure
    """
    prefix = config.log_prefix or config.name.upper()
    print(f"{prefix}: Evaluating {file_path}")
    print()

    # 1. Validate file exists
    if not os.path.exists(file_path):
        print(f"{RED}Error: File not found: {file_path}{RESET}")
        return 1

    # 2. Load project config (check initialization first)
    config_path = Path(".adversarial/config.yml")
    if not config_path.exists():
        print(f"{RED}Error: Not initialized. Run 'adversarial init' first.{RESET}")
        return 1
    project_config = load_config()

    # 3. Check aider available
    if not shutil.which("aider"):
        print(f"{RED}Error: Aider not found{RESET}")
        _print_aider_help()
        return 1

    # 4. Check API key
    api_key = os.environ.get(config.api_key_env)
    if not api_key:
        print(f"{RED}Error: {config.api_key_env} not set{RESET}")
        print(f" Set in .env or export {config.api_key_env}=your-key")
        return 1

    # 5. Pre-flight file size check
    line_count, estimated_tokens = _check_file_size(file_path)
    if line_count > 500 or estimated_tokens > 20000:
        _warn_large_file(line_count, estimated_tokens)
        if line_count > 700:
            if not _confirm_continue():
                print("Evaluation cancelled.")
                return 0

    # 6. Determine execution method
    if config.source == "builtin":
        return _run_builtin_evaluator(config, file_path, project_config, timeout)
    else:
        return _run_custom_evaluator(config, file_path, project_config, timeout)


def _run_builtin_evaluator(
    config: EvaluatorConfig,
    file_path: str,
    project_config: dict,
    timeout: int,
) -> int:
    """Run a built-in evaluator using existing shell scripts."""
    script_map = {
        "evaluate": ".adversarial/scripts/evaluate_plan.sh",
        "proofread": ".adversarial/scripts/proofread_content.sh",
        "review": ".adversarial/scripts/code_review.sh",
    }

    script = script_map.get(config.name)
    if not script or not os.path.exists(script):
        print(f"{RED}Error: Script not found: {script}{RESET}")
        print(" Fix: Run 'adversarial init' to reinstall scripts")
        return 1

    return _execute_script(script, file_path, config, project_config, timeout)


def _run_custom_evaluator(
    config: EvaluatorConfig,
    file_path: str,
    project_config: dict,
    timeout: int,
) -> int:
    """Run a custom evaluator by invoking aider directly."""
    # Prepare output path
    logs_dir = Path(project_config["log_directory"])
    logs_dir.mkdir(parents=True, exist_ok=True)

    file_basename = Path(file_path).stem
    output_file = logs_dir / f"{file_basename}-{config.output_suffix}.md"

    # Read input file
    file_content = Path(file_path).read_text()

    # Build full prompt
    full_prompt = f"""{config.prompt}

---

## Document to Evaluate

**File**: {file_path}

{file_content}
"""

    # Create temp file for prompt
    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
        f.write(full_prompt)
        prompt_file = f.name

    prefix = config.log_prefix or config.name.upper()

    try:
        print(f"{prefix}: Using model {config.model}")

        # Build aider command
        cmd = [
            "aider",
            "--model", config.model,
            "--yes",
            "--no-git",
            "--no-auto-commits",
            "--message-file", prompt_file,
            "--read", file_path,
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            env=os.environ,
        )

        # Check for errors
        output = result.stdout + result.stderr
        if "RateLimitError" in output or "tokens per min" in output:
            _print_rate_limit_error(file_path)
            return 1

        # Write output
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        header = f"""# {config.output_suffix.replace('-', ' ').replace('_', ' ').title()}

**Source**: {file_path}
**Evaluator**: {config.name}
**Model**: {config.model}
**Generated**: {timestamp}

---

"""
        output_file.write_text(header + result.stdout)

        print(f"{prefix}: Output written to {output_file}")

        # Validate output and determine verdict
        is_valid, verdict, message = validate_evaluation_output(str(output_file))

        if not is_valid:
            print(f"{RED}Evaluation failed: {message}{RESET}")
            return 1

        return _report_verdict(verdict, output_file, config)

    except subprocess.TimeoutExpired:
        _print_timeout_error(timeout)
        return 1
    except FileNotFoundError:
        _print_platform_error()
        return 1
    finally:
        Path(prompt_file).unlink(missing_ok=True)


def _execute_script(
    script: str,
    file_path: str,
    config: EvaluatorConfig,
    project_config: dict,
    timeout: int,
) -> int:
    """Execute a shell script evaluator."""
    try:
        result = subprocess.run(
            [script, file_path],
            text=True,
            capture_output=True,
            timeout=timeout,
        )

        # Check for rate limit errors
        output = result.stdout + result.stderr
        if "RateLimitError" in output or "tokens per min" in output:
            _print_rate_limit_error(file_path)
            return 1

    except subprocess.TimeoutExpired:
        _print_timeout_error(timeout)
        return 1
    except FileNotFoundError:
        _print_platform_error()
        return 1

    # Validate output
    file_basename = Path(file_path).stem
    log_file = Path(project_config["log_directory"]) / f"{file_basename}-{config.output_suffix}.md"

    is_valid, verdict, message = validate_evaluation_output(str(log_file))

    if not is_valid:
        print(f"{RED}Evaluation failed: {message}{RESET}")
        return 1

    return _report_verdict(verdict, log_file, config)


def _report_verdict(verdict: str | None, log_file: Path, config: EvaluatorConfig) -> int:
    """Report the evaluation verdict to terminal."""
    print()
    if verdict == "APPROVED":
        print(f"{GREEN}Evaluation APPROVED!{RESET}")
        print(f" Review output: {log_file}")
        return 0
    elif verdict == "NEEDS_REVISION":
        print(f"{YELLOW}Evaluation NEEDS_REVISION{RESET}")
        print(f" Details: {log_file}")
        return 1
    elif verdict == "REJECTED":
        print(f"{RED}Evaluation REJECTED{RESET}")
        print(f" Details: {log_file}")
        return 1
    else:
        print(f"{YELLOW}Evaluation complete (verdict: {verdict}){RESET}")
        print(f" Review output: {log_file}")
        return 0


# Helper functions
def _check_file_size(file_path: str) -> tuple[int, int]:
    """Return (line_count, estimated_tokens)."""
    with open(file_path, "r") as f:
        lines = f.readlines()
        f.seek(0)
        content = f.read()
    return len(lines), len(content) // 4


def _warn_large_file(line_count: int, tokens: int) -> None:
    """Print large file warning."""
    print(f"{YELLOW}Large file detected:{RESET}")
    print(f" Lines: {line_count:,}")
    print(f" Estimated tokens: ~{tokens:,}")
    print()


def _confirm_continue() -> bool:
    """Ask user to confirm continuing with large file."""
    response = input("Continue anyway? [y/N]: ").strip().lower()
    return response in ["y", "yes"]


def _print_aider_help() -> None:
    """Print aider installation help."""
    print()
    print(f"{BOLD}FIX:{RESET}")
    print(" 1. Install aider: pip install aider-chat")
    print(" 2. Verify: aider --version")


def _print_rate_limit_error(file_path: str) -> None:
    """Print rate limit error with suggestions."""
    print(f"{RED}Error: API rate limit exceeded{RESET}")
    print()
    print(f"{BOLD}SOLUTIONS:{RESET}")
    print(" 1. Split into smaller documents (<500 lines)")
    print(" 2. Upgrade your API tier")
    print(" 3. Wait and retry")


def _print_timeout_error(timeout: int) -> None:
    """Print timeout error."""
    print(f"{RED}Error: Evaluation timed out (>{timeout}s){RESET}")


def _print_platform_error() -> None:
    """Print platform compatibility error."""
    if platform.system() == "Windows":
        print(f"{RED}Error: Windows not supported{RESET}")
        print(" Use WSL (Windows Subsystem for Linux)")
    else:
        print(f"{RED}Error: Script not found{RESET}")
        print(" Run: adversarial init")
```
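A hedged usage sketch of the runner follows. It assumes a project initialised with `adversarial init`, `aider` on PATH, and the evaluator's API key exported; `tone-check` is the hypothetical evaluator from the previous sketch and `docs/plan.md` is a placeholder input file.

```python
# Sketch only: look up a discovered evaluator and run it on a single file.
# run_evaluator() returns 0 for APPROVED (or an unrecognised verdict) and a
# non-zero code for NEEDS_REVISION, REJECTED, or any setup/runtime error.
import sys

from adversarial_workflow.evaluators.discovery import discover_local_evaluators
from adversarial_workflow.evaluators.runner import run_evaluator

evaluators = discover_local_evaluators()
config = evaluators["tone-check"]          # hypothetical custom evaluator
sys.exit(run_evaluator(config, "docs/plan.md", timeout=300))
```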
adversarial_workflow/utils/__init__.py
@@ -0,0 +1,17 @@
```python
"""Shared utilities for adversarial-workflow."""

from .colors import BOLD, CYAN, GRAY, GREEN, RED, RESET, YELLOW
from .config import load_config
from .validation import validate_evaluation_output

__all__ = [
    "BOLD",
    "CYAN",
    "GRAY",
    "GREEN",
    "RED",
    "RESET",
    "YELLOW",
    "load_config",
    "validate_evaluation_output",
]
```
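Because the package re-exports these names, callers can import the color codes and config loader from one place. A minimal sketch (the `log_directory` key comes from the defaults in `utils/config.py` below):

```python
# Sketch: import shared helpers from the utils package rather than submodules.
from adversarial_workflow.utils import GREEN, RESET, load_config

cfg = load_config()
print(f"{GREEN}Logs directory: {cfg['log_directory']}{RESET}")
```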
adversarial_workflow/utils/config.py
@@ -0,0 +1,44 @@
```python
"""Configuration loading utilities."""

from __future__ import annotations

import os
from typing import Any

import yaml


def load_config(config_path: str = ".adversarial/config.yml") -> dict[str, Any]:
    """Load configuration from YAML file with environment variable overrides."""
    # Default configuration
    config: dict[str, Any] = {
        "evaluator_model": "gpt-4o",
        "task_directory": "tasks/",
        "test_command": "pytest",
        "log_directory": ".adversarial/logs/",
        "artifacts_directory": ".adversarial/artifacts/",
    }

    # Load from file if exists
    if os.path.exists(config_path):
        with open(config_path) as f:
            file_config = yaml.safe_load(f) or {}
        if not isinstance(file_config, dict):
            raise ValueError(
                f"Config file must be a mapping, got {type(file_config).__name__}"
            )
        config.update(file_config)

    # Override with environment variables
    env_overrides = {
        "ADVERSARIAL_EVALUATOR_MODEL": "evaluator_model",
        "ADVERSARIAL_TEST_COMMAND": "test_command",
        "ADVERSARIAL_LOG_DIR": "log_directory",
    }

    for env_var, config_key in env_overrides.items():
        value = os.getenv(env_var)
        if value:
            config[config_key] = value

    return config
```
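The override precedence implemented by `load_config()` can be seen end to end in a short sketch: file values replace the built-in defaults, and `ADVERSARIAL_*` environment variables win over both. The model names written below are illustrative, not part of the package.

```python
# Sketch of config precedence: defaults < config.yml < environment variables.
import os
from pathlib import Path

from adversarial_workflow.utils.config import load_config

Path(".adversarial").mkdir(exist_ok=True)
Path(".adversarial/config.yml").write_text("evaluator_model: gpt-4o-mini\n")

print(load_config()["evaluator_model"])   # gpt-4o-mini (from config.yml)

os.environ["ADVERSARIAL_EVALUATOR_MODEL"] = "o3-mini"
print(load_config()["evaluator_model"])   # o3-mini (env override wins)
```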