adversarial-workflow 0.5.0-py3-none-any.whl → 0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adversarial_workflow/__init__.py +1 -1
- adversarial_workflow/cli.py +127 -237
- adversarial_workflow/evaluators/__init__.py +45 -0
- adversarial_workflow/evaluators/builtins.py +36 -0
- adversarial_workflow/evaluators/config.py +49 -0
- adversarial_workflow/evaluators/discovery.py +212 -0
- adversarial_workflow/evaluators/runner.py +313 -0
- adversarial_workflow/utils/__init__.py +17 -0
- adversarial_workflow/utils/colors.py +9 -0
- adversarial_workflow/utils/config.py +44 -0
- adversarial_workflow/utils/file_splitter.py +378 -0
- adversarial_workflow/utils/validation.py +76 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/METADATA +61 -1
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/RECORD +18 -8
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/WHEEL +1 -1
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/entry_points.txt +0 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/top_level.txt +0 -0
adversarial_workflow/utils/file_splitter.py
ADDED
@@ -0,0 +1,378 @@
+"""File splitting utility for large task specifications.
+
+This module provides functionality to split large markdown files into smaller,
+independently evaluable chunks to work around OpenAI's rate limits.
+"""
+
+import re
+import os
+from pathlib import Path
+from typing import List, Dict, Any
+
+
+def analyze_task_file(file_path: str) -> Dict[str, Any]:
+    """Analyze file structure and suggest split points.
+
+    Args:
+        file_path: Path to the markdown file to analyze
+
+    Returns:
+        Dict containing:
+        - total_lines: Total number of lines
+        - sections: List of detected sections with metadata
+        - estimated_tokens: Rough token estimate (lines * 4)
+        - suggested_splits: List of suggested split points
+
+    Raises:
+        FileNotFoundError: If file doesn't exist
+        ValueError: If file is empty or too small
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    if not content.strip():
+        raise ValueError("File is empty or too small")
+
+    lines = content.split('\n')
+    total_lines = len(lines)
+
+    # Detect markdown sections
+    sections = []
+    current_section = None
+    current_start = 1
+
+    for i, line in enumerate(lines, 1):
+        # Check for markdown headings (# or ##)
+        if re.match(r'^#+\s+', line.strip()):
+            # Close previous section
+            if current_section is not None:
+                current_section['end_line'] = i - 1
+                current_section['line_count'] = current_section['end_line'] - current_section['start_line'] + 1
+                sections.append(current_section)
+
+            # Start new section
+            heading_level = len(line.lstrip().split()[0])  # Count # characters
+            title = re.sub(r'^#+\s+', '', line.strip())
+            current_section = {
+                'title': title,
+                'heading_level': heading_level,
+                'start_line': i,
+                'end_line': None,
+                'line_count': 0
+            }
+            current_start = i
+
+    # Close final section
+    if current_section is not None:
+        current_section['end_line'] = total_lines
+        current_section['line_count'] = current_section['end_line'] - current_section['start_line'] + 1
+        sections.append(current_section)
+
+    # If no sections found, treat entire file as one section
+    if not sections:
+        sections = [{
+            'title': 'Full Document',
+            'heading_level': 1,
+            'start_line': 1,
+            'end_line': total_lines,
+            'line_count': total_lines
+        }]
+
+    # Estimate tokens (rough approximation: 1 line ≈ 4 tokens)
+    estimated_tokens = total_lines * 4
+
+    # Suggest splits if file is large
+    suggested_splits = []
+    if total_lines > 500:
+        # Suggest section-based splits
+        suggested_splits = _suggest_section_splits(sections, max_lines=500)
+
+    return {
+        'total_lines': total_lines,
+        'sections': sections,
+        'estimated_tokens': estimated_tokens,
+        'suggested_splits': suggested_splits
+    }
+
+
+def split_by_sections(content: str, max_lines: int = 500) -> List[Dict[str, Any]]:
+    """Split file by markdown sections.
+
+    Args:
+        content: The markdown content to split
+        max_lines: Maximum lines per split
+
+    Returns:
+        List of split dictionaries with metadata
+    """
+    lines = content.split('\n')
+    total_lines = len(lines)
+
+    if total_lines <= max_lines:
+        return [{
+            'content': content,
+            'title': 'Full Document',
+            'start_line': 1,
+            'end_line': total_lines,
+            'line_count': total_lines
+        }]
+
+    splits = []
+    current_split_lines = []
+    current_start = 1
+    current_title = "Part"
+    split_count = 1
+
+    for i, line in enumerate(lines, 1):
+        current_split_lines.append(line)
+
+        # Check if we hit a section boundary and are near limit
+        is_section_boundary = re.match(r'^#+\s+', line.strip())
+        approaching_limit = len(current_split_lines) >= max_lines * 0.8
+
+        if len(current_split_lines) >= max_lines or (is_section_boundary and approaching_limit):
+            # Create split
+            split_content = '\n'.join(current_split_lines)
+            splits.append({
+                'content': split_content,
+                'title': f"Part {split_count}",
+                'start_line': current_start,
+                'end_line': i,
+                'line_count': len(current_split_lines)
+            })
+
+            # Reset for next split
+            current_split_lines = []
+            current_start = i + 1
+            split_count += 1
+
+    # Handle remaining lines
+    if current_split_lines:
+        split_content = '\n'.join(current_split_lines)
+        splits.append({
+            'content': split_content,
+            'title': f"Part {split_count}",
+            'start_line': current_start,
+            'end_line': total_lines,
+            'line_count': len(current_split_lines)
+        })
+
+    return splits
+
+
+def split_by_phases(content: str) -> List[Dict[str, Any]]:
+    """Split file by implementation phases.
+
+    Args:
+        content: The markdown content to split
+
+    Returns:
+        List of split dictionaries, one per phase
+    """
+    lines = content.split('\n')
+    splits = []
+    current_split_lines = []
+    current_phase = None
+    current_start = 1
+
+    for i, line in enumerate(lines, 1):
+        # Check for phase markers
+        phase_match = re.search(r'#+\s+Phase\s+(\d+)', line, re.IGNORECASE)
+
+        if phase_match:
+            # Close previous split
+            if current_split_lines:
+                split_content = '\n'.join(current_split_lines)
+                title = f"Phase {current_phase}" if current_phase else "Overview"
+                splits.append({
+                    'content': split_content,
+                    'title': title,
+                    'phase_number': current_phase,
+                    'start_line': current_start,
+                    'end_line': i - 1,
+                    'line_count': len(current_split_lines)
+                })
+
+            # Start new split
+            current_phase = int(phase_match.group(1))
+            current_split_lines = [line]
+            current_start = i
+        else:
+            current_split_lines.append(line)
+
+    # Handle final split
+    if current_split_lines:
+        split_content = '\n'.join(current_split_lines)
+        title = f"Phase {current_phase}" if current_phase else "Full Document"
+        phase_info = {'phase_number': current_phase} if current_phase else {}
+        splits.append({
+            'content': split_content,
+            'title': title,
+            'start_line': current_start,
+            'end_line': len(lines),
+            'line_count': len(current_split_lines),
+            **phase_info
+        })
+
+    # If no phases found, return entire content
+    if not splits:
+        splits = [{
+            'content': content,
+            'title': 'Full Document',
+            'start_line': 1,
+            'end_line': len(lines),
+            'line_count': len(lines)
+        }]
+
+    return splits
+
+
+def split_at_lines(content: str, line_numbers: List[int]) -> List[Dict[str, Any]]:
+    """Split at specified line numbers.
+
+    Args:
+        content: The content to split
+        line_numbers: Line numbers where splits should occur
+
+    Returns:
+        List of split dictionaries
+    """
+    lines = content.split('\n')
+    total_lines = len(lines)
+
+    if not line_numbers:
+        return [{
+            'content': content,
+            'title': 'Full Document',
+            'start_line': 1,
+            'end_line': total_lines,
+            'line_count': total_lines
+        }]
+
+    # Sort and deduplicate line numbers
+    split_points = sorted(set(line_numbers))
+
+    splits = []
+    current_start = 1
+
+    for split_line in split_points:
+        if split_line >= total_lines:
+            continue
+
+        # Create split from current_start to split_line
+        split_lines = lines[current_start - 1:split_line]
+        split_content = '\n'.join(split_lines)
+
+        splits.append({
+            'content': split_content,
+            'title': f"Lines {current_start}-{split_line}",
+            'start_line': current_start,
+            'end_line': split_line,
+            'line_count': len(split_lines)
+        })
+
+        current_start = split_line + 1
+
+    # Handle remaining lines after final split
+    if current_start <= total_lines:
+        remaining_lines = lines[current_start - 1:]
+        split_content = '\n'.join(remaining_lines)
+
+        splits.append({
+            'content': split_content,
+            'title': f"Lines {current_start}-{total_lines}",
+            'start_line': current_start,
+            'end_line': total_lines,
+            'line_count': len(remaining_lines)
+        })
+
+    return splits
+
+
+def generate_split_files(original: str, splits: List[Dict[str, Any]], output_dir: str) -> List[str]:
+    """Generate split files with metadata and cross-references.
+
+    Args:
+        original: Original filename
+        splits: List of split dictionaries
+        output_dir: Directory to write split files
+
+    Returns:
+        List of created file paths
+    """
+    os.makedirs(output_dir, exist_ok=True)
+
+    created_files = []
+    original_name = Path(original).stem
+    original_ext = Path(original).suffix
+
+    for i, split in enumerate(splits, 1):
+        # Generate filename
+        filename = f"{original_name}-part{i}{original_ext}"
+        file_path = os.path.join(output_dir, filename)
+
+        # Create content with metadata header
+        metadata_header = f"""<!-- Split from {original} -->
+<!-- Part {i} of {len(splits)} -->
+<!-- Lines {split['start_line']}-{split['end_line']} ({split['line_count']} lines) -->
+
+"""
+
+        full_content = metadata_header + split['content']
+
+        # Write file
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(full_content)
+
+        created_files.append(file_path)
+
+    return created_files
+
+
+def _suggest_section_splits(sections: List[Dict[str, Any]], max_lines: int = 500) -> List[Dict[str, Any]]:
+    """Suggest optimal split points based on sections.
+
+    Args:
+        sections: List of section metadata
+        max_lines: Maximum lines per split
+
+    Returns:
+        List of suggested split configurations
+    """
+    suggestions = []
+    current_chunk_lines = 0
+    current_chunk_sections = []
+
+    for section in sections:
+        section_lines = section['line_count']
+
+        # If adding this section would exceed limit, finish current chunk
+        if current_chunk_lines + section_lines > max_lines and current_chunk_sections:
+            suggestions.append({
+                'sections': current_chunk_sections.copy(),
+                'total_lines': current_chunk_lines,
+                'start_line': current_chunk_sections[0]['start_line'],
+                'end_line': current_chunk_sections[-1]['end_line']
+            })
+
+            # Start new chunk
+            current_chunk_sections = [section]
+            current_chunk_lines = section_lines
+        else:
+            # Add section to current chunk
+            current_chunk_sections.append(section)
+            current_chunk_lines += section_lines
+
+    # Add final chunk
+    if current_chunk_sections:
+        suggestions.append({
+            'sections': current_chunk_sections,
+            'total_lines': current_chunk_lines,
+            'start_line': current_chunk_sections[0]['start_line'],
+            'end_line': current_chunk_sections[-1]['end_line']
+        })
+
+    return suggestions
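The new module exposes `analyze_task_file`, `split_by_sections`, `split_by_phases`, `split_at_lines`, and `generate_split_files`. A minimal sketch of how these helpers compose, based only on the signatures in the diff above; the `docs/task.md` input and the `.adversarial/splits` output directory are illustrative, not part of the package:

```python
# Sketch only: wiring the file_splitter helpers together by hand.
from adversarial_workflow.utils.file_splitter import (
    analyze_task_file,
    split_by_sections,
    generate_split_files,
)

task = "docs/task.md"                      # hypothetical input file
report = analyze_task_file(task)           # structure, sections, rough token estimate
print(report["total_lines"], report["estimated_tokens"])

if report["total_lines"] > 500:            # same threshold the module uses internally
    with open(task, encoding="utf-8") as f:
        splits = split_by_sections(f.read(), max_lines=500)
    paths = generate_split_files(task, splits, ".adversarial/splits")
    print(f"Wrote {len(paths)} parts")
```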
adversarial_workflow/utils/validation.py
ADDED
@@ -0,0 +1,76 @@
+"""Output validation utilities."""
+
+from __future__ import annotations
+
+import os
+import re
+
+
+def validate_evaluation_output(
+    log_file_path: str,
+) -> tuple[bool, str | None, str]:
+    """
+    Validate that evaluation log contains actual evaluation content.
+
+    Args:
+        log_file_path: Path to the evaluation log file
+
+    Returns:
+        (is_valid, verdict, message):
+        - is_valid: True if valid evaluation, False if failed
+        - verdict: "APPROVED", "NEEDS_REVISION", "REJECTED", or None
+        - message: Descriptive message about validation result
+    """
+    if not os.path.exists(log_file_path):
+        return False, None, f"Log file not found: {log_file_path}"
+
+    with open(log_file_path) as f:
+        content = f.read()
+
+    # Check minimum content size
+    if len(content) < 500:
+        return (
+            False,
+            None,
+            f"Log file too small ({len(content)} bytes) - evaluation likely failed",
+        )
+
+    # Check for evaluation markers (case-insensitive)
+    content_lower = content.lower()
+    evaluation_markers = [
+        "verdict:",
+        "approved",
+        "needs_revision",
+        "rejected",
+        "evaluation summary",
+        "strengths",
+        "concerns",
+    ]
+
+    has_evaluation_content = any(marker in content_lower for marker in evaluation_markers)
+    if not has_evaluation_content:
+        return (
+            False,
+            None,
+            "Log file missing evaluation content - no verdict or analysis found",
+        )
+
+    # Extract verdict
+    verdict = None
+    verdict_patterns = [
+        r"Verdict:\s*(APPROVED|NEEDS_REVISION|REJECTED)",
+        r"\*\*Verdict\*\*:\s*(APPROVED|NEEDS_REVISION|REJECTED)",
+        r"^(APPROVED|NEEDS_REVISION|REJECTED)\s*$",
+    ]
+
+    for pattern in verdict_patterns:
+        match = re.search(pattern, content, re.MULTILINE | re.IGNORECASE)
+        if match:
+            verdict = match.group(1).upper()
+            break
+
+    if verdict:
+        return True, verdict, f"Valid evaluation with verdict: {verdict}"
+    else:
+        # Has content but no clear verdict
+        return True, None, "Evaluation complete (verdict not detected)"
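`validate_evaluation_output` reports failures through its `(is_valid, verdict, message)` tuple rather than raising. A short sketch of how a caller might branch on that tuple; the log path below is illustrative only:

```python
# Sketch only: consuming the (is_valid, verdict, message) tuple.
from adversarial_workflow.utils.validation import validate_evaluation_output

is_valid, verdict, message = validate_evaluation_output(
    ".adversarial/logs/plan-EVALUATION.md"  # hypothetical log file
)

if not is_valid:
    raise SystemExit(f"Evaluation failed: {message}")

if verdict == "APPROVED":
    print("Proceed to implementation")
else:
    # NEEDS_REVISION, REJECTED, or no verdict detected (verdict is None)
    print(f"Stop and revise: {message}")
```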
{adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: adversarial-workflow
-Version: 0.5.0
+Version: 0.6.0
 Summary: Multi-stage AI code review system preventing phantom work - Author/Evaluator pattern
 Author: Fredrik Matheson
 License: MIT
@@ -382,8 +382,68 @@ adversarial split task.md # Split large files into smaller parts
 adversarial split task.md --dry-run # Preview split without creating files
 adversarial review # Phase 3: Review implementation
 adversarial validate "pytest" # Phase 4: Validate with tests
+adversarial list-evaluators # List all available evaluators
 ```
 
+## Custom Evaluators
+
+Starting with v0.6.0, you can define project-specific evaluators without modifying the package.
+
+### Creating a Custom Evaluator
+
+1. Create the evaluators directory:
+```bash
+mkdir -p .adversarial/evaluators
+```
+
+2. Create a YAML definition:
+```yaml
+# .adversarial/evaluators/athena.yml
+name: athena
+description: Knowledge evaluation using Gemini 2.5 Pro
+model: gemini-2.5-pro
+api_key_env: GEMINI_API_KEY
+output_suffix: KNOWLEDGE-EVALUATION
+prompt: |
+  You are Athena, a knowledge evaluation specialist...
+
+# Optional
+aliases:
+  - knowledge
+```
+
+3. Use it like any built-in evaluator:
+```bash
+adversarial athena docs/research-plan.md
+```
+
+### Evaluator YAML Schema
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| `name` | Yes | Command name |
+| `description` | Yes | Help text shown in CLI |
+| `model` | Yes | Model to use (e.g., `gpt-4o`, `gemini-2.5-pro`) |
+| `api_key_env` | Yes | Environment variable for API key |
+| `output_suffix` | Yes | Log file suffix (e.g., `KNOWLEDGE-EVAL`) |
+| `prompt` | Yes | The evaluation prompt |
+| `aliases` | No | Alternative command names |
+| `log_prefix` | No | CLI output prefix |
+| `fallback_model` | No | Fallback model if primary fails |
+| `version` | No | Evaluator version (default: 1.0.0) |
+
+### Listing Available Evaluators
+
+```bash
+adversarial list-evaluators
+```
+
+### Example: Athena Knowledge Evaluator
+
+See [docs/examples/athena.yml](docs/examples/athena.yml) for a complete example of a knowledge-focused evaluator using Gemini 2.5 Pro.
+
+For full documentation on custom evaluators, see [docs/CUSTOM_EVALUATORS.md](docs/CUSTOM_EVALUATORS.md).
+
 ## Configuration
 
 ### Option 1: YAML Config (persistent)
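The required fields in the schema table above are all exercised by the athena.yml example inside the diff, while the optional ones are not. A hypothetical definition showing where those optional fields would sit; every value below is illustrative and not taken from the package:

```yaml
# Hypothetical evaluator definition exercising the optional schema fields.
name: security
description: Security-focused review
model: gpt-4o
api_key_env: OPENAI_API_KEY
output_suffix: SECURITY-EVALUATION
prompt: |
  You are a security reviewer...

aliases:                       # optional: alternative command names
  - sec
log_prefix: "SECURITY"         # optional: CLI output prefix
fallback_model: gpt-4o-mini    # optional: used if the primary model fails
version: 1.0.0                 # optional: evaluator version
```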
{adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/RECORD
@@ -1,6 +1,11 @@
-adversarial_workflow/__init__.py,sha256=
+adversarial_workflow/__init__.py,sha256=0e_PxWBE6XJGkei7qZJRwQhWx51NyDoCPHJTn5upXmY,596
 adversarial_workflow/__main__.py,sha256=Ibb0CngDCh4mpCe8Zxnf3kyKnMddBxQy2JAk_kfTUMQ,119
-adversarial_workflow/cli.py,sha256=
+adversarial_workflow/cli.py,sha256=ssE7xXFgY_0SRvLFGbb38trmxqwXLT1m5jo3VZKToxA,109198
+adversarial_workflow/evaluators/__init__.py,sha256=vB4gGaoP46a-ZLOeoVKjR6WohAsgeif4JMhaak9AIPo,1266
+adversarial_workflow/evaluators/builtins.py,sha256=u5LokYLe8ruEW2tunhOQaNSkpcZ9Ee2IeTkaC0dZDSY,1102
+adversarial_workflow/evaluators/config.py,sha256=05qYPIiIpCxXBVJzs70WQQLxi8I7MedfhE_oydXEcq0,1520
+adversarial_workflow/evaluators/discovery.py,sha256=V5vyFLKfh3Q9MVEipWMdD0tzsW3xC3RttVS_oEeWIb8,6801
+adversarial_workflow/evaluators/runner.py,sha256=27fdz49wdkNyBfdjr9VcZZA53rw270XYQWp_2y5s0PU,9244
 adversarial_workflow/templates/.aider.conf.yml.template,sha256=jT2jWIgsnmS3HLhoQWMTO3GV07bUcsT2keYw60jqiDw,183
 adversarial_workflow/templates/.env.example.template,sha256=TmTlcgz44uZqIbqgXqdfHMl-0vVn96F_EGNohClFkb8,1821
 adversarial_workflow/templates/README.template,sha256=FQAMPO99eIt_kgQfwhGHcrK736rm_MEvWSbPnqBSjAE,1349
@@ -15,9 +20,14 @@ adversarial_workflow/templates/agent-context/README.md.template,sha256=gF31N-s_t
 adversarial_workflow/templates/agent-context/agent-handoffs-minimal.json.template,sha256=DCSi4NFl0z0OwQxIw6FnCIVPTXz3AvSvl76ZkIZKWC4,1466
 adversarial_workflow/templates/agent-context/agent-handoffs.json.template,sha256=rdRX79xdNKyTGd8_g6pvSdQK5VC5sDiErq_OZFslSXI,3904
 adversarial_workflow/templates/agent-context/current-state.json.template,sha256=UH3SQGjXGNzScqHnQcPrhrI9ZvjQBC3oBp4S9TilzoY,2325
-adversarial_workflow
-adversarial_workflow
-adversarial_workflow
-adversarial_workflow
-adversarial_workflow
-adversarial_workflow-0.
+adversarial_workflow/utils/__init__.py,sha256=Pnm-a_jqoMVOxHdvVWXeVrL0IKI-zkY7EAdbQmZAkSI,352
+adversarial_workflow/utils/colors.py,sha256=uRrG6KfIDBLo0F5_vPwms9NCm9-x8YXBiyZ4naCr868,160
+adversarial_workflow/utils/config.py,sha256=NBoC_-YYukEVo6BgpX2cDyeqV-3tnn_sHNU9L1AuSLQ,1341
+adversarial_workflow/utils/file_splitter.py,sha256=rVRMHJgzJ7uNiytimqbBY8PAr-SevXdRqUpr4xf6LdM,12061
+adversarial_workflow/utils/validation.py,sha256=0QfuRd-kurcadUCd9XQvO-N8RsmLp6ONQnc0vaQTUBA,2188
+adversarial_workflow-0.6.0.dist-info/licenses/LICENSE,sha256=M-dOQlre-NmicyPa55hYOJUW8roGpCKEgtq-z0z1KCA,1073
+adversarial_workflow-0.6.0.dist-info/METADATA,sha256=2GWjMoaxNkF9PF16I4QN0BDTupwoXtT_6uZpxW_wvTI,28799
+adversarial_workflow-0.6.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+adversarial_workflow-0.6.0.dist-info/entry_points.txt,sha256=9H-iZ-yF1uKZ8P0G1suc6kWR0NvK7uPZJbhN7nvt1sE,62
+adversarial_workflow-0.6.0.dist-info/top_level.txt,sha256=8irutNxLRjUbTlzfAibIpz7_ovkkF2h8ES69NQpv24c,21
+adversarial_workflow-0.6.0.dist-info/RECORD,,
{adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/entry_points.txt
RENAMED
File without changes

{adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/licenses/LICENSE
RENAMED
File without changes

{adversarial_workflow-0.5.0.dist-info → adversarial_workflow-0.6.0.dist-info}/top_level.txt
RENAMED
File without changes