fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- examples/export_tasks.py +16 -5
- examples/export_tasks_filtered.py +245 -0
- examples/fetch_tasks.py +230 -0
- examples/import_tasks.py +140 -8
- examples/iterate_verifiers.py +725 -0
- fleet/__init__.py +128 -5
- fleet/_async/__init__.py +27 -3
- fleet/_async/base.py +24 -9
- fleet/_async/client.py +938 -41
- fleet/_async/env/client.py +60 -3
- fleet/_async/instance/client.py +52 -7
- fleet/_async/models.py +15 -0
- fleet/_async/resources/api.py +200 -0
- fleet/_async/resources/sqlite.py +1801 -46
- fleet/_async/tasks.py +122 -25
- fleet/_async/verifiers/bundler.py +22 -21
- fleet/_async/verifiers/verifier.py +25 -19
- fleet/agent/__init__.py +32 -0
- fleet/agent/gemini_cua/Dockerfile +45 -0
- fleet/agent/gemini_cua/__init__.py +10 -0
- fleet/agent/gemini_cua/agent.py +759 -0
- fleet/agent/gemini_cua/mcp/main.py +108 -0
- fleet/agent/gemini_cua/mcp_server/__init__.py +5 -0
- fleet/agent/gemini_cua/mcp_server/main.py +105 -0
- fleet/agent/gemini_cua/mcp_server/tools.py +178 -0
- fleet/agent/gemini_cua/requirements.txt +5 -0
- fleet/agent/gemini_cua/start.sh +30 -0
- fleet/agent/orchestrator.py +854 -0
- fleet/agent/types.py +49 -0
- fleet/agent/utils.py +34 -0
- fleet/base.py +34 -9
- fleet/cli.py +1061 -0
- fleet/client.py +1060 -48
- fleet/config.py +1 -1
- fleet/env/__init__.py +16 -0
- fleet/env/client.py +60 -3
- fleet/eval/__init__.py +15 -0
- fleet/eval/uploader.py +231 -0
- fleet/exceptions.py +8 -0
- fleet/instance/client.py +53 -8
- fleet/instance/models.py +1 -0
- fleet/models.py +303 -0
- fleet/proxy/__init__.py +25 -0
- fleet/proxy/proxy.py +453 -0
- fleet/proxy/whitelist.py +244 -0
- fleet/resources/api.py +200 -0
- fleet/resources/sqlite.py +1845 -46
- fleet/tasks.py +113 -20
- fleet/utils/__init__.py +7 -0
- fleet/utils/http_logging.py +178 -0
- fleet/utils/logging.py +13 -0
- fleet/utils/playwright.py +440 -0
- fleet/verifiers/bundler.py +22 -21
- fleet/verifiers/db.py +985 -1
- fleet/verifiers/decorator.py +1 -1
- fleet/verifiers/verifier.py +25 -19
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/METADATA +28 -1
- fleet_python-0.2.105.dist-info/RECORD +115 -0
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/WHEEL +1 -1
- fleet_python-0.2.105.dist-info/entry_points.txt +2 -0
- tests/test_app_method.py +85 -0
- tests/test_expect_exactly.py +4148 -0
- tests/test_expect_only.py +2593 -0
- tests/test_instance_dispatch.py +607 -0
- tests/test_sqlite_resource_dual_mode.py +263 -0
- tests/test_sqlite_shared_memory_behavior.py +117 -0
- fleet_python-0.2.66b2.dist-info/RECORD +0 -81
- tests/test_verifier_security.py +0 -427
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/licenses/LICENSE +0 -0
- {fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/top_level.txt +0 -0
examples/iterate_verifiers.py (new file, +725 lines)

@@ -0,0 +1,725 @@

````python
import argparse
import json
import re
import sys
from typing import Dict, Tuple, Optional


# Marker for storing leading content (docstrings, imports) in the Python file
LEADING_CONTENT_START = "# @LEADING_CONTENT_START"
LEADING_CONTENT_END = "# @LEADING_CONTENT_END"
# Legacy markers for backwards compatibility
LEADING_DOCSTRING_START = "# @LEADING_DOCSTRING_START"
LEADING_DOCSTRING_END = "# @LEADING_DOCSTRING_END"


def extract_function_info(function_code: str) -> Optional[Tuple[str, bool]]:
    """
    Extract function name and async status from Python function code.
    Handles both regular functions (def) and async functions (async def).

    Args:
        function_code: Python function code as a string

    Returns:
        A tuple of (function_name, is_async) if found, None otherwise
    """
    # Normalize escaped newlines and strip common Markdown code fences
    code = function_code.replace("\\n", "\n").strip()
    if "```" in code:
        # Extract the first fenced block if present
        fence_blocks = re.findall(r"```[a-zA-Z0-9_+-]*\n([\s\S]*?)\n```", code)
        if fence_blocks:
            code = fence_blocks[0].strip()

    # Remove leading decorators (keep them for regex but allow preceding lines)
    # Robust regex: allow optional decorators and whitespace before the def
    pattern = r"^\s*(?:@[\w\.\n+() ,]*\n\s*)*(async\s+)?def\s+([A-Za-z_]\w*)\s*\("

    match = re.search(pattern, code, flags=re.MULTILINE)
    if match:
        is_async = match.group(1) is not None
        function_name = match.group(2)
        return (function_name, is_async)

    # Fallback: search anywhere (not anchored) for a def signature
    fallback = r"(async\s+)?def\s+([A-Za-z_]\w*)\s*\("
    match = re.search(fallback, code)
    if match:
        is_async = match.group(1) is not None
        function_name = match.group(2)
        return (function_name, is_async)

    return None


def extract_leading_content(code: str) -> Tuple[Optional[str], str]:
    """
    Extract leading content (docstrings, imports, etc.) before the main function definition.

    Args:
        code: The verifier code

    Returns:
        Tuple of (leading_content or None, function_code)
    """
    code = code.strip()

    # Find the first top-level function definition (def or async def at column 0)
    # We need to find "def " or "async def " that's not indented
    lines = code.split("\n")
    func_start_idx = None

    for i, line in enumerate(lines):
        # Check for unindented def or async def
        if line.startswith("def ") or line.startswith("async def "):
            func_start_idx = i
            break

    if func_start_idx is None or func_start_idx == 0:
        # No leading content or function not found
        return (None, code)

    # Everything before the function is leading content
    leading_lines = lines[:func_start_idx]
    func_lines = lines[func_start_idx:]

    # Clean up leading content - remove empty lines at the end
    while leading_lines and not leading_lines[-1].strip():
        leading_lines.pop()

    if not leading_lines:
        return (None, code)

    leading_content = "\n".join(leading_lines)
    function_code = "\n".join(func_lines)

    return (leading_content, function_code)


def clean_verifier_code(code: str) -> Tuple[str, Optional[str]]:
    """
    Clean verifier code by removing markdown code fences and extracting leading content.

    Args:
        code: Raw verifier code string

    Returns:
        Tuple of (function code, leading_content or None)
    """
    code = code.strip()

    # Remove markdown code fences if present
    if "```" in code:
        fence_blocks = re.findall(r"```[a-zA-Z0-9_+-]*\n([\s\S]*?)\n```", code)
        if fence_blocks:
            code = fence_blocks[0].strip()

    # Extract leading content (docstrings, imports, etc.) if present
    leading_content, code = extract_leading_content(code)

    return (code, leading_content)


def format_leading_content_as_comment(content: str) -> str:
    """
    Format leading content (docstrings, imports, etc.) as a comment block with markers.

    Args:
        content: The leading content (docstrings, imports, etc.)

    Returns:
        Formatted comment block
    """
    lines = [LEADING_CONTENT_START]

    for line in content.split("\n"):
        # Prefix each line with "# |" to preserve exact content including empty lines
        lines.append(f"# |{line}")

    lines.append(LEADING_CONTENT_END)
    return "\n".join(lines)


def parse_leading_content_from_comments(comment_block: str) -> str:
    """
    Parse leading content from a comment block with markers.

    Args:
        comment_block: The comment block between markers

    Returns:
        Reconstructed leading content
    """
    lines = []
    for line in comment_block.split("\n"):
        # Remove "# |" prefix (new format)
        if line.startswith("# |"):
            lines.append(line[3:])
        # Legacy format: "# " prefix
        elif line.startswith("# "):
            lines.append(line[2:])
        elif line == "#":
            lines.append("")

    return "\n".join(lines)


def parse_legacy_docstring_from_comments(comment_block: str) -> str:
    """
    Parse a docstring from legacy comment block with markers.

    Args:
        comment_block: The comment block between markers

    Returns:
        Reconstructed docstring with triple quotes
    """
    lines = []
    for line in comment_block.split("\n"):
        # Remove "# " prefix
        if line.startswith("# "):
            lines.append(line[2:])
        elif line == "#":
            lines.append("")

    return '"""' + "\n".join(lines) + '"""'


def extract_verifiers_to_file(json_path: str, py_path: str) -> None:
    """
    Extract verifiers from JSON file and write them to a Python file with decorators.

    Args:
        json_path: Path to input JSON file
        py_path: Path to output Python file
    """
    print(f"Reading tasks from: {json_path}")

    # Load JSON file
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            tasks = json.load(f)
    except FileNotFoundError:
        print(f"✗ Error: File '{json_path}' not found")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"✗ Error: Invalid JSON in '{json_path}': {e}")
        sys.exit(1)

    if not isinstance(tasks, list):
        print("✗ Error: JSON file must contain an array of tasks")
        sys.exit(1)

    print(f"Found {len(tasks)} task(s)")

    # Extract verifiers
    verifiers = []
    missing_verifier = []
    duplicate_keys = set()
    seen_keys = set()

    for i, task in enumerate(tasks):
        task_key = task.get("key") or task.get("id")
        if not task_key:
            print(f"⚠ Warning: Task at index {i} has no key or id, skipping")
            continue

        # Check for duplicate keys
        if task_key in seen_keys:
            duplicate_keys.add(task_key)
            print(f"⚠ Warning: Duplicate task key '{task_key}' found")
        seen_keys.add(task_key)

        # Get verifier code from multiple possible locations
        verifier_code = (
            task.get("verifier_func")
            or task.get("verifier_code")
            or task.get("metadata", {}).get("verifier_code")
        )

        if not verifier_code:
            missing_verifier.append(task_key)
            continue

        # Clean the code and extract leading content (docstrings, imports, etc.)
        cleaned_code, leading_content = clean_verifier_code(verifier_code)

        # Extract function info
        func_info = extract_function_info(cleaned_code)
        if not func_info:
            print(
                f"⚠ Warning: Could not extract function name from verifier for task '{task_key}'"
            )
            continue

        function_name, is_async = func_info

        verifiers.append(
            {
                "task_key": task_key,
                "function_name": function_name,
                "is_async": is_async,
                "code": cleaned_code,
                "leading_content": leading_content,
            }
        )

    if missing_verifier:
        print(f"\n⚠ Warning: {len(missing_verifier)} task(s) missing verifier code:")
        for key in missing_verifier[:10]:  # Show first 10
            print(f"  - {key}")
        if len(missing_verifier) > 10:
            print(f"  ... and {len(missing_verifier) - 10} more")

    if duplicate_keys:
        print(f"\n⚠ Warning: {len(duplicate_keys)} duplicate task key(s) found:")
        for key in list(duplicate_keys)[:10]:
            print(f"  - {key}")

    print(f"\n✓ Extracted {len(verifiers)} verifier(s)")

    # Count async vs sync
    async_count = sum(1 for v in verifiers if v["is_async"])
    sync_count = len(verifiers) - async_count
    print(f"  - {async_count} async verifier(s)")
    print(f"  - {sync_count} sync verifier(s)")

    # Write to Python file
    print(f"\nWriting verifiers to: {py_path}")

    with open(py_path, "w", encoding="utf-8") as f:
        # Write header
        f.write('"""Auto-generated verifiers file.\n\n')
        f.write(f"Extracted from: {json_path}\n")
        f.write(f"Total verifiers: {len(verifiers)}\n")
        f.write(f"  - Async: {async_count}\n")
        f.write(f"  - Sync: {sync_count}\n")
        f.write('"""\n\n')

        # Write imports
        f.write("# Import verifier decorators and dependencies\n")
        f.write("from fleet import (\n")
        f.write("    verifier,\n")
        f.write("    AsyncEnv,\n")
        f.write("    SyncEnv,\n")
        f.write("    SyncEnv as Environment,\n")
        f.write("    AsyncEnv as AsyncEnvironment,\n")
        f.write("    IgnoreConfig,\n")
        f.write("    TASK_FAILED_SCORE,\n")
        f.write("    TASK_SUCCESSFUL_SCORE,\n")
        f.write(")\n")
        f.write("from fleet.verifiers.verifier import verifier as verifier_sync\n")
        f.write("\n")
        f.write("# Standard library imports used in verifiers\n")
        f.write("import json\n")
        f.write("import re\n")
        f.write("import string\n")
        f.write("from typing import Any, Dict, List\n")
        f.write("\n")
        f.write("# Helper functions available in verifier namespace\n")
        f.write(
            '_TRANSLATOR = str.maketrans(string.punctuation, " " * len(string.punctuation))\n'
        )
        f.write("\n")
        f.write("def _normalize_text(value: str) -> str:\n")
        f.write("    text = value.lower().translate(_TRANSLATOR)\n")
        f.write('    return "".join(text.split())\n')
        f.write("\n")
        f.write("def _stringify_content(content: Any) -> str:\n")
        f.write("    if isinstance(content, (dict, list)):\n")
        f.write("        return json.dumps(content, sort_keys=True)\n")
        f.write("    return str(content)\n")
        f.write("\n")
        f.write("def normalized_contains(target: str, blob: Any) -> bool:\n")
        f.write("    normalized_target = _normalize_text(target)\n")
        f.write("    normalized_blob = _normalize_text(_stringify_content(blob))\n")
        f.write("    return normalized_target in normalized_blob\n")
        f.write("\n")
        f.write("def extract_numbers(text: str) -> list:\n")
        f.write("    cleaned_text = text.replace(',', '')\n")
        f.write("    pattern = r'-?\\d+\\.?\\d*'\n")
        f.write("    matches = re.findall(pattern, cleaned_text)\n")
        f.write("    return [float(num) for num in matches]\n")
        f.write("\n")
        f.write("def contains_number(text: str, target_number) -> bool:\n")
        f.write("    numbers = extract_numbers(text)\n")
        f.write("    try:\n")
        f.write("        if isinstance(target_number, str):\n")
        f.write("            target_number = target_number.replace(',', '')\n")
        f.write("        target = float(target_number)\n")
        f.write("    except (ValueError, AttributeError):\n")
        f.write("        return False\n")
        f.write("    return target in numbers\n")
        f.write("\n")
        f.write("# " + "=" * 78 + "\n")
        f.write("# VERIFIERS\n")
        f.write("# " + "=" * 78 + "\n\n")

        # Write each verifier
        for i, ver in enumerate(verifiers):
            # Write separator comment
            if i > 0:
                f.write("\n" + "# " + "-" * 78 + "\n\n")

            # Write task key comment
            f.write(f"# Task: {ver['task_key']}\n")
            f.write(
                f"# Function: {ver['function_name']} ({'async' if ver['is_async'] else 'sync'})\n"
            )

            # Write leading content (docstrings, imports) as comments if present
            if ver["leading_content"]:
                f.write(format_leading_content_as_comment(ver["leading_content"]))
                f.write("\n")

            # Write decorator - use verifier for async, verifier_sync for sync
            decorator_name = "verifier" if ver["is_async"] else "verifier_sync"
            f.write(f'@{decorator_name}(key="{ver["task_key"]}")\n')

            # Write function code
            f.write(ver["code"])
            f.write("\n")

    print(f"✓ Successfully wrote {len(verifiers)} verifier(s) to '{py_path}'")
    print("\nNext steps:")
    print(f"  1. Edit the verifiers in '{py_path}'")
    print(f"  2. Run: python {sys.argv[0]} apply {json_path} {py_path}")


def parse_verifiers_from_file(python_path: str) -> Dict[str, dict]:
    """
    Parse verifiers from a Python file and extract them by task key.

    Args:
        python_path: Path to Python file containing verifiers

    Returns:
        Dictionary mapping task_key to dict with 'code' and 'leading_content'
    """
    print(f"Reading verifiers from: {python_path}")

    try:
        with open(python_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print(f"✗ Error: File '{python_path}' not found")
        sys.exit(1)

    verifiers = {}

    # Split by "# Task: " markers followed by a task key pattern (uuid or specific format)
    # This avoids splitting on "# Task: " that appears inside docstring comments
    # Task keys look like: task_uuid, task_xxx_timestamp_xxx, or send_xxx_xxx
    task_key_pattern = (
        r"(?:task_[a-f0-9-]+|task_[a-z0-9]+_\d+_[a-z0-9]+|[a-z_]+_[a-z0-9]+)"
    )
    task_blocks = re.split(rf"\n# Task: (?={task_key_pattern})", content)

    for block in task_blocks[1:]:  # Skip the first block (header)
        # Extract task key from the first line
        lines = block.split("\n")
        if not lines:
            continue

        # First line should be the task key
        task_key = lines[0].strip()

        # Skip if this doesn't look like a task key (sanity check)
        if not re.match(task_key_pattern, task_key):
            continue

        # Find the @verifier or @verifier_sync decorator to extract the key parameter
        verifier_match = re.search(
            r'@verifier(?:_sync)?\(key=["\']([^"\']+)["\']\s*(?:,\s*[^)]+)?\)', block
        )
        if verifier_match:
            task_key = verifier_match.group(1)

        # Check for leading content markers (new format)
        leading_content = None
        if LEADING_CONTENT_START in block:
            start_idx = block.find(LEADING_CONTENT_START)
            end_idx = block.find(LEADING_CONTENT_END)
            if start_idx != -1 and end_idx != -1:
                comment_block = block[
                    start_idx + len(LEADING_CONTENT_START) : end_idx
                ].strip()
                leading_content = parse_leading_content_from_comments(comment_block)
        # Fallback: check for legacy docstring markers
        elif LEADING_DOCSTRING_START in block:
            start_idx = block.find(LEADING_DOCSTRING_START)
            end_idx = block.find(LEADING_DOCSTRING_END)
            if start_idx != -1 and end_idx != -1:
                comment_block = block[
                    start_idx + len(LEADING_DOCSTRING_START) : end_idx
                ].strip()
                leading_content = parse_legacy_docstring_from_comments(comment_block)

        # Find the function definition (async def or def)
        # Extract from the function start until we hit the separator or end
        func_pattern = r"((async\s+)?def\s+\w+.*?)(?=\n# -+\n|\n# Task:|\Z)"
        func_match = re.search(func_pattern, block, re.DOTALL)

        if func_match:
            function_code = func_match.group(1).strip()
            verifiers[task_key] = {
                "code": function_code,
                "leading_content": leading_content,
            }

    # If the above approach didn't work, try a direct pattern match
    if not verifiers:
        # Pattern to match @verifier or @verifier_sync decorator with key and the following function
        pattern = r'@verifier(?:_sync)?\(key=["\']([^"\']+)["\']\s*(?:,\s*[^)]+)?\)\s*\n((?:async\s+)?def\s+[^\n]+:(?:\n(?:    |\t).*)*(?:\n(?:    |\t).*)*)'

        matches = re.findall(pattern, content, re.MULTILINE)

        for task_key, function_code in matches:
            verifiers[task_key] = {
                "code": function_code.strip(),
                "leading_content": None,
            }

    print(f"✓ Found {len(verifiers)} verifier(s)")

    # Analyze async vs sync
    async_count = 0
    sync_count = 0
    for data in verifiers.values():
        func_info = extract_function_info(data["code"])
        if func_info:
            _, is_async = func_info
            if is_async:
                async_count += 1
            else:
                sync_count += 1

    print(f"  - {async_count} async verifier(s)")
    print(f"  - {sync_count} sync verifier(s)")

    return verifiers


def normalize_code_for_comparison(code: str) -> str:
    """
    Normalize code for comparison to avoid false positives.
    Removes leading/trailing whitespace and normalizes line endings.
    """
    # Strip and normalize line endings
    code = code.strip().replace("\r\n", "\n")
    # Normalize trailing whitespace on each line
    lines = code.split("\n")
    lines = [line.rstrip() for line in lines]
    code = "\n".join(lines)
    # Normalize multiple blank lines to single (2+ newlines → 1)
    code = re.sub(r"\n\n+", "\n", code)
    return code


def apply_verifiers_to_json(json_path: str, python_path: str) -> None:
    """
    Apply verifiers from Python file back into JSON task file (updates in-place).

    Args:
        json_path: Path to JSON file to update
        python_path: Path to Python file with verifiers
    """
    # Parse verifiers from Python file
    verifiers = parse_verifiers_from_file(python_path)

    # Load JSON file
    print(f"\nReading tasks from: {json_path}")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            tasks = json.load(f)
    except FileNotFoundError:
        print(f"✗ Error: File '{json_path}' not found")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"✗ Error: Invalid JSON in '{json_path}': {e}")
        sys.exit(1)

    if not isinstance(tasks, list):
        print("✗ Error: JSON file must contain an array of tasks")
        sys.exit(1)

    print(f"Found {len(tasks)} task(s)")

    # Update tasks with new verifiers (only if changed)
    updated_count = 0
    updated_keys = []
    not_found = []

    for task in tasks:
        task_key = task.get("key") or task.get("id")
        if not task_key:
            continue

        if task_key in verifiers:
            ver_data = verifiers[task_key]

            # Reconstruct the full verifier code with leading content if present
            if ver_data["leading_content"]:
                new_code = ver_data["leading_content"] + "\n" + ver_data["code"]
            else:
                new_code = ver_data["code"]

            old_code = task.get("verifier_func", "")

            # Normalize both for comparison
            old_normalized = normalize_code_for_comparison(old_code)
            new_normalized = normalize_code_for_comparison(new_code)

            # Debug: show comparison info
            old_len = len(old_normalized)
            new_len = len(new_normalized)

            if old_normalized == new_normalized:
                if old_code != new_code:
                    print(
                        f"  [DEBUG] {task_key}: Codes differ in whitespace only (normalized match)"
                    )
            else:
                # Find first difference position for debugging
                min_len = min(old_len, new_len)
                diff_pos = min_len
                for i in range(min_len):
                    if old_normalized[i] != new_normalized[i]:
                        diff_pos = i
                        break
                print(
                    f"  [DEBUG] {task_key}: Code changed (old={old_len}, new={new_len}, first_diff@{diff_pos})"
                )

            # Only update if the code actually changed
            if old_normalized != new_normalized:
                # Update verifier_func with new code
                task["verifier_func"] = new_code

                # Also update metadata if it exists
                if "metadata" in task and isinstance(task["metadata"], dict):
                    task["metadata"]["verifier_code"] = new_code

                # Clear verifier_id and verifier_sha to force re-upload
                task["verifier_id"] = None
                task["verifier_sha"] = None

                updated_count += 1
                updated_keys.append(task_key)
        else:
            not_found.append(task_key)

    print(f"\n✓ Updated {updated_count} task(s) with new verifiers")

    if updated_keys:
        print("\nUpdated task keys:")
        for key in updated_keys:
            print(f"  - {key}")

    if not_found:
        print(f"\n⚠ Warning: {len(not_found)} task(s) not found in Python file:")
        for key in not_found[:10]:
            print(f"  - {key}")
        if len(not_found) > 10:
            print(f"  ... and {len(not_found) - 10} more")

    # Write output back to the same JSON file
    print(f"\nWriting updated tasks to: {json_path}")

    try:
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(tasks, f, indent=2, ensure_ascii=False)
        print(f"✓ Successfully updated {len(tasks)} task(s) in '{json_path}'")
    except Exception as e:
        print(f"✗ Error writing JSON file: {e}")
        sys.exit(1)


def validate_verifiers_file(python_path: str) -> None:
    """
    Validate that a Python verifiers file can be parsed correctly.

    Args:
        python_path: Path to Python file with verifiers
    """
    verifiers = parse_verifiers_from_file(python_path)

    print("\nValidating verifiers...")
    errors = []

    for task_key, ver_data in verifiers.items():
        func_info = extract_function_info(ver_data["code"])
        if not func_info:
            errors.append(f"  - {task_key}: Could not extract function info")
        else:
            function_name, is_async = func_info
            has_leading = (
                " (has leading content)" if ver_data["leading_content"] else ""
            )
            print(
                f"  ✓ {task_key}: {function_name} ({'async' if is_async else 'sync'}){has_leading}"
            )

    if errors:
        print(f"\n✗ Found {len(errors)} error(s):")
        for error in errors:
            print(error)
        sys.exit(1)
    else:
        print(f"\n✓ All {len(verifiers)} verifier(s) are valid!")


def main():
    parser = argparse.ArgumentParser(
        description="Iterate on verifier code from JSON task files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Extract verifiers from JSON to Python file
  %(prog)s extract xai-day-10-batch.json verifiers.py

  # Edit verifiers.py...

  # Apply changes back to JSON file (updates in-place)
  %(prog)s apply xai-day-10-batch.json verifiers.py

  # Validate verifiers file
  %(prog)s validate verifiers.py
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to run")
    subparsers.required = True

    # Extract command
    extract_parser = subparsers.add_parser(
        "extract", help="Extract verifiers from JSON to Python file"
    )
    extract_parser.add_argument("json_file", help="Path to JSON file containing tasks")
    extract_parser.add_argument("py_file", help="Path to output Python file")

    # Apply command
    apply_parser = subparsers.add_parser(
        "apply", help="Apply verifiers from Python file back to JSON (updates in-place)"
    )
    apply_parser.add_argument("json_file", help="Path to JSON file to update")
    apply_parser.add_argument("py_file", help="Path to Python file with verifiers")

    # Validate command
    validate_parser = subparsers.add_parser("validate", help="Validate verifiers file")
    validate_parser.add_argument("py_file", help="Path to Python file with verifiers")

    args = parser.parse_args()

    # Execute command
    if args.command == "extract":
        extract_verifiers_to_file(args.json_file, args.py_file)
    elif args.command == "apply":
        apply_verifiers_to_json(args.json_file, args.py_file)
    elif args.command == "validate":
        validate_verifiers_file(args.py_file)


if __name__ == "__main__":
    main()
````
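For orientation, here is a minimal round-trip sketch of how the new script is meant to be driven, based on its own `--help` epilog. It assumes the script is importable as `iterate_verifiers` and that `tasks.json` is a task export of the shape the script expects; both names are illustrative, not part of the package API.

```python
# Minimal sketch, assuming the script is importable as `iterate_verifiers`
# and that "tasks.json" / "verifiers.py" are illustrative file names.
from iterate_verifiers import (
    apply_verifiers_to_json,
    extract_verifiers_to_file,
    validate_verifiers_file,
)

extract_verifiers_to_file("tasks.json", "verifiers.py")  # JSON -> editable .py
# ... hand-edit the verifier functions in verifiers.py ...
validate_verifiers_file("verifiers.py")  # parse check; exits non-zero on errors
apply_verifiers_to_json("tasks.json", "verifiers.py")  # write back in-place
```

Note that the apply step clears `verifier_id` and `verifier_sha` on every task whose code changed, so those verifiers are re-uploaded on the next sync.

The subtle part of the format is the leading-content markers: imports and module docstrings that precede a verifier function survive the extract/apply cycle as `# |`-prefixed comments between `@LEADING_CONTENT_START`/`@LEADING_CONTENT_END`. A small self-check of that round trip, under the same import assumption as above:

```python
from iterate_verifiers import (
    clean_verifier_code,
    extract_function_info,
    format_leading_content_as_comment,
    parse_leading_content_from_comments,
)

raw = '''import math

async def check_area(env):
    return TASK_SUCCESSFUL_SCORE
'''

# Splits the leading import from the function body and detects async-ness.
code, leading = clean_verifier_code(raw)
assert leading == "import math"
assert extract_function_info(code) == ("check_area", True)

# The comment form wraps each line in "# |" between START/END marker lines ...
comment = format_leading_content_as_comment(leading)
# ... and parsing the lines between the markers restores the original text.
body = "\n".join(comment.split("\n")[1:-1])
assert parse_leading_content_from_comments(body) == "import math"
```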