fleet-python 0.2.19__tar.gz → 0.2.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fleet-python might be problematic. Click here for more details.
- {fleet_python-0.2.19 → fleet_python-0.2.21}/PKG-INFO +1 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/dsl_example.py +2 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/__init__.py +2 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/client.py +5 -3
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/instance/client.py +10 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/client.py +5 -3
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/instance/client.py +12 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/resources/sqlite.py +164 -61
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/__init__.py +2 -1
- fleet_python-0.2.21/fleet/verifiers/parse.py +143 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet_python.egg-info/PKG-INFO +1 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet_python.egg-info/SOURCES.txt +1 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/pyproject.toml +1 -1
- {fleet_python-0.2.19 → fleet_python-0.2.21}/LICENSE +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/README.md +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/diff_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_client.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_sync.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_task.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/openai_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/examples/quickstart.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/config.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/env/client.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/models.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/tasks.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/types.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/scripts/unasync.py +0 -0
- {fleet_python-0.2.19 → fleet_python-0.2.21}/setup.cfg +0 -0
|
@@ -68,7 +68,7 @@ def validate_new_deal_creation(
|
|
|
68
68
|
async def main():
|
|
69
69
|
# Create a new instance
|
|
70
70
|
print("Creating new Hubspot instance...")
|
|
71
|
-
env = await flt.env.make_async("hubspot
|
|
71
|
+
env = await flt.env.make_async("hubspot")
|
|
72
72
|
print(f"New Instance: {env.instance_id}")
|
|
73
73
|
|
|
74
74
|
try:
|
|
@@ -85,6 +85,7 @@ async def main():
|
|
|
85
85
|
print(f"Message: {response.message}")
|
|
86
86
|
|
|
87
87
|
# Get the database resource
|
|
88
|
+
await env.instance.load()
|
|
88
89
|
db = env.db()
|
|
89
90
|
|
|
90
91
|
# Take a snapshot before insertion
|
|
@@ -34,6 +34,7 @@ from .verifiers import (
|
|
|
34
34
|
DatabaseSnapshot,
|
|
35
35
|
IgnoreConfig,
|
|
36
36
|
SnapshotDiff,
|
|
37
|
+
TASK_FAILED_SCORE,
|
|
37
38
|
TASK_SUCCESSFUL_SCORE,
|
|
38
39
|
)
|
|
39
40
|
|
|
@@ -80,6 +81,7 @@ __all__ = [
|
|
|
80
81
|
"DatabaseSnapshot",
|
|
81
82
|
"IgnoreConfig",
|
|
82
83
|
"SnapshotDiff",
|
|
84
|
+
"TASK_FAILED_SCORE",
|
|
83
85
|
"TASK_SUCCESSFUL_SCORE",
|
|
84
86
|
# Environment module
|
|
85
87
|
"env",
|
|
@@ -109,7 +109,7 @@ class AsyncEnv(EnvironmentBase):
|
|
|
109
109
|
return await self.instance.verify(validator)
|
|
110
110
|
|
|
111
111
|
async def verify_raw(
|
|
112
|
-
self, function_code: str, function_name: str
|
|
112
|
+
self, function_code: str, function_name: str | None = None
|
|
113
113
|
) -> ExecuteFunctionResponse:
|
|
114
114
|
return await self.instance.verify_raw(function_code, function_name)
|
|
115
115
|
|
|
@@ -152,12 +152,14 @@ class AsyncEnv(EnvironmentBase):
|
|
|
152
152
|
class AsyncFleet:
|
|
153
153
|
def __init__(
|
|
154
154
|
self,
|
|
155
|
-
api_key: Optional[str] =
|
|
155
|
+
api_key: Optional[str] = None,
|
|
156
156
|
base_url: Optional[str] = None,
|
|
157
157
|
httpx_client: Optional[httpx.AsyncClient] = None,
|
|
158
158
|
max_retries: int = DEFAULT_MAX_RETRIES,
|
|
159
159
|
timeout: float = DEFAULT_TIMEOUT,
|
|
160
160
|
):
|
|
161
|
+
if api_key is None:
|
|
162
|
+
api_key = os.getenv("FLEET_API_KEY")
|
|
161
163
|
self._httpx_client = httpx_client or default_httpx_client(max_retries, timeout)
|
|
162
164
|
self.client = AsyncWrapper(
|
|
163
165
|
api_key=api_key,
|
|
@@ -182,7 +184,7 @@ class AsyncFleet:
|
|
|
182
184
|
) -> AsyncEnv:
|
|
183
185
|
if ":" in env_key:
|
|
184
186
|
env_key_part, version = env_key.split(":", 1)
|
|
185
|
-
if not version.startswith("v"):
|
|
187
|
+
if not version.startswith("v") and len(version) != 0 and version[0].isdigit():
|
|
186
188
|
version = f"v{version}"
|
|
187
189
|
else:
|
|
188
190
|
env_key_part = env_key
|
|
@@ -12,6 +12,7 @@ from ..resources.browser import AsyncBrowserResource
|
|
|
12
12
|
from ..resources.base import Resource
|
|
13
13
|
|
|
14
14
|
from fleet.verifiers import DatabaseSnapshot
|
|
15
|
+
from fleet.verifiers.parse import convert_verifier_string, extract_function_name
|
|
15
16
|
|
|
16
17
|
from ..exceptions import FleetEnvironmentError
|
|
17
18
|
from ...config import DEFAULT_MAX_RETRIES, DEFAULT_TIMEOUT
|
|
@@ -107,8 +108,16 @@ class AsyncInstanceClient:
|
|
|
107
108
|
return await self.verify_raw(function_code, function_name)
|
|
108
109
|
|
|
109
110
|
async def verify_raw(
|
|
110
|
-
self, function_code: str, function_name: str
|
|
111
|
+
self, function_code: str, function_name: str | None = None
|
|
111
112
|
) -> ExecuteFunctionResponse:
|
|
113
|
+
try:
|
|
114
|
+
function_code = convert_verifier_string(function_code)
|
|
115
|
+
except:
|
|
116
|
+
pass
|
|
117
|
+
|
|
118
|
+
if function_name is None:
|
|
119
|
+
function_name = extract_function_name(function_code)
|
|
120
|
+
|
|
112
121
|
response = await self.client.request(
|
|
113
122
|
"POST",
|
|
114
123
|
"/execute_verifier_function",
|
|
@@ -227,7 +227,7 @@ class Environment(EnvironmentBase):
|
|
|
227
227
|
return self.instance.verify(validator)
|
|
228
228
|
|
|
229
229
|
def verify_raw(
|
|
230
|
-
self, function_code: str, function_name: str
|
|
230
|
+
self, function_code: str, function_name: str | None = None
|
|
231
231
|
) -> ExecuteFunctionResponse:
|
|
232
232
|
return self.instance.verify_raw(function_code, function_name)
|
|
233
233
|
|
|
@@ -384,12 +384,14 @@ class Environment(EnvironmentBase):
|
|
|
384
384
|
class Fleet:
|
|
385
385
|
def __init__(
|
|
386
386
|
self,
|
|
387
|
-
api_key: Optional[str] =
|
|
387
|
+
api_key: Optional[str] = None,
|
|
388
388
|
base_url: Optional[str] = None,
|
|
389
389
|
httpx_client: Optional[httpx.Client] = None,
|
|
390
390
|
max_retries: int = DEFAULT_MAX_RETRIES,
|
|
391
391
|
timeout: float = DEFAULT_TIMEOUT,
|
|
392
392
|
):
|
|
393
|
+
if api_key is None:
|
|
394
|
+
api_key = os.getenv("FLEET_API_KEY")
|
|
393
395
|
self._httpx_client = httpx_client or default_httpx_client(max_retries, timeout)
|
|
394
396
|
self.client = SyncWrapper(
|
|
395
397
|
api_key=api_key,
|
|
@@ -417,7 +419,7 @@ class Fleet:
|
|
|
417
419
|
) -> Environment:
|
|
418
420
|
if ":" in env_key:
|
|
419
421
|
env_key_part, version = env_key.split(":", 1)
|
|
420
|
-
if not version.startswith("v"):
|
|
422
|
+
if not version.startswith("v") and len(version) != 0 and version[0].isdigit():
|
|
421
423
|
version = f"v{version}"
|
|
422
424
|
else:
|
|
423
425
|
env_key_part = env_key
|
|
@@ -7,6 +7,8 @@ import time
|
|
|
7
7
|
import logging
|
|
8
8
|
from urllib.parse import urlparse
|
|
9
9
|
|
|
10
|
+
from fleet.verifiers.parse import convert_verifier_string, extract_function_name
|
|
11
|
+
|
|
10
12
|
from ..resources.sqlite import SQLiteResource
|
|
11
13
|
from ..resources.browser import BrowserResource
|
|
12
14
|
from ..resources.base import Resource
|
|
@@ -116,8 +118,17 @@ class InstanceClient:
|
|
|
116
118
|
return self.verify_raw(function_code, function_name)
|
|
117
119
|
|
|
118
120
|
def verify_raw(
|
|
119
|
-
self, function_code: str, function_name: str
|
|
121
|
+
self, function_code: str, function_name: str | None = None
|
|
120
122
|
) -> ExecuteFunctionResponse:
|
|
123
|
+
try:
|
|
124
|
+
function_code = convert_verifier_string(function_code)
|
|
125
|
+
except:
|
|
126
|
+
pass
|
|
127
|
+
|
|
128
|
+
# Extract function name if not provided
|
|
129
|
+
if function_name is None:
|
|
130
|
+
function_name = extract_function_name(function_code)
|
|
131
|
+
|
|
121
132
|
response = self.client.request(
|
|
122
133
|
"POST",
|
|
123
134
|
"/execute_verifier_function",
|
|
@@ -379,6 +379,9 @@ class SyncSnapshotDiff:
|
|
|
379
379
|
|
|
380
380
|
def _expect_only_targeted(self, allowed_changes: list[dict[str, Any]]):
|
|
381
381
|
"""Optimized version that only queries specific rows mentioned in allowed_changes."""
|
|
382
|
+
import concurrent.futures
|
|
383
|
+
from threading import Lock
|
|
384
|
+
|
|
382
385
|
# Group allowed changes by table
|
|
383
386
|
changes_by_table: dict[str, list[dict[str, Any]]] = {}
|
|
384
387
|
for change in allowed_changes:
|
|
@@ -387,19 +390,12 @@ class SyncSnapshotDiff:
|
|
|
387
390
|
changes_by_table[table] = []
|
|
388
391
|
changes_by_table[table].append(change)
|
|
389
392
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
pk_columns = self._get_primary_key_columns(table)
|
|
397
|
-
|
|
398
|
-
# Extract unique PKs to check
|
|
399
|
-
pks_to_check = {change["pk"] for change in table_changes}
|
|
400
|
-
|
|
401
|
-
# Query only these specific rows from both snapshots
|
|
402
|
-
for pk in pks_to_check:
|
|
393
|
+
errors = []
|
|
394
|
+
errors_lock = Lock()
|
|
395
|
+
|
|
396
|
+
# Function to check a single row
|
|
397
|
+
def check_row(table: str, pk: Any, table_changes: list[dict[str, Any]], pk_columns: list[str]):
|
|
398
|
+
try:
|
|
403
399
|
# Build WHERE clause for this PK
|
|
404
400
|
where_sql = self._build_pk_where_clause(pk_columns, pk)
|
|
405
401
|
|
|
@@ -423,38 +419,99 @@ class SyncSnapshotDiff:
|
|
|
423
419
|
if not _values_equivalent(before_val, after_val):
|
|
424
420
|
# Check if this change is allowed
|
|
425
421
|
if not self._is_field_change_allowed(table_changes, pk, field, after_val):
|
|
426
|
-
|
|
422
|
+
error_msg = (
|
|
427
423
|
f"Unexpected change in table '{table}', "
|
|
428
424
|
f"row {pk}, field '{field}': "
|
|
429
425
|
f"{repr(before_val)} -> {repr(after_val)}"
|
|
430
426
|
)
|
|
427
|
+
with errors_lock:
|
|
428
|
+
errors.append(AssertionError(error_msg))
|
|
429
|
+
return # Stop checking this row
|
|
431
430
|
elif not before_row and after_row:
|
|
432
431
|
# Added row
|
|
433
432
|
if not self._is_row_change_allowed(table_changes, pk, "__added__"):
|
|
434
|
-
|
|
433
|
+
error_msg = f"Unexpected row added in table '{table}': {pk}"
|
|
434
|
+
with errors_lock:
|
|
435
|
+
errors.append(AssertionError(error_msg))
|
|
435
436
|
elif before_row and not after_row:
|
|
436
437
|
# Removed row
|
|
437
438
|
if not self._is_row_change_allowed(table_changes, pk, "__removed__"):
|
|
438
|
-
|
|
439
|
+
error_msg = f"Unexpected row removed from table '{table}': {pk}"
|
|
440
|
+
with errors_lock:
|
|
441
|
+
errors.append(AssertionError(error_msg))
|
|
442
|
+
except Exception as e:
|
|
443
|
+
with errors_lock:
|
|
444
|
+
errors.append(e)
|
|
439
445
|
|
|
440
|
-
#
|
|
441
|
-
|
|
442
|
-
for table in
|
|
443
|
-
if
|
|
446
|
+
# Prepare all row checks
|
|
447
|
+
row_checks = []
|
|
448
|
+
for table, table_changes in changes_by_table.items():
|
|
449
|
+
if self.ignore_config.should_ignore_table(table):
|
|
444
450
|
continue
|
|
451
|
+
|
|
452
|
+
# Get primary key columns once per table
|
|
453
|
+
pk_columns = self._get_primary_key_columns(table)
|
|
445
454
|
|
|
446
|
-
#
|
|
447
|
-
|
|
448
|
-
before_count = before_count_response.rows[0][0] if before_count_response.rows else 0
|
|
449
|
-
|
|
450
|
-
after_count_response = self.after.resource.query(f"SELECT COUNT(*) FROM {table}")
|
|
451
|
-
after_count = after_count_response.rows[0][0] if after_count_response.rows else 0
|
|
455
|
+
# Extract unique PKs to check
|
|
456
|
+
pks_to_check = {change["pk"] for change in table_changes}
|
|
452
457
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
+
for pk in pks_to_check:
|
|
459
|
+
row_checks.append((table, pk, table_changes, pk_columns))
|
|
460
|
+
|
|
461
|
+
# Execute row checks in parallel
|
|
462
|
+
if row_checks:
|
|
463
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
464
|
+
futures = [
|
|
465
|
+
executor.submit(check_row, table, pk, table_changes, pk_columns)
|
|
466
|
+
for table, pk, table_changes, pk_columns in row_checks
|
|
467
|
+
]
|
|
468
|
+
concurrent.futures.wait(futures)
|
|
469
|
+
|
|
470
|
+
# Check for errors from row checks
|
|
471
|
+
if errors:
|
|
472
|
+
raise errors[0]
|
|
473
|
+
|
|
474
|
+
# Now check tables not mentioned in allowed_changes to ensure no changes
|
|
475
|
+
all_tables = set(self.before.tables()) | set(self.after.tables())
|
|
476
|
+
tables_to_verify = []
|
|
477
|
+
|
|
478
|
+
for table in all_tables:
|
|
479
|
+
if table not in changes_by_table and not self.ignore_config.should_ignore_table(table):
|
|
480
|
+
tables_to_verify.append(table)
|
|
481
|
+
|
|
482
|
+
# Function to verify no changes in a table
|
|
483
|
+
def verify_no_changes(table: str):
|
|
484
|
+
try:
|
|
485
|
+
# For tables with no allowed changes, just check row counts
|
|
486
|
+
before_count_response = self.before.resource.query(f"SELECT COUNT(*) FROM {table}")
|
|
487
|
+
before_count = before_count_response.rows[0][0] if before_count_response.rows else 0
|
|
488
|
+
|
|
489
|
+
after_count_response = self.after.resource.query(f"SELECT COUNT(*) FROM {table}")
|
|
490
|
+
after_count = after_count_response.rows[0][0] if after_count_response.rows else 0
|
|
491
|
+
|
|
492
|
+
if before_count != after_count:
|
|
493
|
+
error_msg = (
|
|
494
|
+
f"Unexpected change in table '{table}': "
|
|
495
|
+
f"row count changed from {before_count} to {after_count}"
|
|
496
|
+
)
|
|
497
|
+
with errors_lock:
|
|
498
|
+
errors.append(AssertionError(error_msg))
|
|
499
|
+
except Exception as e:
|
|
500
|
+
with errors_lock:
|
|
501
|
+
errors.append(e)
|
|
502
|
+
|
|
503
|
+
# Execute table verification in parallel
|
|
504
|
+
if tables_to_verify:
|
|
505
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
506
|
+
futures = [
|
|
507
|
+
executor.submit(verify_no_changes, table)
|
|
508
|
+
for table in tables_to_verify
|
|
509
|
+
]
|
|
510
|
+
concurrent.futures.wait(futures)
|
|
511
|
+
|
|
512
|
+
# Final error check
|
|
513
|
+
if errors:
|
|
514
|
+
raise errors[0]
|
|
458
515
|
|
|
459
516
|
return self
|
|
460
517
|
|
|
@@ -500,6 +557,9 @@ class SyncSnapshotDiff:
|
|
|
500
557
|
def _expect_no_changes(self):
|
|
501
558
|
"""Efficiently verify that no changes occurred between snapshots using row counts."""
|
|
502
559
|
try:
|
|
560
|
+
import concurrent.futures
|
|
561
|
+
from threading import Lock
|
|
562
|
+
|
|
503
563
|
# Get all tables from both snapshots
|
|
504
564
|
before_tables = set(self.before.tables())
|
|
505
565
|
after_tables = set(self.after.tables())
|
|
@@ -516,41 +576,84 @@ class SyncSnapshotDiff:
|
|
|
516
576
|
if not self.ignore_config.should_ignore_table(table):
|
|
517
577
|
raise AssertionError(f"Unexpected table removed: {table}")
|
|
518
578
|
|
|
519
|
-
#
|
|
579
|
+
# Prepare tables to check
|
|
580
|
+
tables_to_check = []
|
|
520
581
|
all_tables = before_tables | after_tables
|
|
521
582
|
for table in all_tables:
|
|
522
|
-
if self.ignore_config.should_ignore_table(table):
|
|
523
|
-
|
|
583
|
+
if not self.ignore_config.should_ignore_table(table):
|
|
584
|
+
tables_to_check.append(table)
|
|
585
|
+
|
|
586
|
+
# If no tables to check, we're done
|
|
587
|
+
if not tables_to_check:
|
|
588
|
+
return self
|
|
589
|
+
|
|
590
|
+
# Use ThreadPoolExecutor to parallelize count queries
|
|
591
|
+
# We use threads instead of processes since the queries are I/O bound
|
|
592
|
+
errors = []
|
|
593
|
+
errors_lock = Lock()
|
|
594
|
+
tables_needing_verification = []
|
|
595
|
+
verification_lock = Lock()
|
|
596
|
+
|
|
597
|
+
def check_table_counts(table: str):
|
|
598
|
+
"""Check row counts for a single table."""
|
|
599
|
+
try:
|
|
600
|
+
# Get row counts from both snapshots
|
|
601
|
+
before_count = 0
|
|
602
|
+
after_count = 0
|
|
524
603
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
604
|
+
if table in before_tables:
|
|
605
|
+
before_count_response = self.before.resource.query(f"SELECT COUNT(*) FROM {table}")
|
|
606
|
+
before_count = before_count_response.rows[0][0] if before_count_response.rows else 0
|
|
607
|
+
|
|
608
|
+
if table in after_tables:
|
|
609
|
+
after_count_response = self.after.resource.query(f"SELECT COUNT(*) FROM {table}")
|
|
610
|
+
after_count = after_count_response.rows[0][0] if after_count_response.rows else 0
|
|
532
611
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
612
|
+
if before_count != after_count:
|
|
613
|
+
error_msg = (
|
|
614
|
+
f"Unexpected change in table '{table}': "
|
|
615
|
+
f"row count changed from {before_count} to {after_count}"
|
|
616
|
+
)
|
|
617
|
+
with errors_lock:
|
|
618
|
+
errors.append(AssertionError(error_msg))
|
|
619
|
+
elif before_count > 0 and before_count <= 1000:
|
|
620
|
+
# Mark for detailed verification
|
|
621
|
+
with verification_lock:
|
|
622
|
+
tables_needing_verification.append(table)
|
|
623
|
+
|
|
624
|
+
except Exception as e:
|
|
625
|
+
with errors_lock:
|
|
626
|
+
errors.append(e)
|
|
627
|
+
|
|
628
|
+
# Execute count checks in parallel
|
|
629
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
630
|
+
futures = [executor.submit(check_table_counts, table) for table in tables_to_check]
|
|
631
|
+
concurrent.futures.wait(futures)
|
|
632
|
+
|
|
633
|
+
# Check if any errors occurred during count checking
|
|
634
|
+
if errors:
|
|
635
|
+
# Raise the first error
|
|
636
|
+
raise errors[0]
|
|
637
|
+
|
|
638
|
+
# Now verify small tables for data changes (also in parallel)
|
|
639
|
+
if tables_needing_verification:
|
|
640
|
+
verification_errors = []
|
|
536
641
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
# This is a reasonable assumption for expect_only([])
|
|
553
|
-
pass
|
|
642
|
+
def verify_table(table: str):
|
|
643
|
+
"""Verify a single table's data hasn't changed."""
|
|
644
|
+
try:
|
|
645
|
+
self._verify_table_unchanged(table)
|
|
646
|
+
except AssertionError as e:
|
|
647
|
+
with errors_lock:
|
|
648
|
+
verification_errors.append(e)
|
|
649
|
+
|
|
650
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
|
651
|
+
futures = [executor.submit(verify_table, table) for table in tables_needing_verification]
|
|
652
|
+
concurrent.futures.wait(futures)
|
|
653
|
+
|
|
654
|
+
# Check if any errors occurred during verification
|
|
655
|
+
if verification_errors:
|
|
656
|
+
raise verification_errors[0]
|
|
554
657
|
|
|
555
658
|
return self
|
|
556
659
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Fleet verifiers module - database snapshot validation utilities and verifier decorator."""
|
|
2
2
|
|
|
3
3
|
from fleet.verifiers.db import DatabaseSnapshot, IgnoreConfig, SnapshotDiff
|
|
4
|
-
from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE
|
|
4
|
+
from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
|
|
5
5
|
from .decorator import (
|
|
6
6
|
verifier,
|
|
7
7
|
SyncVerifierFunction,
|
|
@@ -12,6 +12,7 @@ __all__ = [
|
|
|
12
12
|
"IgnoreConfig",
|
|
13
13
|
"SnapshotDiff",
|
|
14
14
|
"TASK_SUCCESSFUL_SCORE",
|
|
15
|
+
"TASK_FAILED_SCORE",
|
|
15
16
|
"verifier",
|
|
16
17
|
"SyncVerifierFunction",
|
|
17
18
|
]
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def extract_function_name(function_code: str) -> str | None:
|
|
5
|
+
"""
|
|
6
|
+
Extract function name from Python function code.
|
|
7
|
+
|
|
8
|
+
Handles both regular functions (def) and async functions (async def).
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
function_code: Python function code as a string
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
The function name if found, None otherwise
|
|
15
|
+
"""
|
|
16
|
+
# Pattern to match both def and async def functions
|
|
17
|
+
# Handles various formatting styles and type annotations
|
|
18
|
+
pattern = r'(?:async\s+)?def\s+(\w+)\s*\('
|
|
19
|
+
|
|
20
|
+
match = re.search(pattern, function_code)
|
|
21
|
+
if match:
|
|
22
|
+
return match.group(1)
|
|
23
|
+
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def convert_verifier_string(verifier_str: str) -> str:
|
|
28
|
+
"""
|
|
29
|
+
Convert a verifier function string from the old format (env: Environment)
|
|
30
|
+
to the new format (before: DatabaseSnapshot, after: DatabaseSnapshot).
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
verifier_str: The original verifier function as a string
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
The converted verifier function string
|
|
37
|
+
"""
|
|
38
|
+
# First, handle escaped newlines in the input
|
|
39
|
+
verifier_str = verifier_str.replace('\\n', '\n')
|
|
40
|
+
|
|
41
|
+
# Extract function name, docstring, and body
|
|
42
|
+
# More flexible pattern that accepts both int and float return types
|
|
43
|
+
func_pattern = r'def\s+(\w+)\s*\(\s*env(?:\s*:\s*Environment)?\s*,?\s*final_answer(?:\s*:\s*str\s*\|\s*None)?\s*(?:=\s*None)?\s*\)\s*->\s*(?:float|int):\s*\n((?:\s*""".*?"""\s*\n)?)(.*)'
|
|
44
|
+
match = re.match(func_pattern, verifier_str.strip(), re.DOTALL)
|
|
45
|
+
|
|
46
|
+
if not match:
|
|
47
|
+
# Try with multiline pattern
|
|
48
|
+
func_pattern_multiline = r'def\s+(\w+)\s*\(\s*\n?\s*env(?:\s*:\s*Environment)?\s*,?\s*\n?\s*final_answer(?:\s*:\s*str\s*\|\s*None)?\s*(?:=\s*None)?\s*\n?\s*\)\s*->\s*(?:float|int):\s*\n((?:\s*""".*?"""\s*\n)?)(.*)'
|
|
49
|
+
match = re.match(func_pattern_multiline, verifier_str.strip(), re.DOTALL)
|
|
50
|
+
|
|
51
|
+
if not match:
|
|
52
|
+
raise ValueError("Could not parse verifier function. Expected format: def function_name(env: Environment, final_answer: str | None = None) -> float/int:")
|
|
53
|
+
|
|
54
|
+
func_name = match.group(1)
|
|
55
|
+
docstring = match.group(2).strip()
|
|
56
|
+
body = match.group(3)
|
|
57
|
+
|
|
58
|
+
# Find all unique env.db() calls
|
|
59
|
+
db_calls = re.findall(r'env\.db\("(\w+)"\)', body)
|
|
60
|
+
unique_db_names = list(dict.fromkeys(db_calls)) # Remove duplicates while preserving order
|
|
61
|
+
|
|
62
|
+
# Build the new function
|
|
63
|
+
new_func = f'''def {func_name}(
|
|
64
|
+
before: DatabaseSnapshot, after: DatabaseSnapshot, transcript: str | None = None
|
|
65
|
+
) -> int:
|
|
66
|
+
class Environment:
|
|
67
|
+
def db(self, name: str) -> DatabaseSnapshot:'''
|
|
68
|
+
|
|
69
|
+
# Build the db method based on found database names
|
|
70
|
+
if unique_db_names:
|
|
71
|
+
conditions = []
|
|
72
|
+
for db_name in unique_db_names:
|
|
73
|
+
if db_name == "seed":
|
|
74
|
+
conditions.append('before if name == "seed"')
|
|
75
|
+
elif db_name == "current":
|
|
76
|
+
conditions.append('after')
|
|
77
|
+
else:
|
|
78
|
+
# Handle other database names if needed
|
|
79
|
+
conditions.append(f'None # Handle "{db_name}"')
|
|
80
|
+
|
|
81
|
+
if len(conditions) == 2 and "seed" in unique_db_names and "current" in unique_db_names:
|
|
82
|
+
new_func += f'''
|
|
83
|
+
return before if name == "seed" else after'''
|
|
84
|
+
else:
|
|
85
|
+
# More complex mapping if needed
|
|
86
|
+
new_func += f'''
|
|
87
|
+
if name == "seed":
|
|
88
|
+
return before
|
|
89
|
+
elif name == "current":
|
|
90
|
+
return after
|
|
91
|
+
else:
|
|
92
|
+
raise ValueError(f"Unknown database name: {{name}}")'''
|
|
93
|
+
else:
|
|
94
|
+
new_func += '''
|
|
95
|
+
return before if name == "seed" else after'''
|
|
96
|
+
|
|
97
|
+
new_func += '''
|
|
98
|
+
|
|
99
|
+
@property
|
|
100
|
+
def instance(self):
|
|
101
|
+
return self
|
|
102
|
+
|
|
103
|
+
def load(self):
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
def verifier(env: Environment, final_answer: str | None = None) -> float:'''
|
|
107
|
+
|
|
108
|
+
if docstring:
|
|
109
|
+
new_func += f'\n {docstring}'
|
|
110
|
+
|
|
111
|
+
# First, find the minimum indentation in the body (excluding empty lines)
|
|
112
|
+
body_lines = body.splitlines()
|
|
113
|
+
min_indent = float('inf')
|
|
114
|
+
for line in body_lines:
|
|
115
|
+
if line.strip(): # Non-empty line
|
|
116
|
+
indent_len = len(line) - len(line.lstrip())
|
|
117
|
+
min_indent = min(min_indent, indent_len)
|
|
118
|
+
|
|
119
|
+
# If we didn't find any non-empty lines, set min_indent to 0
|
|
120
|
+
if min_indent == float('inf'):
|
|
121
|
+
min_indent = 0
|
|
122
|
+
|
|
123
|
+
# Now strip the minimum indentation and re-indent to 8 spaces
|
|
124
|
+
if body_lines:
|
|
125
|
+
indented_lines = []
|
|
126
|
+
for line in body_lines:
|
|
127
|
+
if line.strip(): # Non-empty line
|
|
128
|
+
# Remove the minimum indentation and add 8 spaces
|
|
129
|
+
stripped_line = line[min_indent:] if len(line) > min_indent else line.lstrip()
|
|
130
|
+
indented_lines.append(' ' + stripped_line)
|
|
131
|
+
else: # Empty line
|
|
132
|
+
indented_lines.append('')
|
|
133
|
+
|
|
134
|
+
indented_body = '\n'.join(indented_lines)
|
|
135
|
+
new_func += f'\n{indented_body}'
|
|
136
|
+
|
|
137
|
+
# Add the return statement
|
|
138
|
+
new_func += '\n\n return verifier(Environment(), transcript)'
|
|
139
|
+
|
|
140
|
+
# Replace TASK_FAILED_SCORE with 0 in the function string
|
|
141
|
+
new_func = new_func.replace('TASK_FAILED_SCORE', '0')
|
|
142
|
+
|
|
143
|
+
return new_func
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|