fleet-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fleet-python might be problematic. Click here for more details.
- examples/dsl_example.py +127 -0
- examples/example.py +11 -24
- examples/json_tasks_example.py +82 -0
- examples/nova_act_example.py +18 -169
- examples/openai_example.py +127 -223
- examples/openai_simple_example.py +61 -0
- examples/quickstart.py +5 -5
- fleet/__init__.py +17 -1
- fleet/base.py +1 -1
- fleet/client.py +77 -30
- fleet/env/__init__.py +2 -21
- fleet/env/client.py +9 -253
- fleet/instance/__init__.py +25 -0
- fleet/instance/client.py +295 -0
- fleet/{env → instance}/models.py +13 -0
- fleet/playwright.py +291 -0
- fleet/resources/base.py +5 -2
- fleet/resources/browser.py +15 -8
- fleet/resources/sqlite.py +3 -3
- fleet/verifiers/__init__.py +16 -0
- fleet/verifiers/code.py +132 -0
- fleet/verifiers/db.py +706 -0
- fleet/verifiers/sql_differ.py +187 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/METADATA +3 -1
- fleet_python-0.2.3.dist-info/RECORD +31 -0
- fleet_python-0.2.1.dist-info/RECORD +0 -21
- /fleet/{env → instance}/base.py +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/WHEEL +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .db import QueryBuilder, DatabaseSnapshot, SnapshotDiff, IgnoreConfig
|
|
2
|
+
from .code import (
|
|
3
|
+
TASK_SUCCESSFUL_SCORE,
|
|
4
|
+
extract_last_assistant_message,
|
|
5
|
+
execute_validation_function,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"DatabaseSnapshot",
|
|
10
|
+
"QueryBuilder",
|
|
11
|
+
"SnapshotDiff",
|
|
12
|
+
"IgnoreConfig",
|
|
13
|
+
"TASK_SUCCESSFUL_SCORE",
|
|
14
|
+
"extract_last_assistant_message",
|
|
15
|
+
"execute_validation_function",
|
|
16
|
+
]
|
fleet/verifiers/code.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
from .db import DatabaseSnapshot, IgnoreConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
TASK_SUCCESSFUL_SCORE = 1
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def extract_last_assistant_message(transcript: str) -> str:
    """
    Return the final assistant turn of a transcript with embedded blocks removed.

    The text following the last "Assistant:" marker is selected (the whole
    transcript is used when no marker is present). Inline data-URI image
    tags, tool call blocks and tool result blocks are then deleted, and
    leading/trailing whitespace is trimmed.

    Args:
        transcript: The full conversation transcript

    Returns:
        The cleaned text of the last assistant message, or "" when the
        transcript is empty
    """
    if not transcript:
        return ""

    import re

    # rpartition yields everything after the final marker, or the entire
    # transcript unchanged when the marker never occurs.
    _, _, message = transcript.rpartition("Assistant:")

    # (pattern, flags) pairs applied in order:
    # data-URI images, tool call blocks, tool result blocks.
    removals = (
        (r'<img src="data:[^"]*"[^>]*/?>', 0),
        (r'<tool_call[^>]*>.*?"/>', re.DOTALL),
        (r"<tool_result>.*?</tool_result>", re.DOTALL),
    )
    for pattern, flags in removals:
        message = re.sub(pattern, "", message, flags=flags)

    # Only the ends are trimmed; interior whitespace is left as-is.
    return message.strip()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def execute_validation_function(
    function_code: str,
    function_name: str,
    before_snapshot_path: str,
    after_snapshot_path: str,
    transcript: str | None = None,
) -> Dict[str, Any]:
    """
    Execute arbitrary validation function code with database snapshots.

    The code is exec'd in a fresh namespace that exposes DatabaseSnapshot,
    IgnoreConfig, TASK_SUCCESSFUL_SCORE and extract_last_assistant_message,
    then the function named ``function_name`` is looked up and called with
    the before/after snapshots. The transcript is passed as a third
    positional argument only when the function declares at least three
    parameters and its signature can actually bind three positional
    arguments. Awaitable results are awaited before being returned.

    Args:
        function_code: The Python code containing the function definition
        function_name: Name of the function to call after executing the code
        before_snapshot_path: Path to the before database snapshot
        after_snapshot_path: Path to the after database snapshot
        transcript: Optional conversation transcript forwarded to validation
            functions that accept a third positional argument

    Returns:
        Dict with the keys "success", "result" and "error". Any exception
        raised while building the snapshots, executing the code or calling
        the function is caught, logged, and reported through "error" (with
        a traceback) instead of propagating to the caller.
    """
    try:
        # Create database snapshots
        before = DatabaseSnapshot(before_snapshot_path)
        after = DatabaseSnapshot(after_snapshot_path)

        # Create a namespace with the required imports and constants
        namespace = {
            "DatabaseSnapshot": DatabaseSnapshot,
            "IgnoreConfig": IgnoreConfig,
            "TASK_SUCCESSFUL_SCORE": TASK_SUCCESSFUL_SCORE,
            "extract_last_assistant_message": extract_last_assistant_message,
            "__builtins__": __builtins__,
        }

        # SECURITY: this runs caller-supplied code with unrestricted builtins
        # inside the current process. Only pass code from a trusted source.
        exec(function_code, namespace)

        # Check if the function exists in the namespace
        if function_name not in namespace:
            return {
                "success": False,
                "error": f"Function '{function_name}' not found in the provided code",
                "result": None,
            }

        # Get the function from the namespace
        func = namespace[function_name]

        import inspect

        # Decide whether to forward the transcript. Counting parameters alone
        # is not enough: keyword-only and **kwargs parameters are included in
        # the count but cannot receive a positional argument, so confirm with
        # bind() that three positional arguments are actually accepted.
        sig = inspect.signature(func)
        call_args = [before, after]
        if len(sig.parameters) >= 3:
            try:
                sig.bind(before, after, transcript)
            except TypeError:
                # Extra parameters are not positional; use the 2-argument call.
                pass
            else:
                call_args.append(transcript)

        # Support both sync and async functions: a coroutine function returns
        # an awaitable here, as does any sync callable that wraps async work.
        result = func(*call_args)
        if inspect.isawaitable(result):
            result = await result

        return {"success": True, "result": result, "error": None}

    except Exception as e:
        error_msg = f"Error executing function: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg, "result": None}
|