py-adtools 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adtools/__init__.py +1 -0
- adtools/cli.py +61 -0
- adtools/evaluator/__init__.py +2 -0
- adtools/evaluator/auto_server.py +258 -0
- adtools/evaluator/py_evaluator.py +170 -0
- adtools/evaluator/py_evaluator_ray.py +110 -0
- adtools/lm/__init__.py +4 -0
- adtools/lm/lm_base.py +63 -0
- adtools/lm/openai_api.py +118 -0
- adtools/lm/sglang_server.py +423 -0
- adtools/lm/vllm_server.py +452 -0
- adtools/py_code.py +577 -0
- adtools/sandbox/__init__.py +2 -0
- adtools/sandbox/sandbox_executor.py +244 -0
- adtools/sandbox/sandbox_executor_ray.py +194 -0
- adtools/sandbox/utils.py +32 -0
- py_adtools-0.3.2.dist-info/METADATA +567 -0
- py_adtools-0.3.2.dist-info/RECORD +22 -0
- py_adtools-0.3.2.dist-info/WHEEL +5 -0
- py_adtools-0.3.2.dist-info/entry_points.txt +2 -0
- py_adtools-0.3.2.dist-info/licenses/LICENSE +21 -0
- py_adtools-0.3.2.dist-info/top_level.txt +1 -0
adtools/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from adtools.py_code import PyCodeBlock, PyFunction, PyClass, PyProgram
|
adtools/cli.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
# Import the main function of the auto_server
|
|
5
|
+
from adtools.evaluator.auto_server import main as auto_server_main
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main():
    """Entry point of the ``adtools`` command line tool.

    Only the first command-line token (the sub-command name) is parsed here.
    For ``serve``, every token after it is handed untouched to
    ``auto_server_main``, which parses its own options from ``sys.argv``.
    With no arguments, or with no recognised sub-command, the help text is
    written to stderr and the process exits with status 1.
    """
    top_parser = argparse.ArgumentParser(
        description="ADTools CLI for various utilities.",
        formatter_class=argparse.RawTextHelpFormatter,  # keep help text layout
    )
    commands = top_parser.add_subparsers(dest="command", help="Available commands")

    # 'serve' declares no options of its own: the server's argument parser
    # is the single source of truth for them.
    commands.add_parser(
        "serve",
        help="Launch the Auto-Evaluation Server. All arguments are passed directly to the server.",
        formatter_class=argparse.RawTextHelpFormatter,  # keep help text layout
    )

    # Bare invocation: show usage and fail.
    if len(sys.argv) == 1:
        top_parser.print_help(sys.stderr)
        sys.exit(1)

    # Look only at the sub-command token; the rest belongs to the sub-command.
    parsed = top_parser.parse_args(sys.argv[1:2])

    if parsed.command != "serve":
        # Not expected to be reachable when the sub-parsers are set up
        # correctly, kept as a safety net.
        top_parser.print_help(sys.stderr)
        sys.exit(1)

    # Make 'adtools serve ARGS...' look like '<script> ARGS...' to the server's
    # parser by temporarily swapping sys.argv, and always restore it afterwards.
    saved_argv = sys.argv
    sys.argv = [saved_argv[0], *saved_argv[2:]]
    try:
        auto_server_main()
    finally:
        sys.argv = saved_argv
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Allow running this module directly as a script in addition to importing
# ``main`` from elsewhere.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import argparse
|
|
5
|
+
import importlib.util
|
|
6
|
+
import threading
|
|
7
|
+
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
8
|
+
from typing import Optional, Dict
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
from adtools import PyClass
|
|
13
|
+
from adtools.evaluator import PyEvaluator, PyEvaluatorRay
|
|
14
|
+
|
|
15
|
+
__all__ = ["submit_code", "submit_code_async"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def submit_code(
    host: str,
    port: int | str,
    code: str,
    timeout: Optional[float] = None,
    *,
    post_timeout_seconds: float = 1800,
) -> Dict:
    """Send a program to the evaluation server and wait for its verdict.

    The request is a JSON POST to ``http://{host}:{port}/`` carrying the
    ``code`` and ``timeout`` fields.

    Args:
        host: Host name or address of the evaluation server.
        port: Port the evaluation server listens on.
        code: Source code to evaluate.
        timeout: Evaluation timeout in seconds, forwarded to the server as-is
            (``None`` is sent as JSON ``null``).
        post_timeout_seconds: Timeout of the HTTP POST itself, in seconds.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = f"http://{host}:{port}/"
    body = {"code": code, "timeout": timeout}

    with requests.Session() as session:
        reply = session.post(endpoint, json=body, timeout=post_timeout_seconds)
        reply.raise_for_status()
        return reply.json()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
async def submit_code_async(
    host: str,
    port: int | str,
    code: str,
    timeout: Optional[float] = None,
    *,
    post_timeout_seconds: float = 1800,
) -> Dict:
    """Asynchronously send a program to the evaluation server.

    Coroutine counterpart of ``submit_code`` built on ``aiohttp``, which is
    imported lazily so that it is only required when this function is used.

    Args:
        host: Host name or address of the evaluation server.
        port: Port the evaluation server listens on.
        code: Source code to evaluate.
        timeout: Evaluation timeout in seconds, forwarded to the server as-is.
        post_timeout_seconds: Total timeout of the HTTP POST, in seconds.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        ImportError: If ``aiohttp`` is not installed.
    """
    try:
        import aiohttp
    except ImportError:
        raise ImportError("Please install 'aiohttp'.")

    request_timeout = aiohttp.ClientTimeout(total=post_timeout_seconds)
    endpoint = f"http://{host}:{port}/"
    body = {"code": code, "timeout": timeout}

    async with aiohttp.ClientSession(timeout=request_timeout) as session:
        async with session.post(endpoint, json=body) as reply:
            reply.raise_for_status()
            return await reply.json()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class EvaluationHandler(BaseHTTPRequestHandler):
    """HTTP handler that evaluates the code submitted in a POST request.

    The handler reads three attributes from the server instance it belongs to:
    ``evaluator`` (an object with a ``secure_evaluate`` method),
    ``default_timeout`` and ``semaphore``.

    Every reply is a ``200`` response whose JSON body carries ``result``,
    ``evaluate_time`` and ``error_msg``; failures are reported through
    ``error_msg`` rather than through the HTTP status code.
    """

    def do_POST(self):
        """Evaluate the ``code`` field of the JSON request body and reply with JSON."""
        try:
            self.connection.settimeout(10)
            content_length = int(self.headers.get("Content-Length", 0))
            post_data = self.rfile.read(content_length)

            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            try:
                req_data = json.loads(post_data.decode("utf-8"))
            except ValueError:
                req_data = {}
            # Valid JSON that is not an object (list, number, ...) carries no
            # 'code' field either; treat it like an invalid request instead of
            # failing on '.get'.
            if not isinstance(req_data, dict):
                req_data = {}

            code_str = req_data.get("code")
            # Fall back to the server-level default when the timeout is absent
            # OR explicitly null: clients send '"timeout": null' when no
            # timeout was chosen, and that must not override the default.
            timeout = req_data.get("timeout")
            if timeout is None:
                timeout = self.server.default_timeout

            if not code_str:
                response = {
                    "result": None,
                    "evaluate_time": 0.0,
                    "error_msg": "No 'code' field in request or invalid JSON",
                }
            else:
                # The semaphore caps concurrent evaluations; this request's
                # thread blocks here until a slot is free.
                with self.server.semaphore:
                    results = self.server.evaluator.secure_evaluate(
                        code_str, timeout_seconds=timeout
                    )
                response = dict(results)

            # Make sure the payload can be serialized.
            try:
                json_response = json.dumps(response)
            except (TypeError, OverflowError):
                # The result is not JSON-serializable: send its string form.
                response["result"] = str(response.get("result"))
                json_response = json.dumps(response)

            self.send_response(200)
            self.send_header("Content-type", "application/json")
            self.end_headers()
            self.wfile.write(json_response.encode("utf-8"))

        except Exception as e:
            error_msg = str(e)
            print(f"Server Error: {error_msg}")
            error_response = json.dumps(
                {
                    "result": None,
                    "evaluate_time": 0.0,
                    "error_msg": f"Server internal error: {error_msg}",
                }
            )
            # Still 200 OK: the *HTTP* exchange worked, it is the *application*
            # (the evaluation) that failed.
            self.send_response(200)
            self.send_header("Content-type", "application/json")
            self.end_headers()
            self.wfile.write(error_response.encode("utf-8"))

    def log_message(self, format, *args):
        """Silence the per-request logging of the base handler."""
        pass
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class ThreadedHTTPServer(ThreadingHTTPServer):
    """Threading HTTP server used as a carrier for custom instance attributes.

    It adds no behaviour of its own; it exists so that shared resources can be
    attached to the server instance for the request handlers to read.
    """
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def main():
    """Load an evaluator class from a Python file and serve it over HTTP.

    Command-line options:
        -d/--dir: Path of the Python file that defines the evaluator (required).
        --host / --port: Address the HTTP server binds to.
        -t/--timeout: Default evaluation timeout in seconds.
        --max-workers: Maximum number of concurrent evaluations.

    The file must define exactly one public class (name not starting with
    ``_``), and that class must derive from ``PyEvaluator`` or
    ``PyEvaluatorRay``. It is instantiated without arguments.

    Raises:
        Exception: If the file has no public class or more than one.
        TypeError: If the public class is not an evaluator subclass.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--dir", required=True, help="Directory (file path) of the evaluator."
    )
    parser.add_argument("--host", default="0.0.0.0", help="Host of the server.")
    parser.add_argument("--port", default=8000, type=int, help="Port of the server.")
    parser.add_argument(
        "-t", "--timeout", default=None, type=float, help="Default timeout in seconds."
    )
    parser.add_argument(
        "--max-workers", default=4, type=int, help="Max concurrent evaluations."
    )
    args = parser.parse_args()

    # Read the evaluator source (UTF-8 is Python's default source encoding).
    with open(args.dir, encoding="utf-8") as f:
        program = f.read()

    # Find the single public class defined in the file.
    classes = PyClass.extract_all_classes_from_text(program)
    public_class_names = [cls.name for cls in classes if not cls.name.startswith("_")]

    if not public_class_names:
        raise Exception("No public classes found.")
    if len(public_class_names) > 1:
        raise Exception(
            f"The file should only have one public class, "
            f"but found {len(public_class_names)}"
        )
    public_class_name = public_class_names[0]

    # Work out the module name and its directory from the file path.
    file_path = os.path.abspath(args.dir)
    dir_name = os.path.dirname(file_path)
    base_name = os.path.basename(file_path)
    module_name = os.path.splitext(base_name)[0]

    # Make the module importable in the current process...
    if dir_name not in sys.path:
        sys.path.insert(0, dir_name)

    # ...and in child processes (multiprocessing spawn), which inherit the
    # environment rather than this process's sys.path.
    current_pythonpath = os.environ.get("PYTHONPATH", "")
    if dir_name not in current_pythonpath.split(os.pathsep):
        os.environ["PYTHONPATH"] = (
            (dir_name + os.pathsep + current_pythonpath)
            if current_pythonpath
            else dir_name
        )

    module = importlib.import_module(module_name)
    EvaluatorClass = getattr(module, public_class_name)

    # The evaluator must be a "PyEvaluator" or a "PyEvaluatorRay".
    if not issubclass(EvaluatorClass, (PyEvaluator, PyEvaluatorRay)):
        raise TypeError(
            f"Class {public_class_name} must inherit from PyEvaluator or PyEvaluatorRay"
        )

    evaluator = EvaluatorClass()

    # Look for a timeout configured on the evaluator itself. Each candidate
    # attribute name is checked separately, in priority order; the first one
    # that holds a value wins.
    timeout_defined_in_class = None
    for field in ("timeout_seconds", "timeout", "_timeout_seconds", "_timeout"):
        candidate = getattr(evaluator, field, None)
        if candidate is not None:
            timeout_defined_in_class = candidate
            break

    # ThreadedHTTPServer handles each request in its own thread.
    server = ThreadedHTTPServer((args.host, args.port), EvaluationHandler)

    # Attach shared resources to the server instance so handlers can reach them.
    server.evaluator = evaluator
    # The command-line timeout takes precedence over the class-level one.
    server.default_timeout = args.timeout or timeout_defined_in_class
    server.semaphore = threading.Semaphore(args.max_workers)

    print(f"Evaluator '{public_class_name}' loaded from {args.dir}")
    print(
        f"HTTP Server running at http://{args.host}:{args.port} with max_workers={args.max_workers}"
    )

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        server.server_close()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Script entry point: force the "spawn" multiprocessing start method before
# starting the server.
if __name__ == "__main__":
    import multiprocessing

    try:
        multiprocessing.set_start_method("spawn", force=True)
    except RuntimeError:
        # Best effort: a RuntimeError from setting the start method is ignored
        # and the server starts anyway.
        pass

    main()
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
|
|
3
|
+
|
|
4
|
+
NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
|
|
5
|
+
Commercial use of this software or its derivatives requires prior written permission.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
import traceback
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from typing import Any, Dict, Callable, List
|
|
12
|
+
|
|
13
|
+
from adtools.py_code import PyProgram
|
|
14
|
+
from adtools.sandbox import SandboxExecutor, ExecutionResults
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"PyEvaluator",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PyEvaluator(ABC):

    def __init__(
        self,
        exec_code: bool = True,
        find_and_kill_children_evaluation_process: bool = False,
        debug_mode: bool = False,
        *,
        join_timeout_seconds: int = 10,
    ):
        """Base evaluator for Python algorithm programs.

        Subclass it, implement 'evaluate_program', then call 'self.evaluate()'
        or 'self.secure_evaluate()' to run an evaluation.

        Args:
            exec_code: Run the program text through 'exec()' and hand the
                resulting callable functions and classes to
                'self.evaluate_program()'. Set it to 'False' when evaluating a
                Python script; the 'callable_...' arguments of
                'self.evaluate_program()' are then all 'None'.
            find_and_kill_children_evaluation_process: Forwarded to the sandbox
                executor used by 'self.secure_evaluate'; kill child processes
                of the evaluation process when it is terminated. Leaving it
                'False' is suggested when the evaluation starts no new
                processes.
            debug_mode: Debug mode.
            join_timeout_seconds: Seconds to wait for the evaluation process to
                finish; the process is killed on timeout.
        """
        self.exec_code = exec_code
        self.debug_mode = debug_mode
        self.sandbox_executor = SandboxExecutor(
            evaluate_worker=self,
            find_and_kill_children_evaluation_process=find_and_kill_children_evaluation_process,
            debug_mode=debug_mode,
            join_timeout_seconds=join_timeout_seconds,
        )

    @abstractmethod
    def evaluate_program(
        self,
        program_str: str,
        callable_functions_dict: Dict[str, Callable] | None,
        callable_functions_list: List[Callable] | None,
        callable_classes_dict: Dict[str, Callable] | None,
        callable_classes_list: List[Callable] | None,
        **kwargs,
    ) -> Any:
        """Score a program; to be implemented by subclasses.

        Args:
            program_str: The raw program text.
            callable_functions_dict: Maps the name of each function defined in
                `program_str` to the corresponding callable.
            callable_functions_list: The callables of the functions defined in
                `program_str`, in program order.
            callable_classes_dict: Maps the name of each class defined in
                `program_str` to the corresponding class object.
            callable_classes_list: The class objects defined in `program_str`,
                in program order.

        Returns:
            The evaluation result.
        """
        raise NotImplementedError(
            "Must provide an evaluator for a python program. "
            "Override this method in a subclass."
        )

    def _exec_and_get_res(self, program: str | PyProgram, **kwargs):
        """Prepare the callables of a program and pass them to 'evaluate_program'.

        Args:
            program: The program to evaluate, as text or as a parsed program.
            **kwargs: Extra keyword arguments forwarded to 'evaluate_program'.

        Returns:
            Whatever 'evaluate_program' returns.
        """
        # Text input is parsed first so functions and classes can be listed.
        if isinstance(program, str):
            program = PyProgram.from_text(program)
        func_names = [func.name for func in program.functions]
        cls_names = [klass.name for klass in program.classes]

        if not self.exec_code:
            # Script mode: nothing is executed here, so no callables exist.
            funcs_list = funcs_dict = classes_list = classes_dict = None
        else:
            # Run the program; its functions, variables and classes end up in
            # this namespace.
            namespace = {}
            exec(str(program), namespace)
            funcs_list = [namespace[name] for name in func_names]
            funcs_dict = dict(zip(func_names, funcs_list))
            classes_list = [namespace[name] for name in cls_names]
            classes_dict = dict(zip(cls_names, classes_list))

        return self.evaluate_program(
            str(program),
            funcs_dict,
            funcs_list,
            classes_dict,
            classes_list,
            **kwargs,
        )

    def evaluate(self, program: str | PyProgram, **kwargs) -> ExecutionResults:
        """Evaluate a program in the current process.

        Any error raised during the evaluation is captured: the result is then
        'None' and the formatted traceback is returned in 'error_msg'.

        Args:
            program: The program to evaluate.
            **kwargs: Extra keyword arguments forwarded to 'evaluate_program'.

        Returns:
            The result, the elapsed wall-clock time and the error message.
        """
        started_at = time.time()
        # noinspection PyBroadException
        try:
            outcome = self._exec_and_get_res(program, **kwargs)
            failure = ""
        except:
            outcome = None
            failure = str(traceback.format_exc())

        elapsed = time.time() - started_at
        return ExecutionResults(
            result=outcome, evaluate_time=elapsed, error_msg=failure
        )

    def secure_evaluate(
        self,
        program: str | PyProgram,
        timeout_seconds: int | float = None,
        redirect_to_devnull: bool = False,
        **kwargs,
    ) -> ExecutionResults:
        """Evaluate a program through the sandbox executor.

        The work is delegated to 'self.sandbox_executor', which runs
        '_exec_and_get_res' in a new process; this is what makes the timeout
        and the output redirection possible.

        Args:
            program: The program to evaluate.
            timeout_seconds: Return 'None' as result if the execution takes
                longer than this many seconds.
            redirect_to_devnull: Redirect any output to '/dev/null'.
            **kwargs: Extra keyword arguments forwarded to 'evaluate_program'.

        Returns:
            The evaluation results.
        """
        return self.sandbox_executor.secure_execute(
            worker_execute_method_name="_exec_and_get_res",
            method_args=[program],
            method_kwargs=kwargs,
            timeout_seconds=timeout_seconds,
            redirect_to_devnull=redirect_to_devnull,
        )
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
|
|
3
|
+
|
|
4
|
+
NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
|
|
5
|
+
Commercial use of this software or its derivatives requires prior written permission.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import abstractmethod
|
|
9
|
+
from typing import Any, Dict, List, Callable
|
|
10
|
+
|
|
11
|
+
from adtools.py_code import PyProgram
|
|
12
|
+
from adtools.evaluator.py_evaluator import PyEvaluator
|
|
13
|
+
from adtools.sandbox.sandbox_executor_ray import SandboxExecutorRay, ExecutionResults
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__all__ = ["PyEvaluatorRay"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PyEvaluatorRay(PyEvaluator):

    def __init__(
        self,
        init_ray: bool = True,
        exec_code: bool = True,
        debug_mode: bool = False,
        *,
        ray_rotation_max_bytes: int = 50 * 1024 * 1024,  # 50 MB
        ray_rotation_backup_count: int = 1,
    ):
        """Evaluator that runs evaluations in isolation through Ray.

        It supports efficient zero-copy return of large objects (e.g., Tensors).

        Args:
            init_ray: Whether to initialize Ray.
            exec_code: Whether to execute the program code with 'exec()'.
            debug_mode: Enable debug print statements.
            ray_rotation_max_bytes: Forwarded unchanged to 'SandboxExecutorRay'.
            ray_rotation_backup_count: Forwarded unchanged to 'SandboxExecutorRay'.
        """
        super().__init__(
            exec_code=exec_code,
            debug_mode=debug_mode,
        )

        # Replace the executor created by the base class with the Ray one.
        self.sandbox_executor = SandboxExecutorRay(
            evaluate_worker=self,
            init_ray=init_ray,
            debug_mode=debug_mode,
            ray_rotation_max_bytes=ray_rotation_max_bytes,
            ray_rotation_backup_count=ray_rotation_backup_count,
        )

    @abstractmethod
    def evaluate_program(
        self,
        program_str: str,
        callable_functions_dict: Dict[str, Callable] | None,
        callable_functions_list: List[Callable] | None,
        callable_classes_dict: Dict[str, Callable] | None,
        callable_classes_list: List[Callable] | None,
        **kwargs,
    ) -> Any:
        """Score a program; to be implemented by subclasses.

        Args:
            program_str: The raw program text.
            callable_functions_dict: Maps the name of each function defined in
                `program_str` to the corresponding callable.
            callable_functions_list: The callables of the functions defined in
                `program_str`, in program order.
            callable_classes_dict: Maps the name of each class defined in
                `program_str` to the corresponding class object.
            callable_classes_list: The class objects defined in `program_str`,
                in program order.

        Returns:
            The evaluation result.
        """
        raise NotImplementedError(
            "Must provide an evaluator for a python program. "
            "Override this method in a subclass."
        )

    def secure_evaluate(
        self,
        program: str | PyProgram,
        timeout_seconds: int | float = None,
        redirect_to_devnull: bool = False,
        *,
        ray_actor_options: dict[str, Any] = None,
        **kwargs,
    ) -> ExecutionResults:
        """Evaluate a program in a separate Ray Actor (process).

        The work is delegated to 'self.sandbox_executor', which is asked to run
        '_exec_and_get_res' on the given program.

        Args:
            program: The program to evaluate.
            timeout_seconds: Return 'None' as result if the execution takes
                longer than this many seconds.
            redirect_to_devnull: Redirect any output to '/dev/null'.
            ray_actor_options: Keyword arguments passed to
                RayWorkerClass.options(...).
            **kwargs: Extra keyword arguments forwarded to 'evaluate_program'.

        Returns:
            The evaluation results.
        """
        executor = self.sandbox_executor
        return executor.secure_execute(
            worker_execute_method_name="_exec_and_get_res",
            method_args=[program],
            method_kwargs=kwargs,
            timeout_seconds=timeout_seconds,
            redirect_to_devnull=redirect_to_devnull,
            ray_actor_options=ray_actor_options,
        )
|
adtools/lm/__init__.py
ADDED
adtools/lm/lm_base.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
|
|
3
|
+
|
|
4
|
+
NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
|
|
5
|
+
Commercial use of this software or its derivatives requires prior written permission.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import abstractmethod
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
import openai.types.chat
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LanguageModel:
    """Common interface of the language model backends.

    Every method of this base class is a no-op that returns ``None``.
    """

    def chat_completion(
        self,
        message: str | List[openai.types.chat.ChatCompletionMessageParam],
        max_tokens: int,
        timeout_seconds: float,
        *args,
        **kwargs,
    ):
        """Send a chat completion query in OpenAI format and return the response content.

        The base implementation does nothing and returns ``None``.

        Args:
            message: The message, as a plain string or in OpenAI message format.
            max_tokens: The maximum number of tokens to generate.
            timeout_seconds: The timeout in seconds.
        """

    def embedding(
        self,
        text: str | List[str],
        dimensions: Optional[int] = None,
        timeout_seconds: Optional[float] = None,
        **kwargs,
    ) -> List[float] | List[List[float]]:
        """Compute embeddings for one text or a list of texts.

        The base implementation does nothing and returns ``None``.

        Args:
            text: The text or the list of texts to embed.
            dimensions: The number of dimensions of the output embeddings.
            timeout_seconds: The timeout in seconds.

        Returns:
            The embedding of the text, or one embedding per text for a list.
        """

    def close(self):
        """Release resources (if necessary)."""

    def reload(self):
        """Reload the language model (if necessary)."""

    def __del__(self):
        # Release resources when the object is garbage-collected.
        self.close()
|