py-adtools 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
- adtools/__init__.py +1 -1
- adtools/evaluator.py +420 -57
- adtools/evaluator_pool.py +9 -7
- {py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/METADATA +1 -1
- py_adtools-0.1.9.dist-info/RECORD +13 -0
- py_adtools-0.1.7.dist-info/RECORD +0 -13
- {py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/WHEEL +0 -0
- {py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/top_level.txt +0 -0
adtools/__init__.py
CHANGED
adtools/evaluator.py
CHANGED
@@ -7,21 +7,39 @@ Commercial use of this software or its derivatives requires prior written permis
 
 import multiprocessing
 import os
+import pickle
 import sys
+import time
 from abc import ABC, abstractmethod
+from multiprocessing import shared_memory
 from queue import Empty
-from typing import Any, Literal, Dict, Callable, List
+from typing import Any, Literal, Dict, Callable, List, Tuple
+import multiprocessing.managers
 import psutil
 import traceback
 
 from .py_code import PyProgram
 
+__all__ = ['PyEvaluator', 'PyEvaluatorForBigReturnedObject']
+
+
+def _set_mp_start_method(multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn']):
+    if multiprocessing_start_method == 'auto':
+        # Force macOS and Linux use 'fork' to generate new process
+        if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
+            multiprocessing.set_start_method('fork', force=True)
+    elif multiprocessing_start_method == 'fork':
+        multiprocessing.set_start_method('fork', force=True)
+    elif multiprocessing_start_method == 'spawn':
+        multiprocessing.set_start_method('spawn', force=True)
+
 
 class PyEvaluator(ABC):
 
     def __init__(
         self,
         exec_code: bool = True,
+        find_and_kill_children_evaluation_process: bool = False,
         debug_mode: bool = False,
         *,
         join_timeout_seconds: int = 10
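
The new `_set_mp_start_method` helper centralizes the start-method selection that 0.1.7 performed inline inside the secure-evaluation method (removed in a later hunk). As a rough standalone sketch of why the choice matters (illustrative only, not py-adtools code): 'fork' inherits the parent interpreter's memory, while 'spawn' starts a fresh interpreter and therefore needs a picklable, module-level target plus a `__main__` guard.

# Illustrative sketch only (not py-adtools code): effect of the start method.
import multiprocessing
import sys

def child(queue):
    queue.put('hello from child')

if __name__ == '__main__':
    # Mirror the 'auto' policy: force 'fork' on Linux/macOS; on Windows the
    # default is already 'spawn', so 'auto' leaves it untouched.
    if sys.platform.startswith(('darwin', 'linux')):
        multiprocessing.set_start_method('fork', force=True)

    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=child, args=(queue,))
    proc.start()
    print(queue.get())  # 'hello from child'
    proc.join()
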
@@ -33,11 +51,15 @@ class PyEvaluator(ABC):
                 which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
                 evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
                 in 'self.evaluate_program()' will no longer be affective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill children processes
+                when they are terminated. Note that it is suggested to set to 'False' if the evaluation process
+                does not start new processes.
             debug_mode: Debug mode.
             join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
         """
         self.debug_mode = debug_mode
         self.exec_code = exec_code
+        self.find_and_kill_children_evaluation_process = find_and_kill_children_evaluation_process
         self.join_timeout_seconds = join_timeout_seconds
 
     @abstractmethod
@@ -66,11 +88,14 @@ class PyEvaluator(ABC):
         )
 
     def _kill_process_and_its_children(self, process: multiprocessing.Process):
-
-
-
-
-
+        if self.find_and_kill_children_evaluation_process:
+            # Find all children processes
+            try:
+                parent = psutil.Process(process.pid)
+                children_processes = parent.children(recursive=True)
+            except psutil.NoSuchProcess:
+                children_processes = []
+        else:
             children_processes = []
         # Terminate parent process
         process.terminate()
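
The added branch only collects the child processes; the unchanged lines that follow (partially visible above) terminate the parent and then deal with the children. A minimal standalone sketch of the same psutil pattern, under the assumption that stragglers are force-killed after a grace period (illustrative, not the package's exact code):

# Sketch: terminate a process and everything it spawned, using psutil.
import multiprocessing
import psutil

def kill_process_tree(process: multiprocessing.Process, join_timeout: float = 10.0):
    try:
        children = psutil.Process(process.pid).children(recursive=True)
    except psutil.NoSuchProcess:
        children = []            # Parent already exited; nothing to enumerate.
    process.terminate()          # Ask the parent to stop.
    process.join(join_timeout)   # Give it time to shut down cleanly.
    if process.is_alive():
        process.kill()           # Escalate if it ignored the request.
    for child in children:
        try:
            child.terminate()
        except psutil.NoSuchProcess:
            pass
    _, alive = psutil.wait_procs(children, timeout=join_timeout)
    for child in alive:
        child.kill()             # Force-kill any stragglers.
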
@@ -90,43 +115,38 @@
             program: the program to be evaluated.
             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                None, None, None, None
-            )
-
-        # Get evaluate result
-        res = self.evaluate_program(
-            str(program),
-            callable_funcs_dict,
-            callable_funcs_list,
-            callable_cls_dict,
-            callable_cls_list,
-            **kwargs
+        # Parse to program instance
+        if isinstance(program, str):
+            program = PyProgram.from_text(program)
+        function_names = [f.name for f in program.functions]
+        class_names = [c.name for c in program.classes]
+
+        # Execute the code and get callable instances
+        if self.exec_code:
+            all_globals_namespace = {}
+            # Execute the program, map func/var/class to global namespace
+            exec(str(program), all_globals_namespace)
+            # Get callable functions
+            callable_funcs_list = [all_globals_namespace[f_name] for f_name in function_names]
+            callable_funcs_dict = dict(zip(function_names, callable_funcs_list))
+            # Get callable classes
+            callable_cls_list = [all_globals_namespace[c_name] for c_name in class_names]
+            callable_cls_dict = dict(zip(class_names, callable_cls_list))
+        else:
+            callable_funcs_list, callable_funcs_dict, callable_cls_list, callable_cls_dict = (
+                None, None, None, None
             )
-
-
-
-
-
+
+        # Get evaluate result
+        res = self.evaluate_program(
+            str(program),
+            callable_funcs_dict,
+            callable_funcs_list,
+            callable_cls_dict,
+            callable_cls_list,
+            **kwargs
+        )
+        return res
 
     def _evaluate_in_safe_process(
         self,
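
The rebuilt `evaluate` body turns the program text into live callables by exec-ing it into a fresh namespace and looking the function and class names back up. A minimal sketch of that mechanism on its own (plain `exec`, without the `PyProgram` parsing; the example program is invented):

# Sketch: obtain callable functions/classes from a program string via exec().
program_text = """
def add(a, b):
    return a + b

class Greeter:
    def hello(self):
        return 'hi'
"""

namespace = {}
exec(program_text, namespace)       # Defines 'add' and 'Greeter' inside the dict.

add = namespace['add']              # Analogous to callable_funcs_dict['add']
Greeter = namespace['Greeter']      # Analogous to callable_cls_dict['Greeter']

assert add(2, 3) == 5
assert Greeter().hello() == 'hi'
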
@@ -151,8 +171,9 @@ class PyEvaluator(ABC):
         timeout_seconds: int | float = None,
         redirect_to_devnull: bool = False,
         multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+        get_evaluate_time=False,
         **kwargs
-    ):
+    ) -> Any | Tuple[Any, float]:
         """Evaluate program in a new process. This enables timeout restriction and output redirection.
         Args:
             program: the program to be evaluated.
@@ -161,16 +182,13 @@
             multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
                 the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
                 If set to 'default', there will be no changes to system default.
+            get_evaluate_time: get evaluation time for this program.
             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            Returns the evaluation results. If the 'get_evaluate_time' is True,
+            the return value will be (Results, Time).
         """
-
-        # Force macOS and Linux use 'fork' to generate new process
-        if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
-            multiprocessing.set_start_method('fork', force=True)
-        elif multiprocessing_start_method == 'fork':
-            multiprocessing.set_start_method('fork', force=True)
-        elif multiprocessing_start_method == 'spawn':
-            multiprocessing.set_start_method('spawn', force=True)
+        _set_mp_start_method(multiprocessing_start_method)
 
         try:
             # Start evaluation process
@@ -180,33 +198,378 @@
                 args=(str(program), result_queue, redirect_to_devnull),
                 kwargs=kwargs,
             )
+            evaluate_start_time = time.time()
             process.start()
 
             if timeout_seconds is not None:
                 try:
                     # Get the result in timeout seconds
                     result = result_queue.get(timeout=timeout_seconds)
-                    #
+                    # Calculate the evaluate time
+                    eval_time = time.time() - evaluate_start_time
+                    # After getting the result, terminate and kill the process
                     self._kill_process_and_its_children(process)
                 except Empty: # The queue is empty indicates a timeout
+                    # Calculate the evaluate time
+                    eval_time = time.time() - evaluate_start_time
                     if self.debug_mode:
                         print(f'DEBUG: the evaluation time exceeds {timeout_seconds}s.')
-                    # Terminate
+                    # Terminate and kill all processes if timeout happens
                     self._kill_process_and_its_children(process)
                     result = None
                 except Exception as e:
+                    # Calculate the evaluate time
+                    eval_time = time.time() - evaluate_start_time
                     if self.debug_mode:
                         print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
-                    # Terminate
+                    # Terminate and kill all processes if meet exceptions
                     self._kill_process_and_its_children(process)
                     result = None
             else:
                 # If there is no timeout limit, wait execution to finish
                 result = result_queue.get()
-                #
+                # Calculate the evaluate time
+                eval_time = time.time() - evaluate_start_time
+                # Terminate and kill all processes after evaluation
                 self._kill_process_and_its_children(process)
-
+
+            return (result, eval_time) if get_evaluate_time else result
         except Exception as e:
             if self.debug_mode:
-
+                traceback.print_exc()
             return None
+
+
+class PyEvaluatorForBigReturnedObject(PyEvaluator):
+    def __init__(
+        self,
+        exec_code: bool = True,
+        find_and_kill_children_evaluation_process: bool = False,
+        debug_mode: bool = False,
+        *,
+        join_timeout_seconds: int = 10
+    ):
+        """Evaluator interface for evaluating the Python algorithm program. Override this class and implement
+        'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+
+        **Note:** This class supports the secure_evaluate to handle very big return object, e.g., Tensors.
+
+        Args:
+            exec_code: Using 'exec()' to execute the program code and obtain the callable functions and classes,
+                which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
+                evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
+                in 'self.evaluate_program()' will no longer be affective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill children processes
+                when they are terminated. Note that it is suggested to set to 'False' if the evaluation process
+                does not start new processes.
+            debug_mode: Debug mode.
+            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
+        """
+        super().__init__(
+            exec_code,
+            find_and_kill_children_evaluation_process,
+            debug_mode,
+            join_timeout_seconds=join_timeout_seconds
+        )
+
+    @abstractmethod
+    def evaluate_program(
+        self,
+        program_str: str,
+        callable_functions_dict: Dict[str, Callable] | None,
+        callable_functions_list: List[Callable] | None,
+        callable_classes_dict: Dict[str, Callable] | None,
+        callable_classes_list: List[Callable] | None,
+        **kwargs
+    ) -> Any:
+        raise NotImplementedError(
+            'Must provide an evaluator for a python program. '
+            'Override this method in a subclass.'
+        )
+
+    def _evaluate_and_put_res_in_manager_dict(
+        self,
+        program_str: str,
+        result_dict: multiprocessing.managers.DictProxy,
+        signal_queue: multiprocessing.Queue,
+        redirect_to_devnull: bool,
+        **kwargs
+    ):
+        """Evaluate and store result in Manager().dict() (for large results)."""
+        if redirect_to_devnull:
+            with open(os.devnull, 'w') as devnull:
+                os.dup2(devnull.fileno(), sys.stdout.fileno())
+                os.dup2(devnull.fileno(), sys.stderr.fileno())
+        try:
+            # Evaluate and get results
+            res = self.evaluate(program_str, **kwargs)
+            # Write results into dict
+            result_dict['result'] = res
+            # Put a signal to queue to inform the parent process the evaluation has done
+            signal_queue.put(('ok', None))
+        except Exception as e:
+            if self.debug_mode:
+                traceback.print_exc()
+            # Write results into dict
+            result_dict['result'] = None
+            # Put a signal to queue to inform the parent process the evaluation has terminated
+            signal_queue.put(('error', str(e)))
+
+    def secure_evaluate(
+        self,
+        program: str | PyProgram,
+        timeout_seconds: int | float = None,
+        redirect_to_devnull: bool = False,
+        multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+        get_evaluate_time: bool = False,
+        **kwargs
+    ):
+        """Evaluate program in a new process. This enables timeout restriction and output redirection.
+        Args:
+            program: the program to be evaluated.
+            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+            redirect_to_devnull: redirect any output to '/dev/null'.
+            multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
+                the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
+                If set to 'default', there will be no changes to system default.
+            get_evaluate_time: get evaluation time for this program.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            Returns the evaluation results. If the 'get_evaluate_time' is True,
+            the return value will be (Results, Time).
+        """
+        _set_mp_start_method(multiprocessing_start_method)
+
+        with multiprocessing.Manager() as manager:
+            # Path a dictionary to the evaluation process to get maybe very big return objects
+            result_dict = manager.dict()
+            # Pass a queue to the evaluation process to get signals whether the evaluation terminates
+            signal_queue = multiprocessing.Queue()
+            # Start evaluation process
+            process = multiprocessing.Process(
+                target=self._evaluate_and_put_res_in_manager_dict,
+                args=(str(program), result_dict, signal_queue, redirect_to_devnull),
+                kwargs=kwargs,
+            )
+            evaluate_start_time = time.time()
+            process.start()
+
+            try:
+                if timeout_seconds is not None:
+                    try:
+                        # If there is timeout restriction, we try to get results before timeout
+                        signal = signal_queue.get(timeout=timeout_seconds)
+                    except Empty:
+                        # Evaluation timeout happens, we return 'None' as well as the actual evaluate time
+                        eval_time = time.time() - evaluate_start_time
+                        if self.debug_mode:
+                            print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                        # Terminate and kill all processes after evaluation
+                        self._kill_process_and_its_children(process)
+                        return (None, eval_time) if get_evaluate_time else None
+                else:
+                    # If there is no timeout restriction, we wait until the evaluation terminates
+                    signal = signal_queue.get()
+
+                # Calculate evaluation time and kill children processes
+                eval_time = time.time() - evaluate_start_time
+                # Terminate and kill all processes after evaluation
+                self._kill_process_and_its_children(process)
+
+                # The first element is 'ok' indicates that the evaluation terminate without exceptions
+                if signal[0] == 'ok':
+                    # We get the evaluation results from 'manager.dict'
+                    result = result_dict.get('result', None)
+                else:
+                    # The evaluation failed for some reason, so we set the result to 'None'
+                    if self.debug_mode:
+                        print(f'DEBUG: child process error: {signal[1]}')
+                    result = None
+            except:
+                # If there is any exception during above procedure, we set the result to None
+                eval_time = time.time() - evaluate_start_time
+                if self.debug_mode:
+                    print(f'DEBUG: exception in manager evaluate:\n{traceback.format_exc()}')
+                # Terminate and kill all processes after evaluation
+                self._kill_process_and_its_children(process)
+                result = None
+
+            return (result, eval_time) if get_evaluate_time else result
+
+
+class PyEvaluatorForBigReturnedObjectV2(PyEvaluator):
+
+    def __init__(
+        self,
+        exec_code: bool = True,
+        find_and_kill_children_evaluation_process: bool = False,
+        debug_mode: bool = False,
+        *,
+        join_timeout_seconds: int = 10
+    ):
+        """Evaluator interface for evaluating the Python algorithm program. Override this class and implement
+        'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+        Note: This class supports the secure_evaluate to handle very big return object, e.g., Tensors.
+
+        Args:
+            exec_code: Using 'exec()' to execute the program code and obtain the callable functions and classes,
+                which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
+                evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
+                in 'self.evaluate_program()' will no longer be affective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill children processes
+                when they are terminated. Note that it is suggested to set to 'False' if the evaluation process
+                does not start new processes.
+            debug_mode: Debug mode.
+            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
+        """
+        super().__init__(
+            exec_code,
+            find_and_kill_children_evaluation_process,
+            debug_mode,
+            join_timeout_seconds=join_timeout_seconds
+        )
+
+    @abstractmethod
+    def evaluate_program(
+        self,
+        program_str: str,
+        callable_functions_dict: Dict[str, Callable] | None,
+        callable_functions_list: List[Callable] | None,
+        callable_classes_dict: Dict[str, Callable] | None,
+        callable_classes_list: List[Callable] | None,
+        **kwargs
+    ) -> Any:
+        """Evaluate a given program.
+        Args:
+            program_str: The raw program text.
+            callable_functions_dict: A dict maps function name to callable function.
+            callable_functions_list: A list of callable functions.
+            callable_classes_dict: A dict maps class name to callable class.
+            callable_classes_list: A list of callable classes.
+        Returns:
+            Returns the evaluation result.
+        """
+        raise NotImplementedError(
+            'Must provide an evaluator for a python program. '
+            'Override this method in a subclass.'
+        )
+
+    def _evaluate_and_put_res_in_shared_memory(
+        self,
+        program_str: str,
+        meta_queue: multiprocessing.Queue,
+        redirect_to_devnull: bool,
+        **kwargs
+    ):
+        """Evaluate and store result in shared memory (for large results)."""
+        # Redirect STDOUT and STDERR to '/dev/null'
+        if redirect_to_devnull:
+            with open(os.devnull, 'w') as devnull:
+                os.dup2(devnull.fileno(), sys.stdout.fileno())
+                os.dup2(devnull.fileno(), sys.stderr.fileno())
+
+        # Evaluate and get results
+        res = self.evaluate(program_str, **kwargs)
+
+        try:
+            # Dump the results to data
+            data = pickle.dumps(res, protocol=pickle.HIGHEST_PROTOCOL)
+            # Create shared memory with the size of data
+            shm = shared_memory.SharedMemory(create=True, size=len(data))
+            # Write data
+            shm.buf[:len(data)] = data
+            # Send back shm metadata (shared_mem_name, shared_mem_size) and put them into the queue
+            meta_queue.put((shm.name, len(data)))
+            # Child closes its handle
+            shm.close()
+        except Exception as data_pickle_error:
+            # Put the exception message to the queue
+            meta_queue.put((None, str(data_pickle_error)))
+
+    def secure_evaluate(
+        self,
+        program: str | PyProgram,
+        timeout_seconds: int | float = None,
+        redirect_to_devnull: bool = False,
+        multiprocessing_start_method: str = 'auto',
+        get_evaluate_time: bool = False,
+        **kwargs
+    ):
+        """Evaluate program in a new process. This enables timeout restriction and output redirection.
+        Args:
+            program: the program to be evaluated.
+            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+            redirect_to_devnull: redirect any output to '/dev/null'.
+            multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
+                the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
+                If set to 'default', there will be no changes to system default.
+            get_evaluate_time: get evaluation time for this program.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            Returns the evaluation results. If the 'get_evaluate_time' is True,
+            the return value will be (Results, Time).
+        """
+        if multiprocessing_start_method == 'auto':
+            if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
+                multiprocessing.set_start_method('fork', force=True)
+        elif multiprocessing_start_method == 'fork':
+            multiprocessing.set_start_method('fork', force=True)
+        elif multiprocessing_start_method == 'spawn':
+            multiprocessing.set_start_method('spawn', force=True)
+
+        meta_queue = multiprocessing.Queue()
+
+        process = multiprocessing.Process(
+            target=self._evaluate_and_put_res_in_shared_memory,
+            args=(str(program), meta_queue, redirect_to_devnull),
+            kwargs=kwargs,
+        )
+
+        evaluate_start_time = time.time()
+        process.start()
+
+        try:
+            if timeout_seconds is not None:
+                try:
+                    # Try to get the metadata before timeout
+                    meta = meta_queue.get(timeout=timeout_seconds)
+                except Empty:
+                    # Evaluate timeout
+                    eval_time = time.time() - evaluate_start_time
+                    if self.debug_mode:
+                        print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                    self._kill_process_and_its_children(process)
+                    return (None, eval_time) if get_evaluate_time else None
+            else:
+                meta = meta_queue.get()
+
+            # Calculate evaluation time
+            eval_time = time.time() - evaluate_start_time
+            self._kill_process_and_its_children(process)
+
+            # If the first element in the queue is None,
+            # it means that the shared memory raises exceptions
+            if meta[0] is None:
+                if self.debug_mode:
+                    print(f'DEBUG: shared memory failed with exception: {meta[1]}')
+                result = None
+            else:
+                # Read results from metadata
+                shm_name, size = meta
+                shm = shared_memory.SharedMemory(name=shm_name)
+                buf = bytes(shm.buf[:size])
+                # Load results from buffer
+                result = pickle.loads(buf)
+                shm.close()
+                try:
+                    shm.unlink()
+                except FileNotFoundError:
+                    pass
+        except Exception:
+            eval_time = time.time() - evaluate_start_time
+            if self.debug_mode:
+                print(f'DEBUG: exception in shared evaluate:\n{traceback.format_exc()}')
+            self._kill_process_and_its_children(process)
+            result = None
+
+        return (result, eval_time) if get_evaluate_time else result
adtools/evaluator_pool.py
CHANGED
@@ -62,21 +62,23 @@ class EvaluatorExecutorPool:
             program: the program to be evaluated.
             timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
             redirect_to_devnull: redirect any output to '/dev/null'.
-            multiprocessing_start_method: start a process using 'fork' or 'spawn'.
+            multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
+                the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
+                If set to 'default', there will be no changes to system default.
+            return_time: get evaluation time for this program.
             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            Returns the evaluation results. If the 'get_evaluate_time' is True,
+            the return value will be (Results, Time).
         """
-        start_time = time.time()
         future = self.pool.submit(
             self.evaluator.secure_evaluate,
             program,
             timeout_seconds,
             redirect_to_devnull,
             multiprocessing_start_method,
+            return_time,
             **kwargs
         )
         res = future.result()
-
-        if return_time:
-            return res, duration
-        else:
-            return res
+        return res
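
Because `secure_evaluate` now measures the time itself, the pool simply forwards `return_time` positionally into the new `get_evaluate_time` parameter and returns whatever the future yields, instead of computing a `duration` of its own. The sketch below reproduces the forwarded call directly (hypothetical evaluator and program; the argument order is the one shown in the hunk above):

# Sketch: the call the pool now submits, issued directly (illustrative only).
from adtools.evaluator import PyEvaluator

class NoopEvaluator(PyEvaluator):
    def evaluate_program(self, program_str, callable_functions_dict,
                         callable_functions_list, callable_classes_dict,
                         callable_classes_list, **kwargs):
        return len(program_str)  # Trivial stand-in metric.

if __name__ == '__main__':
    evaluator = NoopEvaluator()
    # Positional order matches the pool: program, timeout_seconds, redirect_to_devnull,
    # multiprocessing_start_method, then return_time (received as get_evaluate_time).
    res = evaluator.secure_evaluate('def f():\n    return 1\n', 10, False, 'auto', True)
    result, eval_time = res  # (Results, Time) because return_time was True
    print(result, eval_time)
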
py_adtools-0.1.9.dist-info/RECORD
ADDED

@@ -0,0 +1,13 @@
+adtools/__init__.py,sha256=2XOq8zj8mpejJFavtARmst_hih6qAtBPKBoNB0UWeqs,183
+adtools/evaluator.py,sha256=v-tdE5emOa7o6615YN2Av7fuh5W5hIlHoSjc-rTO1uU,26334
+adtools/evaluator_pool.py,sha256=zfQ7DgAjpByqPM5SE1tRQT_HGXU5uwNntvguzrXaPEk,3258
+adtools/py_code.py,sha256=FZfkp-IZ4zpOjrWe6svKNJsQhVANaTTkE0l0mc4aMW8,14277
+adtools/lm/__init__.py,sha256=PZf5Lraly9rAWz-cxOSLCvZ9OZ4EI8aQEluetvNX8LM,146
+adtools/lm/lm_base.py,sha256=KtO7KTrrMW7oWN-BhncoIOsbOVQsSc-0gNCYtvR6Sog,1105
+adtools/lm/openai_api.py,sha256=LcfLkNOBrJTdsp0zcUjaCelIcQK5XknpHWrlB0S67_k,2390
+adtools/lm/vllm_server.py,sha256=BPZoTS77wNJDcJ_0FO2QFyZTf6WR0isYKMuTctqKEU8,12942
+py_adtools-0.1.9.dist-info/licenses/LICENSE,sha256=E5GGyecx3y5h2gcEGQloF-rDY9wbaef5IHjRsvtFbt8,1065
+py_adtools-0.1.9.dist-info/METADATA,sha256=Fnu9fmNWBgI8l4nuvrIwYHXGUu4vweTH8b_uziAB4sM,6386
+py_adtools-0.1.9.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
+py_adtools-0.1.9.dist-info/top_level.txt,sha256=X2kKzmJFDAKR2FWCij5pfMG9pVVjVUomyl4e-1VLXIk,8
+py_adtools-0.1.9.dist-info/RECORD,,

py_adtools-0.1.7.dist-info/RECORD
REMOVED

@@ -1,13 +0,0 @@
-adtools/__init__.py,sha256=kbxntZFeCcURiIypNOdMWyeKPdlzRsWOB-K7z6HNCsc,150
-adtools/evaluator.py,sha256=weA6zR1WyUE3f5pt7wQYF1ukmkA-e2kDLaogbDmG_Ig,9154
-adtools/evaluator_pool.py,sha256=v_NZibN4VI3STVUZt6ARdyoB4Z061xAefZlH8lkWsjE,2972
-adtools/py_code.py,sha256=FZfkp-IZ4zpOjrWe6svKNJsQhVANaTTkE0l0mc4aMW8,14277
-adtools/lm/__init__.py,sha256=PZf5Lraly9rAWz-cxOSLCvZ9OZ4EI8aQEluetvNX8LM,146
-adtools/lm/lm_base.py,sha256=KtO7KTrrMW7oWN-BhncoIOsbOVQsSc-0gNCYtvR6Sog,1105
-adtools/lm/openai_api.py,sha256=LcfLkNOBrJTdsp0zcUjaCelIcQK5XknpHWrlB0S67_k,2390
-adtools/lm/vllm_server.py,sha256=BPZoTS77wNJDcJ_0FO2QFyZTf6WR0isYKMuTctqKEU8,12942
-py_adtools-0.1.7.dist-info/licenses/LICENSE,sha256=E5GGyecx3y5h2gcEGQloF-rDY9wbaef5IHjRsvtFbt8,1065
-py_adtools-0.1.7.dist-info/METADATA,sha256=952zQgrdHcpBYGmnYqfwlVFfMubvvSYixg-weIf4eWU,6386
-py_adtools-0.1.7.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
-py_adtools-0.1.7.dist-info/top_level.txt,sha256=X2kKzmJFDAKR2FWCij5pfMG9pVVjVUomyl4e-1VLXIk,8
-py_adtools-0.1.7.dist-info/RECORD,,

{py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/WHEEL
File without changes

{py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/licenses/LICENSE
File without changes

{py_adtools-0.1.7.dist-info → py_adtools-0.1.9.dist-info}/top_level.txt
File without changes