py-adtools 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: py-adtools
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
  Home-page: https://github.com/RayZhhh/py-adtools
  Author: Rui Zhang
@@ -1,3 +1,3 @@
  from .py_code import PyCodeBlock, PyFunction, PyClass, PyProgram
- from .evaluator import PyEvaluator
+ from .evaluator import PyEvaluator, PyEvaluatorForBigReturnedObject
  from .evaluator_pool import EvaluatorExecutorPool
@@ -0,0 +1,575 @@
+ """
+ Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
+
+ NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
+ Commercial use of this software or its derivatives requires prior written permission.
+ """
+
+ import multiprocessing
+ import os
+ import pickle
+ import sys
+ import time
+ from abc import ABC, abstractmethod
+ from multiprocessing import shared_memory
+ from queue import Empty
+ from typing import Any, Literal, Dict, Callable, List, Tuple
+ import multiprocessing.managers
+ import psutil
+ import traceback
+
+ from .py_code import PyProgram
+
+ __all__ = ['PyEvaluator', 'PyEvaluatorForBigReturnedObject']
+
+
+ def _set_mp_start_method(multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn']):
+     if multiprocessing_start_method == 'auto':
+         # Force macOS and Linux to use 'fork' when creating the new process
+         if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
+             multiprocessing.set_start_method('fork', force=True)
+     elif multiprocessing_start_method == 'fork':
+         multiprocessing.set_start_method('fork', force=True)
+     elif multiprocessing_start_method == 'spawn':
+         multiprocessing.set_start_method('spawn', force=True)
+
+
+ class PyEvaluator(ABC):
+
+     def __init__(
+         self,
+         exec_code: bool = True,
+         find_and_kill_children_evaluation_process: bool = False,
+         debug_mode: bool = False,
+         *,
+         join_timeout_seconds: int = 10
+     ):
+         """Evaluator interface for evaluating a Python algorithm program. Override this class, implement the
+         'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+         Args:
+             exec_code: Use 'exec()' to execute the program code and obtain the callable functions and classes,
+                 which are passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going
+                 to evaluate a Python script. Note that if this parameter is set to 'False', the 'callable_...'
+                 arguments of 'self.evaluate_program()' no longer take effect.
+             find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill child processes
+                 when the evaluation process is terminated. It is suggested to leave this set to 'False' if
+                 the evaluation process does not start new processes.
+             debug_mode: Debug mode.
+             join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process on timeout.
+         """
+         self.debug_mode = debug_mode
+         self.exec_code = exec_code
+         self.find_and_kill_children_evaluation_process = find_and_kill_children_evaluation_process
+         self.join_timeout_seconds = join_timeout_seconds
+
+     @abstractmethod
+     def evaluate_program(
+         self,
+         program_str: str,
+         callable_functions_dict: Dict[str, Callable] | None,
+         callable_functions_list: List[Callable] | None,
+         callable_classes_dict: Dict[str, Callable] | None,
+         callable_classes_list: List[Callable] | None,
+         **kwargs
+     ) -> Any:
+         """Evaluate a given program.
+         Args:
+             program_str: The raw program text.
+             callable_functions_dict: A dict mapping function names to callable functions.
+             callable_functions_list: A list of callable functions.
+             callable_classes_dict: A dict mapping class names to callable classes.
+             callable_classes_list: A list of callable classes.
+         Returns:
+             The evaluation result.
+         """
+         raise NotImplementedError(
+             'Must provide an evaluator for a python program. '
+             'Override this method in a subclass.'
+         )
+
+     def _kill_process_and_its_children(self, process: multiprocessing.Process):
+         if self.find_and_kill_children_evaluation_process:
+             # Find all child processes
+             try:
+                 parent = psutil.Process(process.pid)
+                 children_processes = parent.children(recursive=True)
+             except psutil.NoSuchProcess:
+                 children_processes = []
+         else:
+             children_processes = []
+         # Terminate the parent process
+         process.terminate()
+         process.join(timeout=self.join_timeout_seconds)
+         if process.is_alive():
+             process.kill()
+             process.join()
+         # Kill all child processes
+         for child in children_processes:
+             if self.debug_mode:
+                 print(f"Killing process {process.pid}'s child process {child.pid}")
+             child.terminate()
+
+     def evaluate(self, program: str | PyProgram, **kwargs):
+         """Evaluate a program.
+         Args:
+             program: the program to be evaluated.
+             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+         """
+         # Parse to a program instance
+         if isinstance(program, str):
+             program = PyProgram.from_text(program)
+         function_names = [f.name for f in program.functions]
+         class_names = [c.name for c in program.classes]
+
+         # Execute the code and get callable instances
+         if self.exec_code:
+             all_globals_namespace = {}
+             # Execute the program, mapping functions/variables/classes into the global namespace
+             exec(str(program), all_globals_namespace)
+             # Get callable functions
+             callable_funcs_list = [all_globals_namespace[f_name] for f_name in function_names]
+             callable_funcs_dict = dict(zip(function_names, callable_funcs_list))
+             # Get callable classes
+             callable_cls_list = [all_globals_namespace[c_name] for c_name in class_names]
+             callable_cls_dict = dict(zip(class_names, callable_cls_list))
+         else:
+             callable_funcs_list, callable_funcs_dict, callable_cls_list, callable_cls_dict = (
+                 None, None, None, None
+             )
+
+         # Get the evaluation result
+         res = self.evaluate_program(
+             str(program),
+             callable_funcs_dict,
+             callable_funcs_list,
+             callable_cls_dict,
+             callable_cls_list,
+             **kwargs
+         )
+         return res
+
+     def _evaluate_in_safe_process(
+         self,
+         program_str: str,
+         result_queue: multiprocessing.Queue,
+         redirect_to_devnull: bool,
+         **kwargs
+     ):
+         # Redirect STDOUT and STDERR to '/dev/null'
+         if redirect_to_devnull:
+             with open(os.devnull, 'w') as devnull:
+                 os.dup2(devnull.fileno(), sys.stdout.fileno())
+                 os.dup2(devnull.fileno(), sys.stderr.fileno())
+
+         # Evaluate and put the result into the queue
+         res = self.evaluate(program_str, **kwargs)
+         result_queue.put(res)
+
+     def secure_evaluate(
+         self,
+         program: str | PyProgram,
+         timeout_seconds: int | float = None,
+         redirect_to_devnull: bool = False,
+         multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+         get_evaluate_time: bool = False,
+         **kwargs
+     ) -> Any | Tuple[Any, float]:
+         """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+         Args:
+             program: the program to be evaluated.
+             timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+             redirect_to_devnull: redirect any output to '/dev/null'.
+             multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                 the process is started with 'fork' on Linux/macOS and with the system default ('spawn')
+                 on Windows. If set to 'default', the system default is left unchanged.
+             get_evaluate_time: also return the evaluation time for this program.
+             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+         Returns:
+             The evaluation result. If 'get_evaluate_time' is True,
+             the return value is a tuple of (result, evaluation time).
+         """
+         _set_mp_start_method(multiprocessing_start_method)
+
+         try:
+             # Start the evaluation process
+             result_queue = multiprocessing.Queue()
+             process = multiprocessing.Process(
+                 target=self._evaluate_in_safe_process,
+                 args=(str(program), result_queue, redirect_to_devnull),
+                 kwargs=kwargs,
+             )
+             evaluate_start_time = time.time()
+             process.start()
+
+             if timeout_seconds is not None:
+                 try:
+                     # Get the result within 'timeout_seconds'
+                     result = result_queue.get(timeout=timeout_seconds)
+                     # Calculate the evaluation time
+                     eval_time = time.time() - evaluate_start_time
+                     # After getting the result, terminate and kill the process
+                     self._kill_process_and_its_children(process)
+                 except Empty:  # An empty queue indicates a timeout
+                     # Calculate the evaluation time
+                     eval_time = time.time() - evaluate_start_time
+                     if self.debug_mode:
+                         print(f'DEBUG: the evaluation time exceeds {timeout_seconds}s.')
+                     # Terminate and kill all processes if a timeout happens
+                     self._kill_process_and_its_children(process)
+                     result = None
+                 except Exception:
+                     # Calculate the evaluation time
+                     eval_time = time.time() - evaluate_start_time
+                     if self.debug_mode:
+                         print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
+                     # Terminate and kill all processes if an exception occurs
+                     self._kill_process_and_its_children(process)
+                     result = None
+             else:
+                 # If there is no timeout limit, wait for execution to finish
+                 result = result_queue.get()
+                 # Calculate the evaluation time
+                 eval_time = time.time() - evaluate_start_time
+                 # Terminate and kill all processes after evaluation
+                 self._kill_process_and_its_children(process)
+
+             return (result, eval_time) if get_evaluate_time else result
+         except Exception:
+             if self.debug_mode:
+                 traceback.print_exc()
+             return None
+
+
243
+ class PyEvaluatorForBigReturnedObject(PyEvaluator):
244
+ def __init__(
245
+ self,
246
+ exec_code: bool = True,
247
+ find_and_kill_children_evaluation_process: bool = False,
248
+ debug_mode: bool = False,
249
+ *,
250
+ join_timeout_seconds: int = 10
251
+ ):
252
+ """Evaluator interface for evaluating the Python algorithm program. Override this class and implement
253
+ 'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
254
+
255
+ **Note:** This class supports the secure_evaluate to handle very big return object, e.g., Tensors.
256
+
257
+ Args:
258
+ exec_code: Using 'exec()' to execute the program code and obtain the callable functions and classes,
259
+ which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
260
+ evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
261
+ in 'self.evaluate_program()' will no longer be affective.
262
+ find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill children processes
263
+ when they are terminated. Note that it is suggested to set to 'False' if the evaluation process
264
+ does not start new processes.
265
+ debug_mode: Debug mode.
266
+ join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
267
+ """
268
+ super().__init__(
269
+ exec_code,
270
+ find_and_kill_children_evaluation_process,
271
+ debug_mode,
272
+ join_timeout_seconds=join_timeout_seconds
273
+ )
274
+
275
+ @abstractmethod
276
+ def evaluate_program(
277
+ self,
278
+ program_str: str,
279
+ callable_functions_dict: Dict[str, Callable] | None,
280
+ callable_functions_list: List[Callable] | None,
281
+ callable_classes_dict: Dict[str, Callable] | None,
282
+ callable_classes_list: List[Callable] | None,
283
+ **kwargs
284
+ ) -> Any:
285
+ raise NotImplementedError(
286
+ 'Must provide an evaluator for a python program. '
287
+ 'Override this method in a subclass.'
288
+ )
289
+
290
+ def _evaluate_and_put_res_in_manager_dict(
291
+ self,
292
+ program_str: str,
293
+ result_dict: multiprocessing.managers.DictProxy,
294
+ signal_queue: multiprocessing.Queue,
295
+ redirect_to_devnull: bool,
296
+ **kwargs
297
+ ):
298
+ """Evaluate and store result in Manager().dict() (for large results)."""
299
+ if redirect_to_devnull:
300
+ with open(os.devnull, 'w') as devnull:
301
+ os.dup2(devnull.fileno(), sys.stdout.fileno())
302
+ os.dup2(devnull.fileno(), sys.stderr.fileno())
303
+ try:
304
+ # Evaluate and get results
305
+ res = self.evaluate(program_str, **kwargs)
306
+ # Write results into dict
307
+ result_dict['result'] = res
308
+ # Put a signal to queue to inform the parent process the evaluation has done
309
+ signal_queue.put(('ok', None))
310
+ except Exception as e:
311
+ if self.debug_mode:
312
+ traceback.print_exc()
313
+ # Write results into dict
314
+ result_dict['result'] = None
315
+ # Put a signal to queue to inform the parent process the evaluation has terminated
316
+ signal_queue.put(('error', str(e)))
317
+
+     def secure_evaluate(
+         self,
+         program: str | PyProgram,
+         timeout_seconds: int | float = None,
+         redirect_to_devnull: bool = False,
+         multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+         get_evaluate_time: bool = False,
+         **kwargs
+     ):
+         """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+         Args:
+             program: the program to be evaluated.
+             timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+             redirect_to_devnull: redirect any output to '/dev/null'.
+             multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                 the process is started with 'fork' on Linux/macOS and with the system default ('spawn')
+                 on Windows. If set to 'default', the system default is left unchanged.
+             get_evaluate_time: also return the evaluation time for this program.
+             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+         Returns:
+             The evaluation result. If 'get_evaluate_time' is True,
+             the return value is a tuple of (result, evaluation time).
+         """
+         _set_mp_start_method(multiprocessing_start_method)
+
+         with multiprocessing.Manager() as manager:
+             # Pass a dictionary to the evaluation process to receive possibly very big returned objects
+             result_dict = manager.dict()
+             # Pass a queue to the evaluation process to signal whether the evaluation has terminated
+             signal_queue = multiprocessing.Queue()
+             # Start the evaluation process
+             process = multiprocessing.Process(
+                 target=self._evaluate_and_put_res_in_manager_dict,
+                 args=(str(program), result_dict, signal_queue, redirect_to_devnull),
+                 kwargs=kwargs,
+             )
+             evaluate_start_time = time.time()
+             process.start()
+
+             try:
+                 if timeout_seconds is not None:
+                     try:
+                         # If there is a timeout restriction, try to get the signal before the timeout
+                         signal = signal_queue.get(timeout=timeout_seconds)
+                     except Empty:
+                         # An evaluation timeout happened; return 'None' along with the actual evaluation time
+                         eval_time = time.time() - evaluate_start_time
+                         if self.debug_mode:
+                             print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                         # Terminate and kill all processes after evaluation
+                         self._kill_process_and_its_children(process)
+                         return (None, eval_time) if get_evaluate_time else None
+                 else:
+                     # If there is no timeout restriction, wait until the evaluation terminates
+                     signal = signal_queue.get()
+
+                 # Calculate the evaluation time
+                 eval_time = time.time() - evaluate_start_time
+                 # Terminate and kill all processes after evaluation
+                 self._kill_process_and_its_children(process)
+
+                 # A first element of 'ok' indicates that the evaluation terminated without exceptions
+                 if signal[0] == 'ok':
+                     # Get the evaluation result from 'manager.dict'
+                     result = result_dict.get('result', None)
+                 else:
+                     # The evaluation failed for some reason, so set the result to 'None'
+                     if self.debug_mode:
+                         print(f'DEBUG: child process error: {signal[1]}')
+                     result = None
+             except Exception:
+                 # If any exception occurs during the above procedure, set the result to None
+                 eval_time = time.time() - evaluate_start_time
+                 if self.debug_mode:
+                     print(f'DEBUG: exception in manager evaluate:\n{traceback.format_exc()}')
+                 # Terminate and kill all processes after evaluation
+                 self._kill_process_and_its_children(process)
+                 result = None
+
+             return (result, eval_time) if get_evaluate_time else result
+
+
+ class PyEvaluatorForBigReturnedObjectV2(PyEvaluator):
+
+     def __init__(
+         self,
+         exec_code: bool = True,
+         find_and_kill_children_evaluation_process: bool = False,
+         debug_mode: bool = False,
+         *,
+         join_timeout_seconds: int = 10
+     ):
+         """Evaluator interface for evaluating a Python algorithm program. Override this class, implement the
+         'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+         Note: This class's 'secure_evaluate' can handle very big returned objects, e.g., tensors.
+
+         Args:
+             exec_code: Use 'exec()' to execute the program code and obtain the callable functions and classes,
+                 which are passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going
+                 to evaluate a Python script. Note that if this parameter is set to 'False', the 'callable_...'
+                 arguments of 'self.evaluate_program()' no longer take effect.
+             find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill child processes
+                 when the evaluation process is terminated. It is suggested to leave this set to 'False' if
+                 the evaluation process does not start new processes.
+             debug_mode: Debug mode.
+             join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process on timeout.
+         """
+         super().__init__(
+             exec_code,
+             find_and_kill_children_evaluation_process,
+             debug_mode,
+             join_timeout_seconds=join_timeout_seconds
+         )
+
+     @abstractmethod
+     def evaluate_program(
+         self,
+         program_str: str,
+         callable_functions_dict: Dict[str, Callable] | None,
+         callable_functions_list: List[Callable] | None,
+         callable_classes_dict: Dict[str, Callable] | None,
+         callable_classes_list: List[Callable] | None,
+         **kwargs
+     ) -> Any:
+         """Evaluate a given program.
+         Args:
+             program_str: The raw program text.
+             callable_functions_dict: A dict mapping function names to callable functions.
+             callable_functions_list: A list of callable functions.
+             callable_classes_dict: A dict mapping class names to callable classes.
+             callable_classes_list: A list of callable classes.
+         Returns:
+             The evaluation result.
+         """
+         raise NotImplementedError(
+             'Must provide an evaluator for a python program. '
+             'Override this method in a subclass.'
+         )
+
+     def _evaluate_and_put_res_in_shared_memory(
+         self,
+         program_str: str,
+         meta_queue: multiprocessing.Queue,
+         redirect_to_devnull: bool,
+         **kwargs
+     ):
+         """Evaluate and store the result in shared memory (for large results)."""
+         # Redirect STDOUT and STDERR to '/dev/null'
+         if redirect_to_devnull:
+             with open(os.devnull, 'w') as devnull:
+                 os.dup2(devnull.fileno(), sys.stdout.fileno())
+                 os.dup2(devnull.fileno(), sys.stderr.fileno())
+
+         # Evaluate and get the result
+         res = self.evaluate(program_str, **kwargs)
+
+         try:
+             # Pickle the result to bytes
+             data = pickle.dumps(res, protocol=pickle.HIGHEST_PROTOCOL)
+             # Create shared memory with the size of the data
+             shm = shared_memory.SharedMemory(create=True, size=len(data))
+             # Write the data
+             shm.buf[:len(data)] = data
+             # Send back the shm metadata (shared memory name, data size) through the queue
+             meta_queue.put((shm.name, len(data)))
+             # The child closes its handle
+             shm.close()
+         except Exception as data_pickle_error:
+             # Put the exception message into the queue
+             meta_queue.put((None, str(data_pickle_error)))
+
+     def secure_evaluate(
+         self,
+         program: str | PyProgram,
+         timeout_seconds: int | float = None,
+         redirect_to_devnull: bool = False,
+         multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+         get_evaluate_time: bool = False,
+         **kwargs
+     ):
+         """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+         Args:
+             program: the program to be evaluated.
+             timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+             redirect_to_devnull: redirect any output to '/dev/null'.
+             multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                 the process is started with 'fork' on Linux/macOS and with the system default ('spawn')
+                 on Windows. If set to 'default', the system default is left unchanged.
+             get_evaluate_time: also return the evaluation time for this program.
+             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+         Returns:
+             The evaluation result. If 'get_evaluate_time' is True,
+             the return value is a tuple of (result, evaluation time).
+         """
+         _set_mp_start_method(multiprocessing_start_method)
+
+         meta_queue = multiprocessing.Queue()
+
+         process = multiprocessing.Process(
+             target=self._evaluate_and_put_res_in_shared_memory,
+             args=(str(program), meta_queue, redirect_to_devnull),
+             kwargs=kwargs,
+         )
+
+         evaluate_start_time = time.time()
+         process.start()
+
+         try:
+             if timeout_seconds is not None:
+                 try:
+                     # Try to get the metadata before the timeout
+                     meta = meta_queue.get(timeout=timeout_seconds)
+                 except Empty:
+                     # Evaluation timeout
+                     eval_time = time.time() - evaluate_start_time
+                     if self.debug_mode:
+                         print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                     self._kill_process_and_its_children(process)
+                     return (None, eval_time) if get_evaluate_time else None
+             else:
+                 meta = meta_queue.get()
+
+             # Calculate the evaluation time
+             eval_time = time.time() - evaluate_start_time
+             self._kill_process_and_its_children(process)
+
+             # If the first element in the queue is None,
+             # the shared memory step raised an exception
+             if meta[0] is None:
+                 if self.debug_mode:
+                     print(f'DEBUG: shared memory failed with exception: {meta[1]}')
+                 result = None
+             else:
+                 # Read the result using the metadata
+                 shm_name, size = meta
+                 shm = shared_memory.SharedMemory(name=shm_name)
+                 buf = bytes(shm.buf[:size])
+                 # Load the result from the buffer
+                 result = pickle.loads(buf)
+                 shm.close()
+                 try:
+                     shm.unlink()
+                 except FileNotFoundError:
+                     pass
+         except Exception:
+             eval_time = time.time() - evaluate_start_time
+             if self.debug_mode:
+                 print(f'DEBUG: exception in shared evaluate:\n{traceback.format_exc()}')
+             self._kill_process_and_its_children(process)
+             result = None
+
+         return (result, eval_time) if get_evaluate_time else result
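
To make the new API concrete, here is a minimal usage sketch pieced together from the docstrings in the hunk above. The subclass name, the program text, and the 'solve' function are hypothetical; only 'PyEvaluator', 'evaluate_program', and 'secure_evaluate' come from the package itself (assuming the import name 'py_adtools').

from py_adtools import PyEvaluator

class SquareEvaluator(PyEvaluator):
    # Score a candidate program by calling one of the functions exec()'d from it.
    def evaluate_program(self, program_str, callable_functions_dict,
                         callable_functions_list, callable_classes_dict,
                         callable_classes_list, **kwargs):
        solve = callable_functions_dict['solve']  # hypothetical function name
        return solve(10)

program = 'def solve(n):\n    return n * n\n'
evaluator = SquareEvaluator(debug_mode=True)
# Sandbox the evaluation in a child process with a 5-second timeout, and
# also get the wall-clock evaluation time (both are part of this release).
result, seconds = evaluator.secure_evaluate(program, timeout_seconds=5, get_evaluate_time=True)
print(result, seconds)  # 100 and a small float, or (None, ...) on timeout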
@@ -62,21 +62,23 @@ class EvaluatorExecutorPool:
              program: the program to be evaluated.
              timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
              redirect_to_devnull: redirect any output to '/dev/null'.
-             multiprocessing_start_method: start a process using 'fork' or 'spawn'.
+             multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                 the process is started with 'fork' on Linux/macOS and 'spawn' on Windows.
+                 If set to 'default', the system default is left unchanged.
+             return_time: also return the evaluation time for this program.
              **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+         Returns:
+             The evaluation result. If 'return_time' is True,
+             the return value is a tuple of (result, evaluation time).
          """
-         start_time = time.time()
          future = self.pool.submit(
              self.evaluator.secure_evaluate,
              program,
              timeout_seconds,
              redirect_to_devnull,
              multiprocessing_start_method,
+             return_time,
              **kwargs
          )
          res = future.result()
-         duration = time.time() - start_time
-         if return_time:
-             return res, duration
-         else:
-             return res
+         return res
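
One detail worth noting in this hunk: timing moved from the pool wrapper into 'secure_evaluate' itself ('start_time'/'duration' deleted, 'return_time' forwarded). The old placement measured queue wait plus evaluation, because the clock started before the executor had a free worker. A standalone illustration of the difference (plain standard-library Python, not py-adtools code):

import time
from concurrent.futures import ThreadPoolExecutor

def fake_eval():
    time.sleep(1.0)  # stands in for a 1-second evaluation
    return 'score'

pool = ThreadPoolExecutor(max_workers=1)
pool.submit(fake_eval)            # occupies the only worker
t0 = time.time()
future = pool.submit(fake_eval)   # waits about 1 s in the queue first
future.result()
print(time.time() - t0)           # ~2.0 s: queue wait + evaluation
# Timing inside the evaluation itself, as 0.1.9 does, reports ~1.0 s.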
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: py-adtools
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
  Home-page: https://github.com/RayZhhh/py-adtools
  Author: Rui Zhang
@@ -5,7 +5,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:

  setup(
      name='py-adtools',
-     version='0.1.7',
+     version='0.1.9',
      author='Rui Zhang',
      author_email='rzhang.cs@gmail.com',
      description='Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.',
@@ -1,212 +0,0 @@
- """
- Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
-
- NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
- Commercial use of this software or its derivatives requires prior written permission.
- """
-
- import multiprocessing
- import os
- import sys
- from abc import ABC, abstractmethod
- from queue import Empty
- from typing import Any, Literal, Dict, Callable, List
- import psutil
- import traceback
-
- from .py_code import PyProgram
-
-
- class PyEvaluator(ABC):
-
-     def __init__(
-         self,
-         exec_code: bool = True,
-         debug_mode: bool = False,
-         *,
-         join_timeout_seconds: int = 10
-     ):
-         """Evaluator interface for evaluating the Python algorithm program. Override this class and implement
-         'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
-         Args:
-             exec_code: Using 'exec()' to execute the program code and obtain the callable functions and classes,
-                 which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
-                 evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
-                 in 'self.evaluate_program()' will no longer be affective.
-             debug_mode: Debug mode.
-             join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
-         """
-         self.debug_mode = debug_mode
-         self.exec_code = exec_code
-         self.join_timeout_seconds = join_timeout_seconds
-
-     @abstractmethod
-     def evaluate_program(
-         self,
-         program_str: str,
-         callable_functions_dict: Dict[str, Callable] | None,
-         callable_functions_list: List[Callable] | None,
-         callable_classes_dict: Dict[str, Callable] | None,
-         callable_classes_list: List[Callable] | None,
-         **kwargs
-     ) -> Any:
-         """Evaluate a given program.
-         Args:
-             program_str: The raw program text.
-             callable_functions_dict: A dict maps function name to callable function.
-             callable_functions_list: A list of callable functions.
-             callable_classes_dict: A dict maps class name to callable class.
-             callable_classes_list: A list of callable classes.
-         Returns:
-             Returns the evaluation result.
-         """
-         raise NotImplementedError(
-             'Must provide an evaluator for a python program. '
-             'Override this method in a subclass.'
-         )
-
-     def _kill_process_and_its_children(self, process: multiprocessing.Process):
-         # Find all children processes
-         try:
-             parent = psutil.Process(process.pid)
-             children_processes = parent.children(recursive=True)
-         except psutil.NoSuchProcess:
-             children_processes = []
-         # Terminate parent process
-         process.terminate()
-         process.join(timeout=self.join_timeout_seconds)
-         if process.is_alive():
-             process.kill()
-             process.join()
-         # Kill all children processes
-         for child in children_processes:
-             if self.debug_mode:
-                 print(f"Killing process {process.pid}'s children process {child.pid}")
-             child.terminate()
-
-     def evaluate(self, program: str | PyProgram, **kwargs):
-         """Evaluate a program.
-         Args:
-             program: the program to be evaluated.
-             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
-         """
-         try:
-             # Parse to program instance
-             if isinstance(program, str):
-                 program = PyProgram.from_text(program)
-             function_names = [f.name for f in program.functions]
-             class_names = [c.name for c in program.classes]
-
-             # Execute the code and get callable instances
-             if self.exec_code:
-                 all_globals_namespace = {}
-                 # Execute the program, map func/var/class to global namespace
-                 exec(str(program), all_globals_namespace)
-                 # Get callable functions
-                 callable_funcs_list = [all_globals_namespace[f_name] for f_name in function_names]
-                 callable_funcs_dict = dict(zip(function_names, callable_funcs_list))
-                 # Get callable classes
-                 callable_cls_list = [all_globals_namespace[c_name] for c_name in class_names]
-                 callable_cls_dict = dict(zip(class_names, callable_cls_list))
-             else:
-                 callable_funcs_list, callable_funcs_dict, callable_cls_list, callable_cls_dict = (
-                     None, None, None, None
-                 )
-
-             # Get evaluate result
-             res = self.evaluate_program(
-                 str(program),
-                 callable_funcs_dict,
-                 callable_funcs_list,
-                 callable_cls_dict,
-                 callable_cls_list,
-                 **kwargs
-             )
-             return res
-         except Exception as e:
-             if self.debug_mode:
-                 print(traceback.format_exc())
-             return None
-
-     def _evaluate_in_safe_process(
-         self,
-         program_str: str,
-         result_queue: multiprocessing.Queue,
-         redirect_to_devnull: bool,
-         **kwargs
-     ):
-         # Redirect STDOUT and STDERR to '/dev/null'
-         if redirect_to_devnull:
-             with open(os.devnull, 'w') as devnull:
-                 os.dup2(devnull.fileno(), sys.stdout.fileno())
-                 os.dup2(devnull.fileno(), sys.stderr.fileno())
-
-         # Evaluate and put the results to the queue
-         res = self.evaluate(program_str, **kwargs)
-         result_queue.put(res)
-
-     def secure_evaluate(
-         self,
-         program: str | PyProgram,
-         timeout_seconds: int | float = None,
-         redirect_to_devnull: bool = False,
-         multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
-         **kwargs
-     ):
-         """Evaluate program in a new process. This enables timeout restriction and output redirection.
-         Args:
-             program: the program to be evaluated.
-             timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
-             redirect_to_devnull: redirect any output to '/dev/null'.
-             multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
-                 the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
-                 If set to 'default', there will be no changes to system default.
-             **kwargs: additional keyword arguments to pass to 'evaluate_program'.
-         """
-         if multiprocessing_start_method == 'auto':
-             # Force macOS and Linux use 'fork' to generate new process
-             if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
-                 multiprocessing.set_start_method('fork', force=True)
-         elif multiprocessing_start_method == 'fork':
-             multiprocessing.set_start_method('fork', force=True)
-         elif multiprocessing_start_method == 'spawn':
-             multiprocessing.set_start_method('spawn', force=True)
-
-         try:
-             # Start evaluation process
-             result_queue = multiprocessing.Queue()
-             process = multiprocessing.Process(
-                 target=self._evaluate_in_safe_process,
-                 args=(str(program), result_queue, redirect_to_devnull),
-                 kwargs=kwargs,
-             )
-             process.start()
-
-             if timeout_seconds is not None:
-                 try:
-                     # Get the result in timeout seconds
-                     result = result_queue.get(timeout=timeout_seconds)
-                     # After getting the result, terminate/kill the process
-                     self._kill_process_and_its_children(process)
-                 except Empty:  # The queue is empty indicates a timeout
-                     if self.debug_mode:
-                         print(f'DEBUG: the evaluation time exceeds {timeout_seconds}s.')
-                     # Terminate/kill all processes if timeout happens
-                     self._kill_process_and_its_children(process)
-                     result = None
-                 except Exception as e:
-                     if self.debug_mode:
-                         print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
-                     # Terminate/kill all processes if meet exceptions
-                     self._kill_process_and_its_children(process)
-                     result = None
-             else:
-                 # If there is no timeout limit, wait execution to finish
-                 result = result_queue.get()
-                 # Terminate/kill all processes after evaluation
-                 self._kill_process_and_its_children(process)
-             return result
-         except Exception as e:
-             if self.debug_mode:
-                 print(traceback.format_exc())
-             return None
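
The headline change relative to this removed 0.1.7 evaluator is the shared-memory result path added above, which avoids pushing a large pickled object through a multiprocessing queue. The handoff pattern in isolation (standard library only, not py-adtools code; POSIX semantics assumed, and the sample object is hypothetical):

import pickle
from multiprocessing import shared_memory

# Producer side (the child process in the new evaluator):
obj = {'scores': list(range(1000))}          # stands in for a big result
data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
shm = shared_memory.SharedMemory(create=True, size=len(data))
shm.buf[:len(data)] = data
meta = (shm.name, len(data))                 # only this tuple crosses the queue
shm.close()

# Consumer side (the parent process):
shm2 = shared_memory.SharedMemory(name=meta[0])
result = pickle.loads(bytes(shm2.buf[:meta[1]]))
shm2.close()
shm2.unlink()                                # free the segment
print(result == obj)                         # True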