py-adtools 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of py-adtools might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: py-adtools
-Version: 0.1.8
+Version: 0.1.10
 Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
 Home-page: https://github.com/RayZhhh/py-adtools
 Author: Rui Zhang
@@ -1,3 +1,3 @@
 from .py_code import PyCodeBlock, PyFunction, PyClass, PyProgram
-from .evaluator import PyEvaluator
+from .evaluator import PyEvaluator, PyEvaluatorForBigReturnedObject
 from .evaluator_pool import EvaluatorExecutorPool
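
The largest change in this release is a rewritten evaluator module: the diff below adds the new evaluator.py (578 lines), and the old 236-line module is removed at the end of this diff. For orientation, a minimal sketch of the subclass-and-evaluate workflow that the module's docstrings describe; the import name py_adtools, the function name 'priority', and the sample program are assumptions for illustration, not taken from this diff:

from py_adtools import PyEvaluator

class MyEvaluator(PyEvaluator):
    def evaluate_program(self, program_str, callable_functions_dict,
                         callable_functions_list, callable_classes_dict,
                         callable_classes_list, **kwargs):
        # 'priority' is assumed to be defined in the evaluated program text
        return callable_functions_dict['priority'](10)

program_text = 'def priority(x):\n    return x * 2\n'
# run in a separate process with a timeout; returns None on timeout or error
score = MyEvaluator().secure_evaluate(program_text, timeout_seconds=30)
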
@@ -0,0 +1,578 @@
+"""
+Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
+
+NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
+Commercial use of this software or its derivatives requires prior written permission.
+"""
+
+import multiprocessing
+import os
+import pickle
+import sys
+import time
+from abc import ABC, abstractmethod
+from multiprocessing import shared_memory
+from queue import Empty
+from typing import Any, Literal, Dict, Callable, List, Tuple
+import multiprocessing.managers
+import psutil
+import traceback
+
+from .py_code import PyProgram
+
+__all__ = ['PyEvaluator', 'PyEvaluatorForBigReturnedObject']
+
+
+def _set_mp_start_method(multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn']):
+    if multiprocessing_start_method == 'auto':
+        # Force macOS and Linux to use 'fork' to create new processes
+        if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
+            multiprocessing.set_start_method('fork', force=True)
+    elif multiprocessing_start_method == 'fork':
+        multiprocessing.set_start_method('fork', force=True)
+    elif multiprocessing_start_method == 'spawn':
+        multiprocessing.set_start_method('spawn', force=True)
+
+
+class PyEvaluator(ABC):
+
+    def __init__(
+            self,
+            exec_code: bool = True,
+            find_and_kill_children_evaluation_process: bool = False,
+            debug_mode: bool = False,
+            *,
+            join_timeout_seconds: int = 10
+    ):
+        """Evaluator interface for evaluating a Python algorithm program. Override this class and implement
+        the 'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+        Args:
+            exec_code: Use 'exec()' to execute the program code and obtain the callable functions and classes,
+                which are passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
+                evaluate a Python script. Note that if this parameter is set to 'False', the 'callable_...'
+                arguments in 'self.evaluate_program()' are no longer effective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill child processes
+                when the evaluation process is terminated. It is suggested to set this to 'False' if the
+                evaluation process does not start new processes.
+            debug_mode: Debug mode.
+            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process on timeout.
+        """
+        self.debug_mode = debug_mode
+        self.exec_code = exec_code
+        self.find_and_kill_children_evaluation_process = find_and_kill_children_evaluation_process
+        self.join_timeout_seconds = join_timeout_seconds
+
+    @abstractmethod
+    def evaluate_program(
+            self,
+            program_str: str,
+            callable_functions_dict: Dict[str, Callable] | None,
+            callable_functions_list: List[Callable] | None,
+            callable_classes_dict: Dict[str, Callable] | None,
+            callable_classes_list: List[Callable] | None,
+            **kwargs
+    ) -> Any:
+        """Evaluate a given program.
+        Args:
+            program_str: The raw program text.
+            callable_functions_dict: A dict mapping function names to callable functions.
+            callable_functions_list: A list of callable functions.
+            callable_classes_dict: A dict mapping class names to callable classes.
+            callable_classes_list: A list of callable classes.
+        Returns:
+            The evaluation result.
+        """
+        raise NotImplementedError(
+            'Must provide an evaluator for a Python program. '
+            'Override this method in a subclass.'
+        )
+
+    def _kill_process_and_its_children(self, process: multiprocessing.Process):
+        if self.find_and_kill_children_evaluation_process:
+            # Find all child processes
+            try:
+                parent = psutil.Process(process.pid)
+                children_processes = parent.children(recursive=True)
+            except psutil.NoSuchProcess:
+                children_processes = []
+        else:
+            children_processes = []
+        # Terminate the parent process
+        process.terminate()
+        process.join(timeout=self.join_timeout_seconds)
+        if process.is_alive():
+            process.kill()
+            process.join()
+        # Kill all child processes
+        for child in children_processes:
+            if self.debug_mode:
+                print(f"Killing process {process.pid}'s child process {child.pid}")
+            child.terminate()
+
+    def evaluate(self, program: str | PyProgram, **kwargs):
+        """Evaluate a program.
+        Args:
+            program: the program to be evaluated.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        """
+        # Parse to a program instance
+        if isinstance(program, str):
+            program = PyProgram.from_text(program)
+        function_names = [f.name for f in program.functions]
+        class_names = [c.name for c in program.classes]
+
+        # Execute the code and get callable instances
+        if self.exec_code:
+            all_globals_namespace = {}
+            # Execute the program; map functions/variables/classes into the global namespace
+            exec(str(program), all_globals_namespace)
+            # Get callable functions
+            callable_funcs_list = [all_globals_namespace[f_name] for f_name in function_names]
+            callable_funcs_dict = dict(zip(function_names, callable_funcs_list))
+            # Get callable classes
+            callable_cls_list = [all_globals_namespace[c_name] for c_name in class_names]
+            callable_cls_dict = dict(zip(class_names, callable_cls_list))
+        else:
+            callable_funcs_list, callable_funcs_dict, callable_cls_list, callable_cls_dict = (
+                None, None, None, None
+            )
+
+        # Get the evaluation result
+        res = self.evaluate_program(
+            str(program),
+            callable_funcs_dict,
+            callable_funcs_list,
+            callable_cls_dict,
+            callable_cls_list,
+            **kwargs
+        )
+        return res
+
+    def _evaluate_in_safe_process(
+            self,
+            program_str: str,
+            result_queue: multiprocessing.Queue,
+            redirect_to_devnull: bool,
+            **kwargs
+    ):
+        # Redirect STDOUT and STDERR to '/dev/null'
+        if redirect_to_devnull:
+            with open(os.devnull, 'w') as devnull:
+                os.dup2(devnull.fileno(), sys.stdout.fileno())
+                os.dup2(devnull.fileno(), sys.stderr.fileno())
+        try:
+            # Evaluate and put the result on the queue
+            res = self.evaluate(program_str, **kwargs)
+            result_queue.put(res)
+        except Exception:
+            traceback.print_exc()
+            result_queue.put(None)
+
+    def secure_evaluate(
+            self,
+            program: str | PyProgram,
+            timeout_seconds: int | float = None,
+            redirect_to_devnull: bool = False,
+            multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+            get_evaluate_time: bool = False,
+            **kwargs
+    ) -> Any | Tuple[Any, float]:
+        """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+        Args:
+            program: the program to be evaluated.
+            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+            redirect_to_devnull: redirect any output to '/dev/null'.
+            multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                the process is started using 'fork' on Linux/macOS and 'spawn' on Windows.
+                If set to 'default', the system default is left unchanged.
+            get_evaluate_time: also return the evaluation time for this program.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            The evaluation result. If 'get_evaluate_time' is True,
+            the return value is a tuple of (result, evaluation time).
+        """
+        _set_mp_start_method(multiprocessing_start_method)
+
+        try:
+            # Start the evaluation process
+            result_queue = multiprocessing.Queue()
+            process = multiprocessing.Process(
+                target=self._evaluate_in_safe_process,
+                args=(str(program), result_queue, redirect_to_devnull),
+                kwargs=kwargs,
+            )
+            evaluate_start_time = time.time()
+            process.start()
+
+            if timeout_seconds is not None:
+                try:
+                    # Get the result within the timeout
+                    result = result_queue.get(timeout=timeout_seconds)
+                    # Calculate the evaluation time
+                    eval_time = time.time() - evaluate_start_time
+                    # After getting the result, terminate and kill the process
+                    self._kill_process_and_its_children(process)
+                except Empty:  # An empty queue indicates a timeout
+                    # Calculate the evaluation time
+                    eval_time = time.time() - evaluate_start_time
+                    if self.debug_mode:
+                        print(f'DEBUG: the evaluation time exceeds {timeout_seconds}s.')
+                    # Terminate and kill all processes if a timeout happens
+                    self._kill_process_and_its_children(process)
+                    result = None
+                except Exception:
+                    # Calculate the evaluation time
+                    eval_time = time.time() - evaluate_start_time
+                    if self.debug_mode:
+                        print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
+                    # Terminate and kill all processes on exceptions
+                    self._kill_process_and_its_children(process)
+                    result = None
+            else:
+                # If there is no timeout limit, wait for the execution to finish
+                result = result_queue.get()
+                # Calculate the evaluation time
+                eval_time = time.time() - evaluate_start_time
+                # Terminate and kill all processes after evaluation
+                self._kill_process_and_its_children(process)
+
+            return (result, eval_time) if get_evaluate_time else result
+        except Exception:
+            if self.debug_mode:
+                traceback.print_exc()
+            return None
+
+
+class PyEvaluatorForBigReturnedObject(PyEvaluator):
+    def __init__(
+            self,
+            exec_code: bool = True,
+            find_and_kill_children_evaluation_process: bool = False,
+            debug_mode: bool = False,
+            *,
+            join_timeout_seconds: int = 10
+    ):
+        """Evaluator interface for evaluating a Python algorithm program. Override this class and implement
+        the 'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+
+        **Note:** This class supports 'secure_evaluate' with very big returned objects, e.g., tensors.
+
+        Args:
+            exec_code: Use 'exec()' to execute the program code and obtain the callable functions and classes,
+                which are passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
+                evaluate a Python script. Note that if this parameter is set to 'False', the 'callable_...'
+                arguments in 'self.evaluate_program()' are no longer effective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill child processes
+                when the evaluation process is terminated. It is suggested to set this to 'False' if the
+                evaluation process does not start new processes.
+            debug_mode: Debug mode.
+            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process on timeout.
+        """
+        super().__init__(
+            exec_code,
+            find_and_kill_children_evaluation_process,
+            debug_mode,
+            join_timeout_seconds=join_timeout_seconds
+        )
+
+    @abstractmethod
+    def evaluate_program(
+            self,
+            program_str: str,
+            callable_functions_dict: Dict[str, Callable] | None,
+            callable_functions_list: List[Callable] | None,
+            callable_classes_dict: Dict[str, Callable] | None,
+            callable_classes_list: List[Callable] | None,
+            **kwargs
+    ) -> Any:
+        raise NotImplementedError(
+            'Must provide an evaluator for a Python program. '
+            'Override this method in a subclass.'
+        )
+
+    def _evaluate_and_put_res_in_manager_dict(
+            self,
+            program_str: str,
+            result_dict: multiprocessing.managers.DictProxy,
+            signal_queue: multiprocessing.Queue,
+            redirect_to_devnull: bool,
+            **kwargs
+    ):
+        """Evaluate and store the result in a Manager().dict() (for large results)."""
+        if redirect_to_devnull:
+            with open(os.devnull, 'w') as devnull:
+                os.dup2(devnull.fileno(), sys.stdout.fileno())
+                os.dup2(devnull.fileno(), sys.stderr.fileno())
+        try:
+            # Evaluate and get the result
+            res = self.evaluate(program_str, **kwargs)
+            # Write the result into the dict
+            result_dict['result'] = res
+            # Put a signal on the queue to inform the parent process that the evaluation has finished
+            signal_queue.put(('ok', None))
+        except Exception as e:
+            if self.debug_mode:
+                traceback.print_exc()
+            # Write the result into the dict
+            result_dict['result'] = None
+            # Put a signal on the queue to inform the parent process that the evaluation has failed
+            signal_queue.put(('error', str(e)))
+
+    def secure_evaluate(
+            self,
+            program: str | PyProgram,
+            timeout_seconds: int | float = None,
+            redirect_to_devnull: bool = False,
+            multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+            get_evaluate_time: bool = False,
+            **kwargs
+    ):
+        """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+        Args:
+            program: the program to be evaluated.
+            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+            redirect_to_devnull: redirect any output to '/dev/null'.
+            multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                the process is started using 'fork' on Linux/macOS and 'spawn' on Windows.
+                If set to 'default', the system default is left unchanged.
+            get_evaluate_time: also return the evaluation time for this program.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            The evaluation result. If 'get_evaluate_time' is True,
+            the return value is a tuple of (result, evaluation time).
+        """
+        _set_mp_start_method(multiprocessing_start_method)
+
+        with multiprocessing.Manager() as manager:
+            # Pass a dictionary to the evaluation process to receive possibly very big returned objects
+            result_dict = manager.dict()
+            # Pass a queue to the evaluation process to signal whether the evaluation has terminated
+            signal_queue = multiprocessing.Queue()
+            # Start the evaluation process
+            process = multiprocessing.Process(
+                target=self._evaluate_and_put_res_in_manager_dict,
+                args=(str(program), result_dict, signal_queue, redirect_to_devnull),
+                kwargs=kwargs,
+            )
+            evaluate_start_time = time.time()
+            process.start()
+
+            try:
+                if timeout_seconds is not None:
+                    try:
+                        # If there is a timeout restriction, try to get the signal before the timeout
+                        signal = signal_queue.get(timeout=timeout_seconds)
+                    except Empty:
+                        # An evaluation timeout happened; return 'None' along with the actual evaluation time
+                        eval_time = time.time() - evaluate_start_time
+                        if self.debug_mode:
+                            print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                        # Terminate and kill all processes after evaluation
+                        self._kill_process_and_its_children(process)
+                        return (None, eval_time) if get_evaluate_time else None
+                else:
+                    # If there is no timeout restriction, wait until the evaluation terminates
+                    signal = signal_queue.get()
+
+                # Calculate the evaluation time and kill child processes
+                eval_time = time.time() - evaluate_start_time
+                # Terminate and kill all processes after evaluation
+                self._kill_process_and_its_children(process)
+
+                # A first element of 'ok' indicates that the evaluation terminated without exceptions
+                if signal[0] == 'ok':
+                    # Get the evaluation result from the manager dict
+                    result = result_dict.get('result', None)
+                else:
+                    # The evaluation failed for some reason, so set the result to 'None'
+                    if self.debug_mode:
+                        print(f'DEBUG: child process error: {signal[1]}')
+                    result = None
+            except Exception:
+                # If there is any exception during the above procedure, set the result to None
+                eval_time = time.time() - evaluate_start_time
+                if self.debug_mode:
+                    print(f'DEBUG: exception in manager evaluate:\n{traceback.format_exc()}')
+                # Terminate and kill all processes after evaluation
+                self._kill_process_and_its_children(process)
+                result = None
+
+            return (result, eval_time) if get_evaluate_time else result
+
+
+class PyEvaluatorForBigReturnedObjectV2(PyEvaluator):
+
+    def __init__(
+            self,
+            exec_code: bool = True,
+            find_and_kill_children_evaluation_process: bool = False,
+            debug_mode: bool = False,
+            *,
+            join_timeout_seconds: int = 10
+    ):
+        """Evaluator interface for evaluating a Python algorithm program. Override this class and implement
+        the 'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
+        Note: This class supports 'secure_evaluate' with very big returned objects, e.g., tensors.
+
+        Args:
+            exec_code: Use 'exec()' to execute the program code and obtain the callable functions and classes,
+                which are passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
+                evaluate a Python script. Note that if this parameter is set to 'False', the 'callable_...'
+                arguments in 'self.evaluate_program()' are no longer effective.
+            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill child processes
+                when the evaluation process is terminated. It is suggested to set this to 'False' if the
+                evaluation process does not start new processes.
+            debug_mode: Debug mode.
+            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process on timeout.
+        """
+        super().__init__(
+            exec_code,
+            find_and_kill_children_evaluation_process,
+            debug_mode,
+            join_timeout_seconds=join_timeout_seconds
+        )
+
+    @abstractmethod
+    def evaluate_program(
+            self,
+            program_str: str,
+            callable_functions_dict: Dict[str, Callable] | None,
+            callable_functions_list: List[Callable] | None,
+            callable_classes_dict: Dict[str, Callable] | None,
+            callable_classes_list: List[Callable] | None,
+            **kwargs
+    ) -> Any:
+        """Evaluate a given program.
+        Args:
+            program_str: The raw program text.
+            callable_functions_dict: A dict mapping function names to callable functions.
+            callable_functions_list: A list of callable functions.
+            callable_classes_dict: A dict mapping class names to callable classes.
+            callable_classes_list: A list of callable classes.
+        Returns:
+            The evaluation result.
+        """
+        raise NotImplementedError(
+            'Must provide an evaluator for a Python program. '
+            'Override this method in a subclass.'
+        )
+
+    def _evaluate_and_put_res_in_shared_memory(
+            self,
+            program_str: str,
+            meta_queue: multiprocessing.Queue,
+            redirect_to_devnull: bool,
+            **kwargs
+    ):
+        """Evaluate and store the result in shared memory (for large results)."""
+        # Redirect STDOUT and STDERR to '/dev/null'
+        if redirect_to_devnull:
+            with open(os.devnull, 'w') as devnull:
+                os.dup2(devnull.fileno(), sys.stdout.fileno())
+                os.dup2(devnull.fileno(), sys.stderr.fileno())
+
+        try:
+            # Evaluate inside the 'try' so that a failing evaluation is also reported to the parent
+            res = self.evaluate(program_str, **kwargs)
+
+            # Serialize the result
+            data = pickle.dumps(res, protocol=pickle.HIGHEST_PROTOCOL)
+            # Create shared memory with the size of the data
+            shm = shared_memory.SharedMemory(create=True, size=len(data))
+            # Write the data
+            shm.buf[:len(data)] = data
+            # Send back the shm metadata (shared_mem_name, shared_mem_size) through the queue
+            meta_queue.put((shm.name, len(data)))
+            # The child closes its handle
+            shm.close()
+        except Exception as e:
+            # Put the exception message on the queue
+            meta_queue.put((None, str(e)))
+
+    def secure_evaluate(
+            self,
+            program: str | PyProgram,
+            timeout_seconds: int | float = None,
+            redirect_to_devnull: bool = False,
+            multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
+            get_evaluate_time: bool = False,
+            **kwargs
+    ):
+        """Evaluate the program in a new process. This enables timeout restriction and output redirection.
+        Args:
+            program: the program to be evaluated.
+            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
+            redirect_to_devnull: redirect any output to '/dev/null'.
+            multiprocessing_start_method: start the process using 'fork' or 'spawn'. If set to 'auto',
+                the process is started using 'fork' on Linux/macOS and 'spawn' on Windows.
+                If set to 'default', the system default is left unchanged.
+            get_evaluate_time: also return the evaluation time for this program.
+            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
+        Returns:
+            The evaluation result. If 'get_evaluate_time' is True,
+            the return value is a tuple of (result, evaluation time).
+        """
+        if multiprocessing_start_method == 'auto':
+            if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
+                multiprocessing.set_start_method('fork', force=True)
+        elif multiprocessing_start_method == 'fork':
+            multiprocessing.set_start_method('fork', force=True)
+        elif multiprocessing_start_method == 'spawn':
+            multiprocessing.set_start_method('spawn', force=True)
+
+        meta_queue = multiprocessing.Queue()
+
+        process = multiprocessing.Process(
+            target=self._evaluate_and_put_res_in_shared_memory,
+            args=(str(program), meta_queue, redirect_to_devnull),
+            kwargs=kwargs,
+        )
+
+        evaluate_start_time = time.time()
+        process.start()
+
+        try:
+            if timeout_seconds is not None:
+                try:
+                    # Try to get the metadata before the timeout
+                    meta = meta_queue.get(timeout=timeout_seconds)
+                except Empty:
+                    # Evaluation timeout
+                    eval_time = time.time() - evaluate_start_time
+                    if self.debug_mode:
+                        print(f'DEBUG: evaluation time exceeds {timeout_seconds}s.')
+                    self._kill_process_and_its_children(process)
+                    return (None, eval_time) if get_evaluate_time else None
+            else:
+                meta = meta_queue.get()
+
+            # Calculate the evaluation time
+            eval_time = time.time() - evaluate_start_time
+            self._kill_process_and_its_children(process)
+
+            # If the first element in the metadata is None,
+            # the child failed to evaluate or to write to shared memory
+            if meta[0] is None:
+                if self.debug_mode:
+                    print(f'DEBUG: shared memory failed with exception: {meta[1]}')
+                result = None
+            else:
+                # Read the result from the metadata
+                shm_name, size = meta
+                shm = shared_memory.SharedMemory(name=shm_name)
+                buf = bytes(shm.buf[:size])
+                # Load the result from the buffer
+                result = pickle.loads(buf)
+                shm.close()
+                try:
+                    shm.unlink()
+                except FileNotFoundError:
+                    pass
+        except Exception:
+            eval_time = time.time() - evaluate_start_time
+            if self.debug_mode:
+                print(f'DEBUG: exception in shared evaluate:\n{traceback.format_exc()}')
+            self._kill_process_and_its_children(process)
+            result = None
+
+        return (result, eval_time) if get_evaluate_time else result
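
PyEvaluatorForBigReturnedObject above swaps the plain result queue for a Manager().dict() plus a small signal queue: only the ('ok'/'error') tuple travels through the queue, while the potentially large result object is handed back via the manager. The V2 variant does the same through shared_memory plus pickle. A minimal usage sketch, assuming the import name py_adtools and an illustrative NumPy payload (neither is taken from this diff):

import numpy as np
from py_adtools import PyEvaluatorForBigReturnedObject

class BigOutputEvaluator(PyEvaluatorForBigReturnedObject):
    def evaluate_program(self, program_str, callable_functions_dict,
                         callable_functions_list, callable_classes_dict,
                         callable_classes_list, **kwargs):
        # returns a multi-megabyte array; large payloads are exactly
        # the case this subclass is designed for
        return callable_functions_dict['make_matrix']()

program_text = (
    'import numpy as np\n'
    'def make_matrix():\n'
    '    return np.ones((2048, 2048))\n'
)
result, seconds = BigOutputEvaluator().secure_evaluate(
    program_text, timeout_seconds=60, get_evaluate_time=True
)
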
@@ -6,7 +6,7 @@ Commercial use of this software or its derivatives requires prior written permis
 """
 
 import os
-from typing import List
+from typing import List, Optional
 
 import openai.types.chat
 
@@ -46,8 +46,8 @@ class OpenAIAPI(LanguageModel):
     def chat_completion(
             self,
             message: str | List[openai.types.chat.ChatCompletionMessageParam],
-            max_tokens: int,
-            timeout_seconds: float,
+            max_tokens: Optional[int] = None,
+            timeout_seconds: Optional[float] = None,
             *args,
             **kwargs
     ):
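
Both limits of 'chat_completion' are now optional keyword arguments rather than required positional ones. A hedged call sketch; the constructor of OpenAIAPI is not shown in this diff, so it is elided here:

llm = OpenAIAPI(...)  # constructor arguments not shown in this diff
reply = llm.chat_completion('Improve this function.')  # limits may now be omitted
reply = llm.chat_completion('Improve this function.', max_tokens=1024, timeout_seconds=30.0)
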
@@ -75,6 +75,7 @@ class VLLMServer(LanguageModel):
         env_variable_dict: Environment variables to use for vLLM server, e.g., {'KEY': 'VALUE'}.
         vllm_serve_args: Arguments to pass to vLLM server, e.g., ['--enable-reasoning'].
         vllm_serve_kwargs: Keyword arguments to pass to vLLM server, e.g., {'--reasoning-parser': 'deepseek-r1'}.
+        chat_template_kwargs: Keyword arguments to pass to chat template, e.g., {'enable_thinking': False}.
 
     Example:
         # deploy a model on GPU 0 and 1
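
The new 'chat_template_kwargs' entry documents a pass-through to the model's chat template; {'enable_thinking': False}, for instance, turns off the thinking block on templates that support that switch. A sketch under the assumption that the documented name is a constructor argument (the remaining arguments are not shown in this diff):

server = VLLMServer(
    ...,  # model path, GPUs, and the other documented arguments
    chat_template_kwargs={'enable_thinking': False},
)
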
@@ -163,11 +163,14 @@ class PyProgram:
         return program
 
     @classmethod
-    def from_text(cls, text: str) -> 'PyProgram':
-        tree = ast.parse(text)
-        visitor = _ProgramVisitor(text)
-        visitor.visit(tree)
-        return visitor.return_program()
+    def from_text(cls, text: str) -> Optional['PyProgram']:
+        try:
+            tree = ast.parse(text)
+            visitor = _ProgramVisitor(text)
+            visitor.visit(tree)
+            return visitor.return_program()
+        except Exception:
+            return None
 
 
 class _ProgramVisitor(ast.NodeVisitor):
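
Because 'from_text' now returns None on unparseable input instead of raising, call sites should check the result before using it. A minimal sketch, assuming the import name py_adtools:

from py_adtools import PyProgram

program = PyProgram.from_text('def broken(:')  # not valid Python
if program is None:
    print('candidate program is not parseable; skipping it')
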
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: py-adtools
-Version: 0.1.8
+Version: 0.1.10
 Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
 Home-page: https://github.com/RayZhhh/py-adtools
 Author: Rui Zhang
@@ -5,7 +5,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:
 
 setup(
     name='py-adtools',
-    version='0.1.8',
+    version='0.1.10',
     author='Rui Zhang',
    author_email='rzhang.cs@gmail.com',
     description='Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.',
@@ -1,236 +0,0 @@
-"""
-Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
-
-NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
-Commercial use of this software or its derivatives requires prior written permission.
-"""
-
-import multiprocessing
-import os
-import sys
-import time
-from abc import ABC, abstractmethod
-from queue import Empty
-from typing import Any, Literal, Dict, Callable, List, Tuple
-import psutil
-import traceback
-
-from .py_code import PyProgram
-
-
-class PyEvaluator(ABC):
-
-    def __init__(
-            self,
-            exec_code: bool = True,
-            find_and_kill_children_evaluation_process: bool = False,
-            debug_mode: bool = False,
-            *,
-            join_timeout_seconds: int = 10
-    ):
-        """Evaluator interface for evaluating the Python algorithm program. Override this class and implement
-        'evaluate_program' method, then invoke 'self.evaluate()' or 'self.secure_evaluate()' for evaluation.
-        Args:
-            exec_code: Using 'exec()' to execute the program code and obtain the callable functions and classes,
-                which will be passed to 'self.evaluate_program()'. Set this parameter to 'False' if you are going to
-                evaluate a Python scripy. Note that if the parameter is set to 'False', the arguments 'callable_...'
-                in 'self.evaluate_program()' will no longer be affective.
-            find_and_kill_children_evaluation_process: If using 'self.secure_evaluate', kill children processes
-                when they are terminated. Note that it is suggested to set to 'False' if the evaluation process
-                does not start new processes.
-            debug_mode: Debug mode.
-            join_timeout_seconds: Timeout in seconds to wait for the process to finish. Kill the process if timeout.
-        """
-        self.debug_mode = debug_mode
-        self.exec_code = exec_code
-        self.find_and_kill_children_evaluation_process = find_and_kill_children_evaluation_process
-        self.join_timeout_seconds = join_timeout_seconds
-
-    @abstractmethod
-    def evaluate_program(
-            self,
-            program_str: str,
-            callable_functions_dict: Dict[str, Callable] | None,
-            callable_functions_list: List[Callable] | None,
-            callable_classes_dict: Dict[str, Callable] | None,
-            callable_classes_list: List[Callable] | None,
-            **kwargs
-    ) -> Any:
-        """Evaluate a given program.
-        Args:
-            program_str: The raw program text.
-            callable_functions_dict: A dict maps function name to callable function.
-            callable_functions_list: A list of callable functions.
-            callable_classes_dict: A dict maps class name to callable class.
-            callable_classes_list: A list of callable classes.
-        Returns:
-            Returns the evaluation result.
-        """
-        raise NotImplementedError(
-            'Must provide an evaluator for a python program. '
-            'Override this method in a subclass.'
-        )
-
-    def _kill_process_and_its_children(self, process: multiprocessing.Process):
-        if self.find_and_kill_children_evaluation_process:
-            # Find all children processes
-            try:
-                parent = psutil.Process(process.pid)
-                children_processes = parent.children(recursive=True)
-            except psutil.NoSuchProcess:
-                children_processes = []
-        else:
-            children_processes = []
-        # Terminate parent process
-        process.terminate()
-        process.join(timeout=self.join_timeout_seconds)
-        if process.is_alive():
-            process.kill()
-            process.join()
-        # Kill all children processes
-        for child in children_processes:
-            if self.debug_mode:
-                print(f"Killing process {process.pid}'s children process {child.pid}")
-            child.terminate()
-
-    def evaluate(self, program: str | PyProgram, **kwargs):
-        """Evaluate a program.
-        Args:
-            program: the program to be evaluated.
-            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
-        """
-        try:
-            # Parse to program instance
-            if isinstance(program, str):
-                program = PyProgram.from_text(program)
-            function_names = [f.name for f in program.functions]
-            class_names = [c.name for c in program.classes]
-
-            # Execute the code and get callable instances
-            if self.exec_code:
-                all_globals_namespace = {}
-                # Execute the program, map func/var/class to global namespace
-                exec(str(program), all_globals_namespace)
-                # Get callable functions
-                callable_funcs_list = [all_globals_namespace[f_name] for f_name in function_names]
-                callable_funcs_dict = dict(zip(function_names, callable_funcs_list))
-                # Get callable classes
-                callable_cls_list = [all_globals_namespace[c_name] for c_name in class_names]
-                callable_cls_dict = dict(zip(class_names, callable_cls_list))
-            else:
-                callable_funcs_list, callable_funcs_dict, callable_cls_list, callable_cls_dict = (
-                    None, None, None, None
-                )
-
-            # Get evaluate result
-            res = self.evaluate_program(
-                str(program),
-                callable_funcs_dict,
-                callable_funcs_list,
-                callable_cls_dict,
-                callable_cls_list,
-                **kwargs
-            )
-            return res
-        except Exception as e:
-            if self.debug_mode:
-                print(traceback.format_exc())
-            return None
-
-    def _evaluate_in_safe_process(
-            self,
-            program_str: str,
-            result_queue: multiprocessing.Queue,
-            redirect_to_devnull: bool,
-            **kwargs
-    ):
-        # Redirect STDOUT and STDERR to '/dev/null'
-        if redirect_to_devnull:
-            with open(os.devnull, 'w') as devnull:
-                os.dup2(devnull.fileno(), sys.stdout.fileno())
-                os.dup2(devnull.fileno(), sys.stderr.fileno())
-
-        # Evaluate and put the results to the queue
-        res = self.evaluate(program_str, **kwargs)
-        result_queue.put(res)
-
-    def secure_evaluate(
-            self,
-            program: str | PyProgram,
-            timeout_seconds: int | float = None,
-            redirect_to_devnull: bool = False,
-            multiprocessing_start_method: Literal['default', 'auto', 'fork', 'spawn'] = 'auto',
-            get_evaluate_time=False,
-            **kwargs
-    ) -> Any | Tuple[Any, float]:
-        """Evaluate program in a new process. This enables timeout restriction and output redirection.
-        Args:
-            program: the program to be evaluated.
-            timeout_seconds: return 'None' if the execution time exceeds 'timeout_seconds'.
-            redirect_to_devnull: redirect any output to '/dev/null'.
-            multiprocessing_start_method: start a process using 'fork' or 'spawn'. If set to 'auto',
-                the process will be started using 'fork' with Linux/macOS and 'spawn' with Windows.
-                If set to 'default', there will be no changes to system default.
-            get_evaluate_time: get evaluation time for this program.
-            **kwargs: additional keyword arguments to pass to 'evaluate_program'.
-        Returns:
-            Returns the evaluation results. If the 'get_evaluate_time' is True,
-            the return value will be (Results, Time).
-        """
-        if multiprocessing_start_method == 'auto':
-            # Force macOS and Linux use 'fork' to generate new process
-            if sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
-                multiprocessing.set_start_method('fork', force=True)
-        elif multiprocessing_start_method == 'fork':
-            multiprocessing.set_start_method('fork', force=True)
-        elif multiprocessing_start_method == 'spawn':
-            multiprocessing.set_start_method('spawn', force=True)
-
-        try:
-            # Start evaluation process
-            result_queue = multiprocessing.Queue()
-            process = multiprocessing.Process(
-                target=self._evaluate_in_safe_process,
-                args=(str(program), result_queue, redirect_to_devnull),
-                kwargs=kwargs,
-            )
-            evaluate_start_time = time.time()
-            process.start()
-
-            if timeout_seconds is not None:
-                try:
-                    # Get the result in timeout seconds
-                    result = result_queue.get(timeout=timeout_seconds)
-                    # Calculate the evaluate time
-                    eval_time = time.time() - evaluate_start_time
-                    # After getting the result, terminate/kill the process
-                    self._kill_process_and_its_children(process)
-                except Empty:  # The queue is empty indicates a timeout
-                    # Calculate the evaluate time
-                    eval_time = time.time() - evaluate_start_time
-                    if self.debug_mode:
-                        print(f'DEBUG: the evaluation time exceeds {timeout_seconds}s.')
-                    # Terminate/kill all processes if timeout happens
-                    self._kill_process_and_its_children(process)
-                    result = None
-                except Exception as e:
-                    # Calculate the evaluate time
-                    eval_time = time.time() - evaluate_start_time
-                    if self.debug_mode:
-                        print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
-                    # Terminate/kill all processes if meet exceptions
-                    self._kill_process_and_its_children(process)
-                    result = None
-            else:
-                # If there is no timeout limit, wait execution to finish
-                result = result_queue.get()
-                # Calculate the evaluate time
-                eval_time = time.time() - evaluate_start_time
-                # Terminate/kill all processes after evaluation
-                self._kill_process_and_its_children(process)
-
-            return (result, eval_time) if get_evaluate_time else result
-        except Exception as e:
-            if self.debug_mode:
-                print(traceback.format_exc())
-            return None