py-adtools 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: py-adtools
-Version: 0.1.4
+Version: 0.1.6
 Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
 Home-page: https://github.com/RayZhhh/py-adtools
 Author: Rui Zhang
@@ -13,6 +13,7 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: psutil
+Requires-Dist: openai
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

adtools/evaluator.py

@@ -12,6 +12,7 @@ from abc import ABC, abstractmethod
 from queue import Empty
 from typing import Any, Literal, Dict, Callable, List
 import psutil
+import traceback

 from .py_code import PyProgram

@@ -124,7 +125,7 @@ class PyEvaluator(ABC):
             return res
         except Exception as e:
             if self.debug_mode:
-                print(e)
+                print(traceback.format_exc())
             return None

     def _evaluate_in_safe_process(
@@ -195,7 +196,7 @@ class PyEvaluator(ABC):
                result = None
            except Exception as e:
                if self.debug_mode:
-                   print(f'DEBUG: evaluation failed with exception:\n{e}')
+                   print(f'DEBUG: evaluation failed with exception:\n{traceback.format_exc()}')
                # Terminate/kill all processes if meet exceptions
                self._kill_process_and_its_children(process)
                result = None
@@ -207,5 +208,5 @@ class PyEvaluator(ABC):
            return result
        except Exception as e:
            if self.debug_mode:
-                print(e)
+                print(traceback.format_exc())
            return None
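
The evaluator change is purely diagnostic: each `print(e)` in the exception handlers becomes `print(traceback.format_exc())`, so debug mode now reports the full stack trace instead of only the exception message. A minimal standalone sketch of the difference (illustrative, not py-adtools code):

    import traceback

    def evaluate():
        return 1 / 0  # hypothetical failing evaluation

    try:
        evaluate()
    except Exception as e:
        print(e)                       # 0.1.4 behaviour: just "division by zero"
        print(traceback.format_exc())  # 0.1.6 behaviour: full traceback with file and line info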

py_adtools.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: py-adtools
-Version: 0.1.4
+Version: 0.1.6
 Summary: Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.
 Home-page: https://github.com/RayZhhh/py-adtools
 Author: Rui Zhang
@@ -13,6 +13,7 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: psutil
+Requires-Dist: openai
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

py_adtools.egg-info/SOURCES.txt

@@ -4,7 +4,6 @@ setup.py
 adtools/__init__.py
 adtools/evaluator.py
 adtools/evaluator_pool.py
-adtools/lm_base.py
 adtools/py_code.py
 py_adtools.egg-info/PKG-INFO
 py_adtools.egg-info/SOURCES.txt

setup.py

@@ -5,7 +5,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:

 setup(
     name='py-adtools',
-    version='0.1.4',
+    version='0.1.6',
     author='Rui Zhang',
     author_email='rzhang.cs@gmail.com',
     description='Useful tools for parsing and evaluating Python programs for LLM-based algorithm design.',
@@ -20,5 +20,5 @@ setup(
         'Topic :: Scientific/Engineering',
     ],
     python_requires='>=3.10',
-    install_requires=['psutil'],
+    install_requires=['psutil', 'openai'],
 )
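
Because 0.1.6 declares openai in both the package metadata and setup.py, installing it now pulls in the openai SDK even though the module that imported it (adtools/lm_base.py, removed below) is gone. One way to confirm the declared dependencies after installing, using only the standard library on Python 3.10+:

    from importlib.metadata import requires

    print(requires('py-adtools'))  # expected to include 'psutil' and 'openai'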

adtools/lm_base.py (removed in 0.1.6)

@@ -1,403 +0,0 @@
-"""
-Copyright (c) 2025 Rui Zhang <rzhang.cs@gmail.com>
-
-NOTICE: This code is under MIT license. This code is intended for academic/research purposes only.
-Commercial use of this software or its derivatives requires prior written permission.
-"""
-
-from abc import abstractmethod
-from typing import Optional, List, Literal, Dict, Any
-import os
-import subprocess
-import sys
-from pathlib import Path
-import psutil
-import requests
-import time
-
-import openai.types.chat
-
-__all__ = ['LanguageModel', 'OpenAIAPI', 'VLLMServer']
-
-
-class LanguageModel:
-    """Base class for language model interface."""
-
-    @abstractmethod
-    def chat_completion(
-            self,
-            message: str | List[openai.types.chat.ChatCompletionMessageParam],
-            max_tokens: int,
-            timeout_seconds: float,
-            *args,
-            **kwargs
-    ):
-        """Send a chat completion query with OpenAI format to the vLLM server. Return the response content.
-        Args:
-            message: The message in str or openai format.
-            max_tokens: The maximum number of tokens to generate.
-            timeout_seconds: The timeout seconds.
-        """
-        pass
-
-    def close(self):
-        """Release resources (if necessary)."""
-        pass
-
-
-class OpenAIAPI(LanguageModel):
-    def __init__(
-            self,
-            model: str,
-            base_url: str = None,
-            api_key: str = None,
-            **openai_init_kwargs
-    ):
-        super().__init__()
-        # If base_url is set to None, find 'OPENAI_BASE_URL' in environment variables
-        if base_url is None:
-            if 'OPENAI_BASE_URL' not in os.environ:
-                raise RuntimeError('If "base_url" is None, the environment variable OPENAI_BASE_URL must be set.')
-            else:
-                base_url = os.environ['OPENAI_BASE_URL']
-
-        # If api_key is set to None, find 'OPENAI_API_KEY' in environment variables
-        if api_key is None:
-            if 'OPENAI_API_KEY' not in os.environ:
-                raise RuntimeError('If "api_key" is None, OPENAI_API_KEY must be set.')
-            else:
-                api_key = os.environ['OPENAI_API_KEY']
-
-        self._model = model
-        self._client = openai.OpenAI(
-            api_key=api_key,
-            base_url=base_url,
-            **openai_init_kwargs
-        )
-
-    def chat_completion(
-            self,
-            message: str | List[openai.types.chat.ChatCompletionMessageParam],
-            max_tokens: int,
-            timeout_seconds: float,
-            *args,
-            **kwargs
-    ):
-        """Send a chat completion query with OpenAI format to the vLLM server. Return the response content.
-        Args:
-            message: The message in str or openai format.
-            max_tokens: The maximum number of tokens to generate.
-            timeout_seconds: The timeout seconds.
-        """
-        if isinstance(message, str):
-            message = [{'role': 'user', 'content': message.strip()}]
-
-        response = self._client.chat.completions.create(
-            model=self._model,
-            messages=message,
-            stream=False,
-            max_tokens=max_tokens,
-            timeout=timeout_seconds,
-            *args,
-            **kwargs,
-        )
-        return response.choices[0].message.content
-
-
-def _print_cmd_list(cmd_list, gpus, host, port):
-    print('\n' + '=' * 80)
-    print(f'[vLLM] Launching vLLM on GPU:{gpus}; URL: https://{host}:{port}')
-    print('=' * 80)
-    cmd = cmd_list[0] + ' \\\n'
-    for c in cmd_list[1:]:
-        cmd += ' ' + c + ' \\\n'
-    print(cmd.strip())
-    print('=' * 80 + '\n', flush=True)
-
-
-class VLLMServer:
-    def __init__(self,
-                 model_path: str,
-                 port: int,
-                 gpus: int | list[int],
-                 tokenizer_path: Optional[str] = None,
-                 max_model_len: int = 16384,
-                 max_lora_rank: Optional[int] = None,
-                 host: str = '0.0.0.0',
-                 mem_util: float = 0.85,
-                 deploy_timeout_seconds: int = 600,
-                 enforce_eager: bool = False,
-                 vllm_log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO',
-                 silent_mode: bool = False,
-                 env_variable_dict: Optional[Dict[str, str]] = None,
-                 vllm_serve_args: Optional[List[str]] = None,
-                 vllm_serve_kwargs: Optional[Dict[str, str]] = None,
-                 chat_template_kwargs: Optional[Dict[str, Any]] = None):
-        """Deploy an LLM on specified GPUs.
-        Args:
-            model_path: Path to the model to deploy.
-            tokenizer_path: Path to the tokenizer to use.
-            port: List of ports to deploy.
-            gpus: List of GPUs to deploy.
-            max_lora_rank: Max rank of LoRA adapter. Defaults to `None` which disables LoRA adapter.
-            host: Host address for vLLM server.
-            mem_util: Memory utility for each vLLM deployment.
-            deploy_timeout_seconds: Timeout to deploy (in seconds).
-            enforce_eager: Enforce eager mode.
-            vllm_log_level: Log level of vLLM server.
-            silent_mode: Silent mode.
-            env_variable_dict: Environment variables to use for vLLM server, e.g., {'KEY': 'VALUE'}.
-            vllm_serve_args: Arguments to pass to vLLM server, e.g., ['--enable-reasoning'].
-            vllm_serve_kwargs: Keyword arguments to pass to vLLM server, e.g., {'--reasoning-parser': 'deepseek-r1'}.
-
-        Example:
-            # deploy a model on GPU 0 and 1
-            llm = VLLMServer(
-                model_path='path/to/model',
-                tokenizer_path='path/to/tokenizer',
-                gpus=[0, 1],  # set gpus=0 or gpus=[0] if you only use one GPU
-                port=12001,
-                mem_util=0.8
-            )
-            # draw sample using base model
-            llm.draw_sample('hello')
-
-            # load adapter and draw sample
-            llm.load_lora_adapter('adapter_1', '/path/to/adapter')
-            llm.draw_sample('hello', lora_name='adapter_1')
-
-            # unload adapter
-            llm.unload_lora_adapter('adapter_1')
-
-            # release resources
-            llm.close()
-        """
-        self._model_path = model_path
-        self._port = port
-        self._gpus = gpus
-        self._tokenizer_path = tokenizer_path if tokenizer_path is not None else model_path
-        self._max_model_len = max_model_len
-        self._max_lora_rank = max_lora_rank
-        self._host = host
-        self._mem_util = mem_util
-        self._deploy_timeout_seconds = deploy_timeout_seconds
-        self._enforce_eager = enforce_eager
-        self._vllm_log_level = vllm_log_level
-        self._silent_mode = silent_mode
-        self._env_variable_dict = env_variable_dict
-        self._vllm_serve_args = vllm_serve_args
-        self._vllm_serve_kwargs = vllm_serve_kwargs
-        self._chat_template_kwargs = chat_template_kwargs
-
-        # Deploy vLLMs
-        self._process = self._launch_vllm()
-        self._wait_for_vllm()
-
-    def _launch_vllm(self):
-        """Launch a vLLM server and return the subprocess.
-        """
-        if isinstance(self._gpus, int):
-            gpus = str(self._gpus)
-        else:
-            gpus = ','.join([str(g) for g in self._gpus])
-
-        executable_path = sys.executable
-        cmd = [
-            executable_path, '-m',
-            'vllm.entrypoints.openai.api_server',
-            '--model', self._model_path,
-            '--tokenizer', self._tokenizer_path,
-            '--max_model_len', str(self._max_model_len),
-            '--host', self._host,
-            '--port', str(self._port),
-            '--gpu-memory-utilization', str(self._mem_util),
-            '--tensor-parallel-size', str(len(self._gpus)) if isinstance(self._gpus, list) else '1',
-            '--trust-remote-code',
-            '--chat-template-content-format', 'string',
-        ]
-
-        if self._enforce_eager:
-            cmd.append('--enforce_eager')
-
-        # Other args for vllm serve
-        if self._vllm_serve_args is not None:
-            for arg in self._vllm_serve_args:
-                cmd.append(arg)
-
-        # Other kwargs for vllm serve
-        if self._vllm_serve_kwargs is not None:
-            for kwarg, value in self._vllm_serve_kwargs.items():
-                cmd.extend([kwarg, value])
-
-        # Environmental variables
-        env = os.environ.copy()
-        env['CUDA_VISIBLE_DEVICES'] = gpus
-        env['VLLM_LOGGING_LEVEL'] = self._vllm_log_level
-
-        # FIXME: These code are required for my machine :-(
-        # FIXME: This may due to the bad NCCL environment configuration :-(
-        if isinstance(self._gpus, list) and len(self._gpus) > 1:
-            # set NCCL environment variable
-            env['NCCL_P2P_DISABLE'] = '1'
-            # disable custom all reduce
-            cmd.append('--disable-custom-all-reduce')
-
-        # Enable LoRA dynamic loading
-        if self._max_lora_rank is not None:
-            cmd.extend([
-                '--enable-lora',
-                '--max-lora-rank', str(self._max_lora_rank),
-            ])
-            env['VLLM_ALLOW_RUNTIME_LORA_UPDATING'] = 'True'
-
-        # Other env variables
-        if self._env_variable_dict is not None:
-            for k, v in self._env_variable_dict.items():
-                env[k] = v
-
-        _print_cmd_list(cmd, gpus=self._gpus, host=self._host, port=self._port)
-
-        # Launch vllm using subprocess
-        stdout = Path(os.devnull).open('w') if self._silent_mode else None
-        proc = subprocess.Popen(cmd, env=env, stdout=stdout, stderr=subprocess.STDOUT)
-        return proc
-
-    def _kill_vllm_process(self):
-        try:
-            # Get child processes before terminating parent
-            try:
-                parent = psutil.Process(self._process.pid)
-                children = parent.children(recursive=True)
-            except psutil.NoSuchProcess:
-                children = []
-
-            # Terminate parent process
-            self._process.terminate()
-            self._process.wait(timeout=5)
-            print(f'[vLLM] terminated process: {self._process.pid}')
-
-            # Kill any remaining children
-            for child in children:
-                try:
-                    child.terminate()
-                    child.wait(timeout=2)
-                except (psutil.NoSuchProcess, psutil.TimeoutExpired):
-                    try:
-                        child.kill()
-                    except psutil.NoSuchProcess:
-                        pass
-        except subprocess.TimeoutExpired:
-            self._process.kill()
-            print(f'[vLLM] killed process: {self._process.pid}')
-
-    def _wait_for_vllm(self):
-        """Check each vLLM server's state and check /health. Kill all vLLM server processes if timeout.
-        """
-        for _ in range(self._deploy_timeout_seconds):
-            # check process status
-            if self._process.poll() is not None:
-                sys.exit(f'[vLLM] crashed (exit {self._process.returncode})')
-
-            # check server status
-            health = f'http://{self._host}:{self._port}/health'
-            try:
-                if requests.get(health, timeout=1).status_code == 200:
-                    return
-            except Exception:
-                pass
-            time.sleep(1)
-
-        # Servers fail to initialize
-        print('[vLLM] failed to start within timeout')
-        self._kill_vllm_process()
-        sys.exit('[vLLM] failed to start within timeout')
-
-    def unload_lora_adapter(self, lora_name: str):
-        """Unload lora adapter given the lora name.
-        Args:
-            lora_name: Lora adapter name.
-        """
-        lora_api_url = f'http://{self._host}:{self._port}/v1/unload_lora_adapter'
-        headers = {'Content-Type': 'application/json'}
-        try:
-            payload = {'lora_name': lora_name}
-            requests.post(lora_api_url, json=payload, headers=headers, timeout=10)
-        except requests.exceptions.RequestException:
-            pass
-
-    def load_lora_adapter(self, lora_name: str, new_adapter_path: str, num_trails: int = 5):
-        """Dynamically load a LoRA adapter.
-        Args:
-            lora_name: LoRA adapter name.
-            new_adapter_path: Path to the new LoRA adapter weights.
-        """
-        # First unload lora adapter
-        self.unload_lora_adapter(lora_name)
-
-        if self._max_lora_rank is None:
-            raise ValueError('LoRA is not enabled for this VLLMServer instance, since "max_lora_rank" is not set.')
-
-        # Prepare the payload for LoRA update
-        payload = {'lora_name': lora_name, 'lora_path': new_adapter_path}
-        headers = {'Content-Type': 'application/json'}
-        lora_api_url = f'http://{self._host}:{self._port}/v1/load_lora_adapter'
-
-        # Repeatedly trying to load lora adapters
-        for i in range(num_trails):
-            try:
-                response = requests.post(lora_api_url, json=payload, headers=headers, timeout=60)
-                if response.status_code == 200:
-                    print(f'[vLLM] Successfully load LoRA adapter: {lora_name} from {new_adapter_path}')
-                else:
-                    print(f'[vLLM] Failed to load LoRA adapter. '
-                          f'Status code: {response.status_code}, Response: {response.text}')
-                return True
-            except requests.exceptions.RequestException as e:
-                continue
-
-        print(f'[vLLM] Error loading LoRA adapter: {str(e)}')
-        return False
-
-    def close(self):
-        """Shut down vLLM server and kill all vLLM processes."""
-        self._kill_vllm_process()
-
-    def chat_completion(self,
-                        message: str | List[openai.types.chat.ChatCompletionMessageParam],
-                        max_tokens: Optional[int] = None,
-                        timeout_seconds: Optional[int] = None,
-                        lora_name: Optional[str] = None,
-                        temperature: float = 0.9,
-                        top_p: float = 0.9,
-                        chat_template_kwargs: Optional[Dict[str, Any]] = None) -> str:
-        """Send a chat completion query with OpenAI format to the vLLM server. Return the response content.
-        Args:
-            message: The message in str or openai format.
-            max_tokens: The maximum number of tokens to generate.
-            timeout_seconds: The timeout seconds.
-            lora_name: Lora adapter name. Defaults to None which uses base model.
-            temperature: The temperature parameter.
-            top_p: The top p parameter.
-            chat_template_kwargs: The chat template kwargs, e.g., {'enable_thinking': False}.
-        """
-        data = {
-            'messages': [
-                {'role': 'user', 'content': message.strip()} if isinstance(message, str) else message
-            ],
-            'temperature': temperature,
-            'top_p': top_p,
-            'max_tokens': max_tokens,
-        }
-        # Use the specified lora adapter
-        if lora_name is not None:
-            data['model'] = lora_name
-        # Chat template keyword args
-        if self._chat_template_kwargs is not None:
-            data['chat_template_kwargs'] = self._chat_template_kwargs
-        elif chat_template_kwargs is not None:
-            data['chat_template_kwargs'] = chat_template_kwargs
-        # Request
-        url = f'http://{self._host}:{self._port}/v1/chat/completions'
-        headers = {'Content-Type': 'application/json'}
-        response = requests.post(url, headers=headers, json=data, timeout=timeout_seconds)
-        return response.json()['choices'][0]['message']['content']
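
Removing adtools/lm_base.py drops the package's entire LLM-client surface: LanguageModel, OpenAIAPI, and VLLMServer no longer ship with 0.1.6, so any import of adtools.lm_base now raises ImportError. A hedged migration sketch for code that must run against both versions (the fallback branch is illustrative, not part of py-adtools):

    try:
        # Works on 0.1.4; raises ImportError on 0.1.6, where the module was removed.
        from adtools.lm_base import OpenAIAPI
    except ImportError:
        OpenAIAPI = None

    if OpenAIAPI is None:
        # On 0.1.6, call the openai SDK (now a declared dependency) directly instead.
        print('adtools.lm_base removed; use the openai package directly.')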