pegaflow-llm 0.0.2__cp310-cp310-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pegaflow/__init__.py +21 -0
- pegaflow/_server.py +44 -0
- pegaflow/connector/__init__.py +235 -0
- pegaflow/connector/common.py +308 -0
- pegaflow/connector/scheduler.py +225 -0
- pegaflow/connector/worker.py +451 -0
- pegaflow/connector copy.py +941 -0
- pegaflow/ipc_wrapper.py +183 -0
- pegaflow/logging_utils.py +61 -0
- pegaflow/pegaflow-server-py +0 -0
- pegaflow/pegaflow.cpython-310-x86_64-linux-gnu.so +0 -0
- pegaflow_llm-0.0.2.dist-info/METADATA +100 -0
- pegaflow_llm-0.0.2.dist-info/RECORD +15 -0
- pegaflow_llm-0.0.2.dist-info/WHEEL +4 -0
- pegaflow_llm-0.0.2.dist-info/entry_points.txt +2 -0
pegaflow/ipc_wrapper.py
ADDED
@@ -0,0 +1,183 @@
"""CUDA IPC Wrapper for cross-process GPU memory sharing.

This module provides a wrapper class for PyTorch tensors that enables
cross-process GPU memory sharing via CUDA IPC handles. The wrapper can
be serialized (via pickle) and sent across process boundaries.

This implementation handles CUDA_VISIBLE_DEVICES correctly by using GPU UUIDs
instead of device indices for device identification.
"""

import threading
import torch
from typing import Tuple


class CudaIPCWrapper:
    """Wrapper for CUDA IPC handle with tensor metadata.

    This class wraps a PyTorch CUDA tensor and extracts its IPC handle,
    allowing the tensor to be reconstructed in another process. It correctly
    handles CUDA_VISIBLE_DEVICES by using GPU UUIDs for device identification.

    Attributes:
        handle: CUDA IPC handle tuple (device, ipc_handle, size, offset, ...)
        dtype: PyTorch dtype of the tensor
        shape: Shape tuple of the tensor
        device_uuid: UUID string of the GPU device

    Example:
        # Process 1 (sender)
        tensor = torch.randn(10, device='cuda:0')
        wrapper = CudaIPCWrapper(tensor)
        serialized = pickle.dumps(wrapper)
        # ... send serialized bytes to another process ...

        # Process 2 (receiver)
        wrapper = pickle.loads(serialized)
        tensor = wrapper.to_tensor()  # Reconstruct tensor
        ptr = tensor.data_ptr()  # Get GPU pointer
    """

    # Class-level cache for device UUID to index mapping
    _discovered_device_mapping: dict[str, int] = {}
    _device_mapping_lock = threading.Lock()

    @staticmethod
    def _get_device_uuid(device_index: int) -> str:
        """Get the UUID of a GPU device given its index.

        Args:
            device_index: CUDA device index (relative to CUDA_VISIBLE_DEVICES)

        Returns:
            UUID string of the GPU device
        """
        return str(torch.cuda.get_device_properties(device_index).uuid)

    @staticmethod
    def _discover_gpu_devices():
        """Discover all available GPU devices and map their UUIDs to
        the physical device ordinals (relative to CUDA_VISIBLE_DEVICES).
        """
        if not torch.cuda.is_available():
            return

        num_devices = torch.cuda.device_count()
        with CudaIPCWrapper._device_mapping_lock:
            if CudaIPCWrapper._discovered_device_mapping:
                return  # Already discovered

            for i in range(num_devices):
                device_uuid = CudaIPCWrapper._get_device_uuid(i)
                CudaIPCWrapper._discovered_device_mapping[device_uuid] = i

    @staticmethod
    def _get_device_index_from_uuid(device_uuid: str) -> int:
        """Get the physical device ordinal from its UUID.

        Args:
            device_uuid: UUID string of the GPU device

        Returns:
            Device index relative to CUDA_VISIBLE_DEVICES

        Raises:
            RuntimeError: If the device UUID is not found
        """
        CudaIPCWrapper._discover_gpu_devices()

        with CudaIPCWrapper._device_mapping_lock:
            device_index = CudaIPCWrapper._discovered_device_mapping.get(
                device_uuid, None
            )

        if device_index is None:
            raise RuntimeError(
                f"Device UUID {device_uuid} not found in the discovered devices. "
                "Please make sure the process can see all the GPU devices."
            )
        return device_index

    def __init__(self, tensor: torch.Tensor):
        """Create IPC wrapper from a CUDA tensor.

        Args:
            tensor: PyTorch CUDA tensor to wrap. Must be contiguous and
                have zero storage offset.

        Raises:
            AssertionError: If tensor is not contiguous or has non-zero offset.
        """
        assert tensor.storage_offset() == 0, "Tensor must have zero storage offset"
        assert tensor.is_contiguous(), "Tensor must be contiguous"

        # Get the underlying storage and create IPC handle
        storage = tensor.untyped_storage()
        handle = storage._share_cuda_()

        # Store metadata needed to reconstruct the tensor
        self.handle = handle
        self.dtype = tensor.dtype
        self.shape = tensor.shape

        # Store device UUID instead of device index to handle CUDA_VISIBLE_DEVICES
        device_index = tensor.device.index
        self.device_uuid = CudaIPCWrapper._get_device_uuid(device_index)

    def to_tensor(self) -> torch.Tensor:
        """Reconstruct tensor from IPC handle.

        This method creates a new tensor in the current process that shares
        the same GPU memory as the original tensor (via CUDA IPC).

        Returns:
            PyTorch tensor that shares GPU memory with the original tensor.

        Note:
            The reconstructed tensor shares memory with the original. Any
            modifications to one will be visible in the other.

            This function may break if torch.cuda is not initialized.
            Call torch.cuda.init() before using this function if needed.
        """
        # Get the correct device index in the current process based on UUID
        device = CudaIPCWrapper._get_device_index_from_uuid(self.device_uuid)

        # Reconstruct storage from IPC handle
        storage = torch.UntypedStorage._new_shared_cuda(device, *self.handle[1:])

        # Create empty tensor on the correct device
        t = torch.tensor([], device=device, dtype=self.dtype)

        # Set the tensor to use the shared storage
        t.set_(storage)

        # Reshape to original shape
        return t.view(self.shape)

    def __eq__(self, other) -> bool:
        """Check equality with another CudaIPCWrapper.

        Args:
            other: Object to compare with

        Returns:
            True if the wrappers refer to the same tensor, False otherwise
        """
        if not isinstance(other, CudaIPCWrapper):
            return False
        return (
            self.handle == other.handle
            and self.dtype == other.dtype
            and self.shape == other.shape
            and self.device_uuid == other.device_uuid
        )

    def __repr__(self) -> str:
        return (f"CudaIPCWrapper(shape={self.shape}, dtype={self.dtype}, "
                f"device_uuid={self.device_uuid})")


__all__ = ["CudaIPCWrapper"]
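The class docstring above outlines the sender/receiver flow. As an illustration only (not part of the wheel), a minimal two-process round trip might look like the sketch below; it assumes the module is importable as `pegaflow.ipc_wrapper` and that both processes can see the same physical GPU:

```python
import pickle

import torch
import torch.multiprocessing as mp

from pegaflow.ipc_wrapper import CudaIPCWrapper  # import path assumed from the wheel layout


def consumer(queue):
    # Runs in a separate process: rebuild the tensor from the pickled wrapper.
    torch.cuda.init()  # per the to_tensor() note, make sure CUDA is initialized
    wrapper = pickle.loads(queue.get())
    shared = wrapper.to_tensor()
    shared += 1  # writes land in the producer's GPU buffer (memory is aliased)
    torch.cuda.synchronize()


if __name__ == "__main__":
    ctx = mp.get_context("spawn")  # CUDA IPC requires the spawn start method
    queue = ctx.Queue()

    tensor = torch.zeros(4, device="cuda:0")
    proc = ctx.Process(target=consumer, args=(queue,))
    proc.start()
    queue.put(pickle.dumps(CudaIPCWrapper(tensor)))
    proc.join()  # keep `tensor` alive until the consumer is done

    print(tensor)  # expected: tensor([1., 1., 1., 1.], device='cuda:0')
```

Because the wrapper records the GPU's UUID rather than its index, the receiver resolves the correct device even if the two processes were launched with different CUDA_VISIBLE_DEVICES orderings, provided the device is visible to both.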
pegaflow/logging_utils.py
ADDED
@@ -0,0 +1,61 @@
"""Logging utilities for PegaFlow connector.

This module provides timing decorators and logger configuration.
"""

import functools
import logging
import os
import time

# Environment variable to control timing logging
ENABLE_TIMING = os.environ.get("PEGAFLOW_ENABLE_TIMING", "1") == "1"

# Module logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

if not logger.hasHandlers():
    _handler = logging.StreamHandler()
    _handler.setLevel(logging.NOTSET)
    _handler.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(_handler)
    logger.propagate = False


def get_connector_logger() -> logging.Logger:
    """Get a logger for the connector module."""
    connector_logger = logging.getLogger("pegaflow.connector")
    connector_logger.setLevel(logging.INFO)
    if not connector_logger.hasHandlers():
        handler = logging.StreamHandler()
        handler.setLevel(logging.NOTSET)
        handler.setFormatter(logging.Formatter("%(message)s"))
        connector_logger.addHandler(handler)
        connector_logger.propagate = False
    return connector_logger


def timing_wrapper(func):
    """Decorator to log function name and execution time when enabled.

    Enable by setting environment variable: PEGAFLOW_ENABLE_TIMING=1
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not ENABLE_TIMING:
            return func(*args, **kwargs)

        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed_ms = (time.perf_counter() - start) * 1000
        logger.debug(
            "[PegaKVConnector] %s took %.2f ms",
            func.__name__,
            elapsed_ms,
        )
        return result
    return wrapper


__all__ = ["ENABLE_TIMING", "timing_wrapper", "get_connector_logger"]
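For reference, a hypothetical usage sketch of `timing_wrapper` (illustrative only; the import path and the `logger` name are taken from the module above). Note that `ENABLE_TIMING` is read once at import time, and the decorator emits at DEBUG level while the module logger defaults to INFO, so both need to be arranged before any timing lines appear:

```python
import logging
import os
import time

# ENABLE_TIMING is read once at import time, so set the variable first.
os.environ["PEGAFLOW_ENABLE_TIMING"] = "1"

from pegaflow.logging_utils import logger, timing_wrapper  # names assumed from the module above

# The decorator logs at DEBUG level while the module logger defaults to INFO,
# so lower the level to actually see the timing output.
logger.setLevel(logging.DEBUG)


@timing_wrapper
def load_kv_block(num_bytes: int) -> bytes:
    # Stand-in for a real connector operation.
    time.sleep(0.01)
    return b"\x00" * num_bytes


load_kv_block(4096)
# expected log line, roughly: [PegaKVConnector] load_kv_block took 10.xx ms
```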
pegaflow/pegaflow-server-py
Binary file
pegaflow/pegaflow.cpython-310-x86_64-linux-gnu.so
Binary file
pegaflow_llm-0.0.2.dist-info/METADATA
ADDED
@@ -0,0 +1,100 @@
Metadata-Version: 2.4
Name: pegaflow-llm
Version: 0.0.2
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Rust
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Summary: High-performance key-value storage engine with Python bindings
Keywords: storage,kv-store,rust,vllm,inference
Author: PegaFlow Contributors
License: MIT
Requires-Python: >=3.10
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM

# PegaFlow Python Package

High-performance key-value storage engine with Python bindings, built with Rust and PyO3.

## Features

- **PegaEngine**: Fast Rust-based key-value storage with Python bindings
- **PegaKVConnector**: vLLM KV connector for distributed inference with KV cache transfer

## Installation

### From Source

```bash
# Install maturin if you haven't already
pip install maturin

# Build and install in development mode
cd python
maturin develop

# Or build a wheel
maturin build --release
```

### From PyPI (coming soon)

```bash
pip install pegaflow
```

## Usage

### Basic KV Storage

```python
from pegaflow import PegaEngine

# Create a new engine
engine = PegaEngine()

# Store key-value pairs
engine.put("name", "PegaFlow")
engine.put("version", "0.1.0")

# Retrieve values
name = engine.get("name")  # Returns "PegaFlow"
missing = engine.get("nonexistent")  # Returns None

# Remove keys
removed = engine.remove("name")  # Returns "PegaFlow"
```

### vLLM KV Connector

```python
from vllm import LLM
from vllm.distributed.kv_transfer.kv_transfer_agent import KVTransferConfig

# Configure vLLM to use PegaKVConnector
kv_transfer_config = KVTransferConfig(
    kv_connector="PegaKVConnector",
    kv_role="kv_both",
    kv_connector_module_path="pegaflow.connector",
)

# Create LLM with KV transfer enabled
llm = LLM(
    model="gpt2",
    kv_transfer_config=kv_transfer_config,
)
```

## Development

See the [examples](../examples/) directory for more usage examples.

## License

MIT
pegaflow_llm-0.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
pegaflow/__init__.py,sha256=F6E-sxO1kxm1WyJZsY4Eba3muE9Z3IYxG1RxWbGFlyU,741
pegaflow/_server.py,sha256=CL0HSfBAP9rx5Rb4oh77FwFhxfcwQr9pSrEX4IlPBYg,1224
pegaflow/connector copy.py,sha256=ELvaMvMTOfA4g48IZ8rjmKzwQRN3g5tUPOH99_fGA4w,35994
pegaflow/connector/__init__.py,sha256=6kr5KwOR5GpU06BMGdw0RSwTvHk4EtASsp3DZ8rR88U,7315
pegaflow/connector/common.py,sha256=2Qj3-H4-XNd8o3mORtYONp7N3OjTuO8t1rUYSGc-trA,9769
pegaflow/connector/scheduler.py,sha256=7JQ6bB2C9Titd6oFTsv3PCLuqPIc4ZctB3fAnFFjxVU,7490
pegaflow/connector/worker.py,sha256=EWJfx5UmFT-RSYUL1-AV1dPQTFiGF2YvGpEXfVvA7jU,15782
pegaflow/ipc_wrapper.py,sha256=r5OHvTLBpyyBgr_1lyt5yPCBDS6D6aeqNvOY5Kjm0Q4,6600
pegaflow/logging_utils.py,sha256=D3el9wRKjLQhfmK6rJcr0laUy4ey0CZEuRrLDrQxdh4,1798
pegaflow/pegaflow-server-py,sha256=VWfPVGfy5cOarvWgR9kPkQJxl249MUtj3vf2_sQPNFw,9505568
pegaflow/pegaflow.cpython-310-x86_64-linux-gnu.so,sha256=Qp_jaEa7bUsHa9uthOnREgTzbDb35_QuImqdib7T5xU,5556608
pegaflow_llm-0.0.2.dist-info/METADATA,sha256=EdFrIDevJdZuuASVdhZTP59Ch1JVymVpYRZWj3oZGDI,2232
pegaflow_llm-0.0.2.dist-info/WHEEL,sha256=hwQ1X0enL4h--Y0T24JPMOc_YcS3ZMkjluo8hhmik0c,109
pegaflow_llm-0.0.2.dist-info/entry_points.txt,sha256=ayii4bkhIRmNW0s6u0ys3ybZ27zGhrKVnUUWSY04Zi0,56
pegaflow_llm-0.0.2.dist-info/RECORD,,