pegaflow-llm 0.0.2 (cp310-cp310-manylinux_2_34_x86_64 wheel)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
+ """CUDA IPC Wrapper for cross-process GPU memory sharing.
+
+ This module provides a wrapper class for PyTorch tensors that enables
+ cross-process GPU memory sharing via CUDA IPC handles. The wrapper can
+ be serialized (via pickle) and sent across process boundaries.
+
+ This implementation handles CUDA_VISIBLE_DEVICES correctly by using GPU UUIDs
+ instead of device indices for device identification.
+ """
+
+ import threading
+ import torch
+ from typing import Tuple
+
+
+ class CudaIPCWrapper:
+     """Wrapper for CUDA IPC handle with tensor metadata.
+
+     This class wraps a PyTorch CUDA tensor and extracts its IPC handle,
+     allowing the tensor to be reconstructed in another process. It correctly
+     handles CUDA_VISIBLE_DEVICES by using GPU UUIDs for device identification.
+
+     Attributes:
+         handle: CUDA IPC handle tuple (device, ipc_handle, size, offset, ...)
+         dtype: PyTorch dtype of the tensor
+         shape: Shape tuple of the tensor
+         device_uuid: UUID string of the GPU device
+
+     Example:
+         # Process 1 (sender)
+         tensor = torch.randn(10, device='cuda:0')
+         wrapper = CudaIPCWrapper(tensor)
+         serialized = pickle.dumps(wrapper)
+         # ... send serialized bytes to another process ...
+
+         # Process 2 (receiver)
+         wrapper = pickle.loads(serialized)
+         tensor = wrapper.to_tensor() # Reconstruct tensor
+         ptr = tensor.data_ptr() # Get GPU pointer
+     """
+
+     # Class-level cache for device UUID to index mapping
+     _discovered_device_mapping: dict[str, int] = {}
+     _device_mapping_lock = threading.Lock()
+
+     @staticmethod
+     def _get_device_uuid(device_index: int) -> str:
+         """Get the UUID of a GPU device given its index.
+
+         Args:
+             device_index: CUDA device index (relative to CUDA_VISIBLE_DEVICES)
+
+         Returns:
+             UUID string of the GPU device
+         """
+         return str(torch.cuda.get_device_properties(device_index).uuid)
+
+     @staticmethod
+     def _discover_gpu_devices():
+         """Discover all visible GPU devices and map their UUIDs to
+         their local device ordinals (indices relative to CUDA_VISIBLE_DEVICES).
+         """
+         if not torch.cuda.is_available():
+             return
+
+         num_devices = torch.cuda.device_count()
+         with CudaIPCWrapper._device_mapping_lock:
+             if CudaIPCWrapper._discovered_device_mapping:
+                 return # Already discovered
+
+             for i in range(num_devices):
+                 device_uuid = CudaIPCWrapper._get_device_uuid(i)
+                 CudaIPCWrapper._discovered_device_mapping[device_uuid] = i
+
+     @staticmethod
+     def _get_device_index_from_uuid(device_uuid: str) -> int:
+         """Get the local device ordinal (relative to CUDA_VISIBLE_DEVICES) for a GPU UUID.
+
+         Args:
+             device_uuid: UUID string of the GPU device
+
+         Returns:
+             Device index relative to CUDA_VISIBLE_DEVICES
+
+         Raises:
+             RuntimeError: If the device UUID is not found
+         """
+         CudaIPCWrapper._discover_gpu_devices()
+
+         with CudaIPCWrapper._device_mapping_lock:
+             device_index = CudaIPCWrapper._discovered_device_mapping.get(
+                 device_uuid, None
+             )
+
+         if device_index is None:
+             raise RuntimeError(
+                 f"Device UUID {device_uuid} not found in the discovered devices. "
+                 "Please make sure the process can see all the GPU devices."
+             )
+         return device_index
+
+     def __init__(self, tensor: torch.Tensor):
+         """Create IPC wrapper from a CUDA tensor.
+
+         Args:
+             tensor: PyTorch CUDA tensor to wrap. Must be contiguous and
+                 have zero storage offset.
+
+         Raises:
+             AssertionError: If tensor is not contiguous or has non-zero offset.
+         """
+         assert tensor.storage_offset() == 0, "Tensor must have zero storage offset"
+         assert tensor.is_contiguous(), "Tensor must be contiguous"
+
+         # Get the underlying storage and create IPC handle
+         storage = tensor.untyped_storage()
+         handle = storage._share_cuda_()
+
+         # Store metadata needed to reconstruct the tensor
+         self.handle = handle
+         self.dtype = tensor.dtype
+         self.shape = tensor.shape
+
+         # Store device UUID instead of device index to handle CUDA_VISIBLE_DEVICES
+         device_index = tensor.device.index
+         self.device_uuid = CudaIPCWrapper._get_device_uuid(device_index)
+
+     def to_tensor(self) -> torch.Tensor:
+         """Reconstruct tensor from IPC handle.
+
+         This method creates a new tensor in the current process that shares
+         the same GPU memory as the original tensor (via CUDA IPC).
+
+         Returns:
+             PyTorch tensor that shares GPU memory with the original tensor.
+
+         Note:
+             The reconstructed tensor shares memory with the original. Any
+             modifications to one will be visible in the other.
+
+             This method requires an initialized CUDA context; call
+             torch.cuda.init() first if needed.
+         """
+         # Get the correct device index in the current process based on UUID
+         device = CudaIPCWrapper._get_device_index_from_uuid(self.device_uuid)
+
+         # Reconstruct storage from IPC handle
+         storage = torch.UntypedStorage._new_shared_cuda(device, *self.handle[1:])
+
+         # Create empty tensor on the correct device
+         t = torch.tensor([], device=device, dtype=self.dtype)
+
+         # Set the tensor to use the shared storage
+         t.set_(storage)
+
+         # Reshape to original shape
+         return t.view(self.shape)
+
+     def __eq__(self, other) -> bool:
+         """Check equality with another CudaIPCWrapper.
+
+         Args:
+             other: Object to compare with
+
+         Returns:
+             True if the wrappers refer to the same tensor, False otherwise
+         """
+         if not isinstance(other, CudaIPCWrapper):
+             return False
+         return (
+             self.handle == other.handle
+             and self.dtype == other.dtype
+             and self.shape == other.shape
+             and self.device_uuid == other.device_uuid
+         )
+
+     def __repr__(self) -> str:
+         return (f"CudaIPCWrapper(shape={self.shape}, dtype={self.dtype}, "
+                 f"device_uuid={self.device_uuid})")
+
+
+ __all__ = ["CudaIPCWrapper"]
+
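+
+ # Illustrative usage sketch: a minimal cross-process round trip in which the
+ # parent wraps a CUDA tensor, pickles the wrapper, and a spawned child opens the
+ # same GPU memory and writes into it. This assumes at least one CUDA device and a
+ # platform that supports CUDA IPC; the spawn-based transport is just one way to
+ # move the pickled bytes between processes, not something this module requires.
+ def _ipc_demo_consumer(payload: bytes) -> None:
+     """Child-process side of the sketch: open the shared buffer and fill it with ones."""
+     import pickle
+
+     wrapper = pickle.loads(payload)
+     wrapper.to_tensor().fill_(1.0)
+     torch.cuda.synchronize()
+
+
+ if __name__ == "__main__":
+     import pickle
+     import torch.multiprocessing as mp
+
+     src = torch.zeros(4, device="cuda:0")
+     ctx = mp.get_context("spawn")
+     proc = ctx.Process(target=_ipc_demo_consumer,
+                        args=(pickle.dumps(CudaIPCWrapper(src)),))
+     proc.start()
+     proc.join()
+     print(src)  # tensor of ones: the child wrote into the same GPU buffer
+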
@@ -0,0 +1,61 @@
+ """Logging utilities for PegaFlow connector.
+
+ This module provides timing decorators and logger configuration.
+ """
+
+ import functools
+ import logging
+ import os
+ import time
+
+ # Timing is enabled by default; set PEGAFLOW_ENABLE_TIMING=0 to disable it
+ ENABLE_TIMING = os.environ.get("PEGAFLOW_ENABLE_TIMING", "1") == "1"
+
+ # Module logger
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
+
+ if not logger.hasHandlers():
+     _handler = logging.StreamHandler()
+     _handler.setLevel(logging.NOTSET)
+     _handler.setFormatter(logging.Formatter("%(message)s"))
+     logger.addHandler(_handler)
+     logger.propagate = False
+
+
+ def get_connector_logger() -> logging.Logger:
+     """Get a logger for the connector module."""
+     connector_logger = logging.getLogger("pegaflow.connector")
+     connector_logger.setLevel(logging.INFO)
+     if not connector_logger.hasHandlers():
+         handler = logging.StreamHandler()
+         handler.setLevel(logging.NOTSET)
+         handler.setFormatter(logging.Formatter("%(message)s"))
+         connector_logger.addHandler(handler)
+         connector_logger.propagate = False
+     return connector_logger
+
+
+ def timing_wrapper(func):
+     """Decorator that logs the function name and execution time at DEBUG level.
+
+     Timing is enabled by default; disable it with PEGAFLOW_ENABLE_TIMING=0.
+     """
+     @functools.wraps(func)
+     def wrapper(*args, **kwargs):
+         if not ENABLE_TIMING:
+             return func(*args, **kwargs)
+
+         start = time.perf_counter()
+         result = func(*args, **kwargs)
+         elapsed_ms = (time.perf_counter() - start) * 1000
+         logger.debug(
+             "[PegaKVConnector] %s took %.2f ms",
+             func.__name__,
+             elapsed_ms,
+         )
+         return result
+     return wrapper
+
+
+ __all__ = ["ENABLE_TIMING", "timing_wrapper", "get_connector_logger"]
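+
+
+ # Usage sketch: timing is collected whenever ENABLE_TIMING is true (the default),
+ # but the messages are emitted at DEBUG level, so the logger must be lowered to
+ # DEBUG to actually see them. The function below exists only for illustration.
+ if __name__ == "__main__":
+     logger.setLevel(logging.DEBUG)
+
+     @timing_wrapper
+     def busy_sum(n: int) -> int:
+         return sum(range(n))
+
+     busy_sum(1_000_000)  # prints e.g. "[PegaKVConnector] busy_sum took 12.34 ms"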
Binary file
@@ -0,0 +1,100 @@
+ Metadata-Version: 2.4
+ Name: pegaflow-llm
+ Version: 0.0.2
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Programming Language :: Rust
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Summary: High-performance key-value storage engine with Python bindings
+ Keywords: storage,kv-store,rust,vllm,inference
+ Author: PegaFlow Contributors
+ License: MIT
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
+
+ # PegaFlow Python Package
+
+ High-performance key-value storage engine with Python bindings, built with Rust and PyO3.
+
+ ## Features
+
+ - **PegaEngine**: Fast Rust-based key-value storage with Python bindings
+ - **PegaKVConnector**: vLLM KV connector for distributed inference with KV cache transfer
+
+ ## Installation
+
+ ### From Source
+
+ ```bash
+ # Install maturin if you haven't already
+ pip install maturin
+
+ # Build and install in development mode
+ cd python
+ maturin develop
+
+ # Or build a wheel
+ maturin build --release
+ ```
+
+ ### From PyPI (coming soon)
+
+ ```bash
+ pip install pegaflow
+ ```
+
+ ## Usage
+
+ ### Basic KV Storage
+
+ ```python
+ from pegaflow import PegaEngine
+
+ # Create a new engine
+ engine = PegaEngine()
+
+ # Store key-value pairs
+ engine.put("name", "PegaFlow")
+ engine.put("version", "0.1.0")
+
+ # Retrieve values
+ name = engine.get("name") # Returns "PegaFlow"
+ missing = engine.get("nonexistent") # Returns None
+
+ # Remove keys
+ removed = engine.remove("name") # Returns "PegaFlow"
+ ```
+
+ ### vLLM KV Connector
+
+ ```python
+ from vllm import LLM
+ from vllm.distributed.kv_transfer.kv_transfer_agent import KVTransferConfig
+
+ # Configure vLLM to use PegaKVConnector
+ kv_transfer_config = KVTransferConfig(
+     kv_connector="PegaKVConnector",
+     kv_role="kv_both",
+     kv_connector_module_path="pegaflow.connector",
+ )
+
+ # Create LLM with KV transfer enabled
+ llm = LLM(
+     model="gpt2",
+     kv_transfer_config=kv_transfer_config,
+ )
+ ```
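+
+ The configuration above only wires PegaFlow into vLLM; inference itself runs
+ through the normal vLLM API. As a rough sketch (the prompt and output handling
+ below are illustrative, not specific to PegaFlow):
+
+ ```python
+ # Continue from the example above: generate with the KV connector active
+ outputs = llm.generate(["PegaFlow is"])
+ print(outputs[0].outputs[0].text)
+ ```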
+
+ ## Development
+
+ See the [examples](../examples/) directory for more usage examples.
+
+ ## License
+
+ MIT
+
+
@@ -0,0 +1,15 @@
+ pegaflow/__init__.py,sha256=F6E-sxO1kxm1WyJZsY4Eba3muE9Z3IYxG1RxWbGFlyU,741
+ pegaflow/_server.py,sha256=CL0HSfBAP9rx5Rb4oh77FwFhxfcwQr9pSrEX4IlPBYg,1224
+ pegaflow/connector copy.py,sha256=ELvaMvMTOfA4g48IZ8rjmKzwQRN3g5tUPOH99_fGA4w,35994
+ pegaflow/connector/__init__.py,sha256=6kr5KwOR5GpU06BMGdw0RSwTvHk4EtASsp3DZ8rR88U,7315
+ pegaflow/connector/common.py,sha256=2Qj3-H4-XNd8o3mORtYONp7N3OjTuO8t1rUYSGc-trA,9769
+ pegaflow/connector/scheduler.py,sha256=7JQ6bB2C9Titd6oFTsv3PCLuqPIc4ZctB3fAnFFjxVU,7490
+ pegaflow/connector/worker.py,sha256=EWJfx5UmFT-RSYUL1-AV1dPQTFiGF2YvGpEXfVvA7jU,15782
+ pegaflow/ipc_wrapper.py,sha256=r5OHvTLBpyyBgr_1lyt5yPCBDS6D6aeqNvOY5Kjm0Q4,6600
+ pegaflow/logging_utils.py,sha256=D3el9wRKjLQhfmK6rJcr0laUy4ey0CZEuRrLDrQxdh4,1798
+ pegaflow/pegaflow-server-py,sha256=VWfPVGfy5cOarvWgR9kPkQJxl249MUtj3vf2_sQPNFw,9505568
+ pegaflow/pegaflow.cpython-310-x86_64-linux-gnu.so,sha256=Qp_jaEa7bUsHa9uthOnREgTzbDb35_QuImqdib7T5xU,5556608
+ pegaflow_llm-0.0.2.dist-info/METADATA,sha256=EdFrIDevJdZuuASVdhZTP59Ch1JVymVpYRZWj3oZGDI,2232
+ pegaflow_llm-0.0.2.dist-info/WHEEL,sha256=hwQ1X0enL4h--Y0T24JPMOc_YcS3ZMkjluo8hhmik0c,109
+ pegaflow_llm-0.0.2.dist-info/entry_points.txt,sha256=ayii4bkhIRmNW0s6u0ys3ybZ27zGhrKVnUUWSY04Zi0,56
+ pegaflow_llm-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: maturin (1.10.2)
+ Root-Is-Purelib: false
+ Tag: cp310-cp310-manylinux_2_34_x86_64
@@ -0,0 +1,2 @@
+ [console_scripts]
+ pegaflow-server=pegaflow._server:main