TransferQueue 0.1.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recipe/simple_use_case/async_demo.py +331 -0
- recipe/simple_use_case/sync_demo.py +220 -0
- tests/test_async_simple_storage_manager.py +339 -0
- tests/test_client.py +423 -0
- tests/test_controller.py +274 -0
- tests/test_controller_data_partitions.py +513 -0
- tests/test_kv_storage_manager.py +92 -0
- tests/test_put.py +327 -0
- tests/test_samplers.py +492 -0
- tests/test_serial_utils_on_cpu.py +202 -0
- tests/test_simple_storage_unit.py +443 -0
- tests/test_storage_client_factory.py +45 -0
- transfer_queue/__init__.py +48 -0
- transfer_queue/client.py +611 -0
- transfer_queue/controller.py +1187 -0
- transfer_queue/metadata.py +460 -0
- transfer_queue/sampler/__init__.py +19 -0
- transfer_queue/sampler/base.py +74 -0
- transfer_queue/sampler/grpo_group_n_sampler.py +157 -0
- transfer_queue/sampler/sequential_sampler.py +75 -0
- transfer_queue/storage/__init__.py +25 -0
- transfer_queue/storage/clients/__init__.py +24 -0
- transfer_queue/storage/clients/base.py +22 -0
- transfer_queue/storage/clients/factory.py +55 -0
- transfer_queue/storage/clients/yuanrong_client.py +118 -0
- transfer_queue/storage/managers/__init__.py +23 -0
- transfer_queue/storage/managers/base.py +460 -0
- transfer_queue/storage/managers/factory.py +43 -0
- transfer_queue/storage/managers/simple_backend_manager.py +611 -0
- transfer_queue/storage/managers/yuanrong_manager.py +18 -0
- transfer_queue/storage/simple_backend.py +451 -0
- transfer_queue/utils/__init__.py +13 -0
- transfer_queue/utils/serial_utils.py +240 -0
- transfer_queue/utils/utils.py +132 -0
- transfer_queue/utils/zmq_utils.py +170 -0
- transfer_queue/version/version +1 -0
- transferqueue-0.1.1.dev0.dist-info/METADATA +327 -0
- transferqueue-0.1.1.dev0.dist-info/RECORD +41 -0
- transferqueue-0.1.1.dev0.dist-info/WHEEL +5 -0
- transferqueue-0.1.1.dev0.dist-info/licenses/LICENSE +202 -0
- transferqueue-0.1.1.dev0.dist-info/top_level.txt +4 -0

transfer_queue/utils/serial_utils.py
@@ -0,0 +1,240 @@
# Copyright 2025 The TransferQueue Team
# Copyright 2025 The vLLM project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This implementation is inspired by https://github.com/vllm-project/vllm/blob/main/vllm/v1/serial_utils.py

import os
import pickle
from collections.abc import Sequence
from inspect import isclass
from types import FunctionType
from typing import Any, Optional, TypeAlias

import cloudpickle
import torch
import zmq
from msgspec import msgpack
from tensordict import NonTensorData, TensorDict

TQ_MSGPACK_ZERO_COPY_THRESHOLD = int(os.environ.get("TQ_MSGPACK_ZERO_COPY_THRESHOLD", 256))
CUSTOM_TYPE_PICKLE = 1
CUSTOM_TYPE_CLOUDPICKLE = 2
CUSTOM_TYPE_RAW_VIEW = 3

bytestr: TypeAlias = bytes | bytearray | memoryview | zmq.Frame
tensorenc = tuple[str, tuple[int, ...], int | memoryview]


class MsgpackEncoder:
    """Encoder with custom torch tensor and numpy array serialization.

    Note that unlike vanilla `msgspec` Encoders, this interface is generally
    not thread-safe when encoding tensors / numpy arrays.

    By default, arrays below 256B are serialized inline. Larger ones will get
    sent via dedicated messages. Note that this is a per-tensor limit.
    """

    def __init__(self, size_threshold: Optional[int] = None):
        if size_threshold is None:
            size_threshold = TQ_MSGPACK_ZERO_COPY_THRESHOLD
        self.encoder = msgpack.Encoder(enc_hook=self.enc_hook)
        # This is used as a local stash of buffers that we can then access from
        # our custom `msgspec` hook, `enc_hook`. We don't have a way to
        # pass custom data to the hook otherwise.
        self.aux_buffers: Optional[list[bytestr]] = None
        self.size_threshold = size_threshold

    def encode(self, obj: Any) -> Sequence[bytestr]:
        try:
            self.aux_buffers = bufs = [b""]
            bufs[0] = self.encoder.encode(obj)
            # This `bufs` list allows us to collect direct pointers to backing
            # buffers of tensors and np arrays, and return them along with the
            # top-level encoded buffer instead of copying their data into the
            # new buffer.
            return bufs
        finally:
            self.aux_buffers = None

    def encode_into(self, obj: Any, buf: bytearray) -> Sequence[bytestr]:
        try:
            self.aux_buffers = [buf]
            bufs = self.aux_buffers
            self.encoder.encode_into(obj, buf)
            return bufs
        finally:
            self.aux_buffers = None

    def enc_hook(self, obj: Any) -> Any:
        if isinstance(obj, TensorDict):
            return self._encode_tensordict(obj)

        if isinstance(obj, torch.Tensor):
            return self._encode_tensor(obj)

        if isinstance(obj, FunctionType):
            # `pickle` is generally faster than cloudpickle, but can have
            # problems serializing methods.
            return msgpack.Ext(CUSTOM_TYPE_CLOUDPICKLE, cloudpickle.dumps(obj))

        return msgpack.Ext(CUSTOM_TYPE_PICKLE, pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL))

    def _encode_tensordict(self, obj: TensorDict) -> tuple[tuple[int, ...], Optional[str], dict[str, tuple[str, Any]]]:
        assert self.aux_buffers is not None
        encoded_items: dict[str, tuple[str, Any]] = {}
        for k, v in obj.items():
            if isinstance(v, torch.Tensor):
                encoded_items[k] = ("tensor", self._encode_tensor(v))
            # elif isinstance(v, NonTensorStack):
            #     encoded_items[k] = ("non_tensor_stack", self._encode_non_tensor_stack(v))
            elif isinstance(v, NonTensorData):
                encoded_items[k] = ("non_tensor_data", self._encode_non_tensor_data(v))
            else:
                data = len(self.aux_buffers)
                self.aux_buffers.append(pickle.dumps(v, protocol=pickle.HIGHEST_PROTOCOL))
                encoded_items[k] = ("other", data)
        batch_size = tuple(obj.batch_size)
        device = str(obj.device) if obj.device is not None else None
        return batch_size, device, encoded_items

    def _encode_tensor(self, obj: torch.Tensor) -> tuple[str, list[tensorenc]] | tensorenc:
        if not obj.is_nested:
            return self._encode_single_tensor(obj)
        else:
            layout = str(obj.layout).removeprefix("torch.")
            data = [self._encode_single_tensor(tensor) for tensor in obj.unbind()]
            return layout, data

    def _encode_single_tensor(self, obj: torch.Tensor) -> tensorenc:
        assert self.aux_buffers is not None
        # view the tensor as a contiguous 1D array of bytes
        arr = obj.flatten().contiguous().view(torch.uint8).numpy()
        if obj.nbytes < self.size_threshold:
            # Smaller tensors are encoded inline, just like ndarrays.
            data = msgpack.Ext(CUSTOM_TYPE_RAW_VIEW, arr.data)
        else:
            # Otherwise encode index of backing buffer to avoid copy.
            data = len(self.aux_buffers)
            self.aux_buffers.append(arr.data)
        dtype = str(obj.dtype).removeprefix("torch.")
        return dtype, obj.shape, data

    def _encode_non_tensor_data(self, obj: NonTensorData) -> tuple[tuple[int, ...], Optional[str], int]:
        assert self.aux_buffers is not None
        batch_size = tuple(obj.batch_size)
        device = str(obj.device) if obj.device is not None else None
        data = len(self.aux_buffers)
        self.aux_buffers.append(pickle.dumps(obj.data, protocol=pickle.HIGHEST_PROTOCOL))
        return batch_size, device, data


class MsgpackDecoder:
    """Decoder with custom torch tensor and numpy array serialization.

    Note that unlike vanilla `msgspec` Decoders, this interface is generally
    not thread-safe when decoding tensors / numpy arrays.
    """

    def __init__(self, t: Optional[Any] = None):
        args = () if t is None else (t,)
        self.decoder = msgpack.Decoder(*args, ext_hook=self.ext_hook, dec_hook=self.dec_hook)
        self.aux_buffers: Sequence[bytestr] = ()

    def decode(self, bufs: bytestr | Sequence[bytestr]) -> Any:
        if isinstance(bufs, bytestr):
            return self.decoder.decode(bufs)

        self.aux_buffers = bufs
        try:
            return self.decoder.decode(bufs[0])  # type: ignore[index]
        finally:
            self.aux_buffers = ()

    def dec_hook(self, t: type, obj: Any) -> Any:
        # Given native types in `obj`, convert to type `t`.
        if isclass(t):
            if issubclass(t, TensorDict):
                return self._decode_tensordict(obj)
            if issubclass(t, torch.Tensor):
                return self._decode_tensor(obj)
        return obj

    def _decode_tensordict(self, arr: Any) -> TensorDict:
        batch_size, device, encoded_items = arr
        decoded_items: dict[str, Any] = {}

        for k, (v_type, v) in encoded_items.items():
            if v_type == "tensor":
                decoded_items[k] = self._decode_tensor(v)
            # elif v_type == "non_tensor_stack":
            #     decoded_items[k] = self._decode_non_tensor_stack(v)
            elif v_type == "non_tensor_data":
                decoded_items[k] = self._decode_non_tensor_data(v)
            elif v_type == "other":
                decoded_items[k] = pickle.loads(self.aux_buffers[v])

        batch_size = torch.Size(batch_size)
        torch_device = torch.device(device) if device is not None else None

        return TensorDict(source=decoded_items, batch_size=batch_size, device=torch_device)

    def _decode_tensor(self, arr: Any) -> torch.Tensor:
        if len(arr) == 3:
            # decode single tensor
            return self._decode_single_tensor(arr)
        elif len(arr) == 2:
            # decode nested tensor
            layout, data = arr
            torch_layout = getattr(torch, layout)
            return torch.nested.as_nested_tensor(
                [self._decode_single_tensor(tensor) for tensor in data], layout=torch_layout
            )
        else:
            raise ValueError(f"Invalid tensor encoding format, expected length 2 or 3, got {len(arr)}")

    def _decode_single_tensor(self, arr: Any) -> torch.Tensor:
        dtype, shape, data = arr
        # Copy from inline representation, to decouple the memory storage
        # of the message from the original buffer. And also make Torch
        # not complain about a readonly memoryview.
        buffer = self.aux_buffers[data] if isinstance(data, int) else bytearray(data)
        torch_dtype = getattr(torch, dtype)
        assert isinstance(torch_dtype, torch.dtype)
        if not buffer:  # torch.frombuffer doesn't like empty buffers
            assert 0 in shape
            return torch.empty(shape, dtype=torch_dtype)
        # Create uint8 array
        arr = torch.frombuffer(buffer, dtype=torch.uint8)
        # Convert back to proper shape & type
        return arr.view(torch_dtype).view(shape)

    def _decode_non_tensor_data(self, arr: Any) -> NonTensorData:
        batch_size, device, data = arr
        buffer = self.aux_buffers[data]
        batch_size = torch.Size(batch_size)
        torch_device = torch.device(device) if device is not None else None
        non_tensor_data = pickle.loads(buffer)
        return NonTensorData(data=non_tensor_data, batch_size=batch_size, device=torch_device)

    def ext_hook(self, code: int, data: memoryview) -> Any:
        if code == CUSTOM_TYPE_RAW_VIEW:
            return data
        if code == CUSTOM_TYPE_PICKLE:
            return pickle.loads(data)
        if code == CUSTOM_TYPE_CLOUDPICKLE:
            return cloudpickle.loads(data)

        raise NotImplementedError(f"Extension type code {code} is not supported")
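
The encoder above returns a list of buffers (the msgpack body plus zero-copy views of any tensor above the size threshold), and the decoder consumes that same list. A minimal round-trip sketch, assuming the module path from the file listing; the TensorDict contents are invented for illustration and are not part of the package:

import torch
from tensordict import TensorDict

from transfer_queue.utils.serial_utils import MsgpackDecoder, MsgpackEncoder

# "obs" (4 x 256 float32, 4 KiB) exceeds the 256 B default threshold and travels as a
# separate zero-copy buffer; "step" (32 B) is embedded inline in the msgpack body.
td = TensorDict({"obs": torch.randn(4, 256), "step": torch.arange(4)}, batch_size=[4])

encoder = MsgpackEncoder()
frames = encoder.encode(td)  # frames[0] = msgpack body, frames[1:] = tensor buffers

decoder = MsgpackDecoder(TensorDict)  # target type routes decoding through dec_hook
restored = decoder.decode(frames)
assert torch.equal(restored["obs"], td["obs"])

The multipart frames list maps naturally onto zmq.Socket.send_multipart, which is presumably why the encoder returns separate buffers instead of concatenating them.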

transfer_queue/utils/utils.py
@@ -0,0 +1,132 @@
# Copyright 2025 The TransferQueue Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from contextlib import contextmanager
from enum import Enum
from typing import Optional

import psutil
import ray
import torch


class ExplicitEnum(str, Enum):
    """
    Enum with more explicit error message for missing values.
    """

    @classmethod
    def _missing_(cls, value):
        raise ValueError(
            f"{value} is not a valid {cls.__name__}, please select one of {list(cls._value2member_map_.keys())}"
        )


class TransferQueueRole(ExplicitEnum):
    CONTROLLER = "TransferQueueController"
    STORAGE = "TransferQueueStorage"
    CLIENT = "TransferQueueClient"


# production_status enum: 0: not produced, 1: ready for consume, 2: consumed
class ProductionStatus(ExplicitEnum):
    NOT_PRODUCED = 0
    READY_FOR_CONSUME = 1
    CONSUMED = 2


def get_placement_group(num_ray_actors: int, num_cpus_per_actor: int = 1):
    """
    Create a placement group with SPREAD strategy for Ray actors.

    Args:
        num_ray_actors (int): Number of Ray actors to create.
        num_cpus_per_actor (int): Number of CPUs to allocate per actor.

    Returns:
        placement_group: The created placement group.
    """
    bundle = {"CPU": num_cpus_per_actor}
    placement_group = ray.util.placement_group([bundle for _ in range(num_ray_actors)], strategy="SPREAD")
    ray.get(placement_group.ready())
    return placement_group


def sequential_sampler(
    ready_for_consume_idx: list[int],
    batch_size: int,
    get_n_samples: bool,
    n_samples_per_prompt: int,
) -> list[int]:
    """
    Sequentially samples a batch of indices from global indexes ready_for_consume_idx.

    Args:
        ready_for_consume_idx: A sorted list of available indices for sampling.
            - When get_n_samples=True:
              Expected to be grouped by prompts, e.g.,
              [0,1,2,3, 8,9,10,11, 12,13,14,15] (3 groups of 4 samples each)
            - When get_n_samples=False:
              Can be any ordered list, e.g., [0,3,5,6,7,8]
        batch_size: Total number of samples to return
        get_n_samples: Flag indicating the sampling mode
        n_samples_per_prompt: Number of samples per prompt (used when get_n_samples=True)

    Returns:
        list[int]: Sequentially sampled indices of length batch_size
    """
    if get_n_samples:
        assert len(ready_for_consume_idx) % n_samples_per_prompt == 0
        assert batch_size % n_samples_per_prompt == 0
        batch_size_n_samples = batch_size // n_samples_per_prompt

        group_ready_for_consume_idx = torch.tensor(ready_for_consume_idx, dtype=torch.int).view(
            -1, n_samples_per_prompt
        )

        sampled_indexes = group_ready_for_consume_idx[list(range(batch_size_n_samples))].flatten().tolist()
    else:
        sampled_indexes = [int(ready_for_consume_idx[i]) for i in range(batch_size)]
    return sampled_indexes


@contextmanager
def limit_pytorch_auto_parallel_threads(target_num_threads: Optional[int] = None):
    """Prevent PyTorch from overdoing the automatic parallelism during torch.stack() operation"""
    pytorch_current_num_threads = torch.get_num_threads()
    logical_cores = psutil.cpu_count(logical=True)
    physical_cores = psutil.cpu_count(logical=False)

    if target_num_threads is None:
        # auto determine target_num_threads
        if physical_cores >= 16:
            target_num_threads = 16
        else:
            target_num_threads = physical_cores

    if target_num_threads > logical_cores:
        raise RuntimeError(
            f"target_num_threads {target_num_threads} should not exceed total logical CPU cores {logical_cores}"
        )

    if pytorch_current_num_threads <= target_num_threads:
        # No need to change settings
        yield
    else:
        torch.set_num_threads(target_num_threads)
        try:
            yield
        finally:
            # Restore the original number of threads
            torch.set_num_threads(pytorch_current_num_threads)
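
To make the two modes of sequential_sampler concrete, here is a small worked example; the index values are invented for illustration and mirror the docstring:

from transfer_queue.utils.utils import sequential_sampler

# Grouped mode: indices arrive as 3 prompt groups of 4 samples each; asking for
# batch_size=8 returns the first 8 // 4 = 2 whole groups, preserving group order.
grouped = sequential_sampler(
    ready_for_consume_idx=[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15],
    batch_size=8,
    get_n_samples=True,
    n_samples_per_prompt=4,
)
assert grouped == [0, 1, 2, 3, 8, 9, 10, 11]

# Flat mode: simply takes the first batch_size entries in order.
flat = sequential_sampler([0, 3, 5, 6, 7, 8], batch_size=4, get_n_samples=False, n_samples_per_prompt=1)
assert flat == [0, 3, 5, 6]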

transfer_queue/utils/zmq_utils.py
@@ -0,0 +1,170 @@
# Copyright 2025 The TransferQueue Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pickle
import socket
import time
from dataclasses import dataclass
from typing import Any, Optional
from uuid import uuid4

import psutil
import zmq

from transfer_queue.utils.utils import (
    ExplicitEnum,
    TransferQueueRole,
)


class ZMQRequestType(ExplicitEnum):
    # HANDSHAKE
    HANDSHAKE = "HANDSHAKE"  # TransferQueueStorageUnit -> TransferQueueController
    HANDSHAKE_ACK = "HANDSHAKE_ACK"  # TransferQueueController -> TransferQueueStorageUnit

    # DATA_OPERATION
    GET_DATA = "GET"
    PUT_DATA = "PUT"
    GET_DATA_RESPONSE = "GET_DATA_RESPONSE"
    PUT_DATA_RESPONSE = "PUT_DATA_RESPONSE"
    CLEAR_DATA = "CLEAR_DATA"
    CLEAR_DATA_RESPONSE = "CLEAR_DATA_RESPONSE"

    PUT_GET_OPERATION_ERROR = "PUT_GET_OPERATION_ERROR"
    PUT_GET_ERROR = "PUT_GET_ERROR"
    PUT_ERROR = "PUT_ERROR"
    GET_ERROR = "GET_ERROR"
    CLEAR_DATA_ERROR = "CLEAR_DATA_ERROR"

    # META_OPERATION
    GET_META = "GET_META"
    GET_META_RESPONSE = "GET_META_RESPONSE"
    GET_CLEAR_META = "GET_CLEAR_META"
    GET_CLEAR_META_RESPONSE = "GET_CLEAR_META_RESPONSE"
    CLEAR_META = "CLEAR_META"
    CLEAR_META_RESPONSE = "CLEAR_META_RESPONSE"

    # CHECK_CONSUMPTION
    CHECK_CONSUMPTION = "CHECK_CONSUMPTION"
    CONSUMPTION_RESPONSE = "CONSUMPTION_RESPONSE"

    # NOTIFY_DATA_UPDATE
    NOTIFY_DATA_UPDATE = "NOTIFY_DATA_UPDATE"
    NOTIFY_DATA_UPDATE_ACK = "NOTIFY_DATA_UPDATE_ACK"
    NOTIFY_DATA_UPDATE_ERROR = "NOTIFY_DATA_UPDATE_ERROR"


class ZMQServerInfo:
    def __init__(self, role: TransferQueueRole, id: str, ip: str, ports: dict[str, str]):
        self.role = role
        self.id = id
        self.ip = ip
        self.ports = ports

    def to_addr(self, port_name: str) -> str:
        return f"tcp://{self.ip}:{self.ports[port_name]}"

    def to_dict(self):
        return {
            "role": self.role,
            "id": self.id,
            "ip": self.ip,
            "ports": self.ports,
        }

    def __str__(self) -> str:
        return f"ZMQServerInfo(role={self.role}, id={self.id}, ip={self.ip}, ports={self.ports})"


@dataclass
class ZMQMessage:
    request_type: ZMQRequestType
    sender_id: str
    receiver_id: str | None
    body: dict[str, Any]
    request_id: str
    timestamp: float

    @classmethod
    def create(
        cls,
        request_type: ZMQRequestType,
        sender_id: str,
        body: dict[str, Any],
        receiver_id: Optional[str] = None,
    ) -> "ZMQMessage":
        return cls(
            request_type=request_type,
            sender_id=sender_id,
            receiver_id=receiver_id,
            body=body,
            request_id=str(uuid4().hex[:8]),
            timestamp=time.time(),
        )

    def serialize(self) -> bytes:
        """Using pickle to serialize ZMQMessage objects"""
        return pickle.dumps(self)

    @classmethod
    def deserialize(cls, data: bytes | list[bytes]):
        """Using pickle to deserialize ZMQMessage objects"""
        if isinstance(data, list):
            # Process multiple byte streams by deserializing each in sequence
            result = []
            for d in data:
                result.append(pickle.loads(d))
            return result
        else:
            # Single byte stream case
            return pickle.loads(data)


def get_free_port() -> str:
    with socket.socket() as sock:
        sock.bind(("", 0))
        return str(sock.getsockname()[1])


def create_zmq_socket(
    ctx: zmq.Context,
    socket_type: Any,
    identity: Optional[bytes] = None,
) -> zmq.Socket:
    mem = psutil.virtual_memory()
    socket = ctx.socket(socket_type)

    # Calculate buffer size based on system memory
    total_mem = mem.total / 1024**3
    available_mem = mem.available / 1024**3
    # For systems with substantial memory (>32GB total, >16GB available):
    # - Set a large 0.5GB buffer to improve throughput
    # For systems with less memory:
    # - Use system default (-1) to avoid excessive memory consumption
    if total_mem > 32 and available_mem > 16:
        buf_size = int(0.5 * 1024**3)  # 0.5GB in bytes
    else:
        buf_size = -1  # Use system default buffer size

    if socket_type in (zmq.PULL, zmq.DEALER, zmq.ROUTER):
        socket.setsockopt(zmq.RCVHWM, 0)
        socket.setsockopt(zmq.RCVBUF, buf_size)

    if socket_type in (zmq.PUSH, zmq.DEALER, zmq.ROUTER):
        socket.setsockopt(zmq.SNDHWM, 0)
        socket.setsockopt(zmq.SNDBUF, buf_size)

    if identity is not None:
        socket.setsockopt(zmq.IDENTITY, identity)
    return socket
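
A brief sketch of how these ZMQ helpers could be combined on the client side. The socket pattern, port name, and message body below are illustrative assumptions, not the package's actual wiring between client, controller, and storage:

import zmq

from transfer_queue.utils.utils import TransferQueueRole
from transfer_queue.utils.zmq_utils import (
    ZMQMessage,
    ZMQRequestType,
    ZMQServerInfo,
    create_zmq_socket,
    get_free_port,
)

ctx = zmq.Context()

# Advertise a controller endpoint; get_free_port() picks an unused TCP port.
controller = ZMQServerInfo(
    role=TransferQueueRole.CONTROLLER,
    id="controller-0",
    ip="127.0.0.1",
    ports={"request": get_free_port()},
)

# DEALER sockets get both send- and receive-side HWM/buffer tuning in create_zmq_socket.
sock = create_zmq_socket(ctx, zmq.DEALER, identity=b"client-0")
sock.connect(controller.to_addr("request"))

msg = ZMQMessage.create(
    request_type=ZMQRequestType.GET_META,
    sender_id="client-0",
    body={"batch_size": 8},
    receiver_id=controller.id,
)
sock.send(msg.serialize())  # pickled ZMQMessage bytes on the wire
# reply = ZMQMessage.deserialize(sock.recv())  # same helper on the receiving side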

transfer_queue/version/version
@@ -0,0 +1 @@
0.1.1.dev