sglang 0.4.5.post2__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- sglang/bench_one_batch.py +19 -3
- sglang/bench_serving.py +8 -8
- sglang/compile_deep_gemm.py +177 -0
- sglang/lang/backend/openai.py +5 -1
- sglang/lang/backend/runtime_endpoint.py +5 -1
- sglang/srt/code_completion_parser.py +1 -1
- sglang/srt/configs/deepseekvl2.py +1 -1
- sglang/srt/configs/model_config.py +11 -2
- sglang/srt/constrained/llguidance_backend.py +78 -61
- sglang/srt/constrained/xgrammar_backend.py +1 -0
- sglang/srt/conversation.py +34 -1
- sglang/srt/disaggregation/decode.py +96 -5
- sglang/srt/disaggregation/mini_lb.py +113 -15
- sglang/srt/disaggregation/mooncake/conn.py +199 -32
- sglang/srt/disaggregation/nixl/__init__.py +1 -0
- sglang/srt/disaggregation/nixl/conn.py +622 -0
- sglang/srt/disaggregation/prefill.py +119 -20
- sglang/srt/disaggregation/utils.py +17 -0
- sglang/srt/entrypoints/engine.py +4 -0
- sglang/srt/entrypoints/http_server.py +11 -9
- sglang/srt/function_call_parser.py +132 -0
- sglang/srt/layers/activation.py +2 -2
- sglang/srt/layers/attention/base_attn_backend.py +3 -0
- sglang/srt/layers/attention/flashattention_backend.py +809 -160
- sglang/srt/layers/attention/flashmla_backend.py +8 -11
- sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +5 -5
- sglang/srt/layers/attention/triton_ops/extend_attention.py +5 -5
- sglang/srt/layers/attention/triton_ops/prefill_attention.py +7 -3
- sglang/srt/layers/attention/vision.py +2 -0
- sglang/srt/layers/dp_attention.py +1 -1
- sglang/srt/layers/layernorm.py +42 -5
- sglang/srt/layers/logits_processor.py +2 -2
- sglang/srt/layers/moe/ep_moe/layer.py +2 -0
- sglang/srt/layers/moe/fused_moe_native.py +2 -4
- sglang/srt/layers/moe/fused_moe_triton/configs/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +41 -41
- sglang/srt/layers/moe/fused_moe_triton/configs/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +18 -15
- sglang/srt/layers/pooler.py +6 -0
- sglang/srt/layers/quantization/awq.py +5 -1
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +1 -0
- sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +2 -0
- sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +153 -0
- sglang/srt/layers/quantization/deep_gemm.py +385 -0
- sglang/srt/layers/quantization/fp8_kernel.py +7 -38
- sglang/srt/layers/quantization/fp8_utils.py +2 -2
- sglang/srt/layers/quantization/gptq.py +13 -7
- sglang/srt/layers/quantization/int8_kernel.py +32 -1
- sglang/srt/layers/quantization/modelopt_quant.py +2 -2
- sglang/srt/layers/quantization/w8a8_int8.py +3 -3
- sglang/srt/layers/radix_attention.py +13 -3
- sglang/srt/layers/rotary_embedding.py +176 -132
- sglang/srt/layers/sampler.py +2 -2
- sglang/srt/managers/data_parallel_controller.py +17 -4
- sglang/srt/managers/io_struct.py +21 -3
- sglang/srt/managers/mm_utils.py +85 -28
- sglang/srt/managers/multimodal_processors/base_processor.py +14 -1
- sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +9 -2
- sglang/srt/managers/multimodal_processors/gemma3.py +2 -5
- sglang/srt/managers/multimodal_processors/janus_pro.py +2 -2
- sglang/srt/managers/multimodal_processors/minicpm.py +4 -3
- sglang/srt/managers/multimodal_processors/qwen_vl.py +38 -13
- sglang/srt/managers/schedule_batch.py +42 -12
- sglang/srt/managers/scheduler.py +47 -26
- sglang/srt/managers/tokenizer_manager.py +120 -30
- sglang/srt/managers/tp_worker.py +1 -0
- sglang/srt/mem_cache/hiradix_cache.py +40 -32
- sglang/srt/mem_cache/memory_pool.py +118 -13
- sglang/srt/model_executor/cuda_graph_runner.py +16 -10
- sglang/srt/model_executor/forward_batch_info.py +51 -95
- sglang/srt/model_executor/model_runner.py +29 -27
- sglang/srt/models/deepseek.py +12 -2
- sglang/srt/models/deepseek_nextn.py +101 -6
- sglang/srt/models/deepseek_v2.py +153 -76
- sglang/srt/models/deepseek_vl2.py +9 -4
- sglang/srt/models/gemma3_causal.py +1 -1
- sglang/srt/models/llama4.py +0 -1
- sglang/srt/models/minicpm3.py +2 -2
- sglang/srt/models/minicpmo.py +22 -7
- sglang/srt/models/mllama4.py +2 -2
- sglang/srt/models/qwen2_5_vl.py +3 -6
- sglang/srt/models/qwen2_vl.py +3 -7
- sglang/srt/models/roberta.py +178 -0
- sglang/srt/openai_api/adapter.py +87 -10
- sglang/srt/openai_api/protocol.py +6 -1
- sglang/srt/server_args.py +65 -60
- sglang/srt/speculative/build_eagle_tree.py +2 -2
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +3 -3
- sglang/srt/speculative/eagle_utils.py +2 -2
- sglang/srt/speculative/eagle_worker.py +2 -7
- sglang/srt/torch_memory_saver_adapter.py +10 -1
- sglang/srt/utils.py +48 -6
- sglang/test/runners.py +6 -13
- sglang/test/test_utils.py +39 -19
- sglang/version.py +1 -1
- {sglang-0.4.5.post2.dist-info → sglang-0.4.6.dist-info}/METADATA +6 -7
- {sglang-0.4.5.post2.dist-info → sglang-0.4.6.dist-info}/RECORD +99 -92
- {sglang-0.4.5.post2.dist-info → sglang-0.4.6.dist-info}/WHEEL +1 -1
- {sglang-0.4.5.post2.dist-info → sglang-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.5.post2.dist-info → sglang-0.4.6.dist-info}/top_level.txt +0 -0
sglang/srt/disaggregation/mooncake/conn.py

@@ -1,8 +1,10 @@
 from __future__ import annotations

 import asyncio
+import concurrent.futures
 import dataclasses
 import logging
+import os
 import queue
 import socket
 import struct

@@ -73,9 +75,7 @@ class TransferInfo:
     endpoint: str
     dst_port: int
     mooncake_session_id: str
-    dst_kv_ptrs: list[int]
     dst_kv_indices: npt.NDArray[np.int64]
-    dst_aux_ptrs: list[int]
     dst_aux_index: int

     @classmethod

@@ -85,10 +85,29 @@ class TransferInfo:
             endpoint=msg[1].decode("ascii"),
             dst_port=int(msg[2].decode("ascii")),
             mooncake_session_id=msg[3].decode("ascii"),
+            dst_kv_indices=np.frombuffer(msg[4], dtype=np.int64),
+            dst_aux_index=int(msg[5].decode("ascii")),
+        )
+
+
+@dataclasses.dataclass
+class KVArgsRegisterInfo:
+    room: str
+    endpoint: str
+    dst_port: int
+    mooncake_session_id: str
+    dst_kv_ptrs: list[int]
+    dst_aux_ptrs: list[int]
+
+    @classmethod
+    def from_zmq(cls, msg: List[bytes]):
+        return cls(
+            room=str(msg[0].decode("ascii")),
+            endpoint=msg[1].decode("ascii"),
+            dst_port=int(msg[2].decode("ascii")),
+            mooncake_session_id=msg[3].decode("ascii"),
             dst_kv_ptrs=list(struct.unpack(f"{len(msg[4])//8}Q", msg[4])),
-            dst_kv_indices=np.frombuffer(msg[5], dtype=np.int64),
-            dst_aux_ptrs=list(struct.unpack(f"{len(msg[6])//8}Q", msg[6])),
-            dst_aux_index=int(msg[7].decode("ascii")),
+            dst_aux_ptrs=list(struct.unpack(f"{len(msg[5])//8}Q", msg[5])),
         )

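This hunk splits the old eight-frame per-request handshake in two: `KVArgsRegisterInfo` now carries the pointer tables once per session, while `TransferInfo` carries only the per-request indices. Pointer lists travel as packed unsigned 64-bit words. A minimal round-trip sketch of that encoding (standalone; the pointer and index values are made up):

```python
import struct

import numpy as np

# Made-up pointer table and KV page indices, standing in for
# kv_args.kv_data_ptrs and a request's dst_kv_indices.
kv_data_ptrs = [0x7F0000000000, 0x7F0000100000]
kv_indices = np.array([3, 4, 5, 9], dtype=np.int64)

# Sender side: one 8-byte unsigned word ("Q") per pointer.
packed = b"".join(struct.pack("Q", ptr) for ptr in kv_data_ptrs)

# Receiver side: frame length divided by 8 gives the count for the unpack format.
assert list(struct.unpack(f"{len(packed)//8}Q", packed)) == kv_data_ptrs

# Indices travel as a raw numpy buffer and come back via frombuffer.
assert (np.frombuffer(kv_indices.tobytes(), dtype=np.int64) == kv_indices).all()
```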
@@ -109,6 +128,13 @@ class MooncakeKVManager(BaseKVManager):
         # for p/d multi node infer
         self.bootstrap_port = server_args.disaggregation_bootstrap_port
         self.dist_init_addr = server_args.dist_init_addr
+        self.tp_size = server_args.tp_size
+        self.dp_size = server_args.dp_size
+        self.enable_dp_attention = server_args.enable_dp_attention
+        if not server_args.enable_dp_attention and server_args.dp_size != 1:
+            raise ValueError(
+                "If dp_attention is not enabled, dp size must be 1 in disaggregation mode."
+            )
         self.request_status: Dict[int, KVPoll] = {}
         self.rank_port = None
         self.server_socket = zmq.Context().socket(zmq.PULL)

@@ -116,11 +142,19 @@ class MooncakeKVManager(BaseKVManager):
         if self.disaggregation_mode == DisaggregationMode.PREFILL:
             self.transfer_queue = queue.Queue()
             self.transfer_infos: Dict[int, TransferInfo] = {}
+            self.decode_kv_args_table: Dict[str, KVArgsRegisterInfo] = {}
             self.start_prefill_thread()
             self._register_to_bootstrap()
+
+            # Determine the number of threads to use for kv sender
+            cpu_count = os.cpu_count()
+            self.executor = concurrent.futures.ThreadPoolExecutor(
+                min(cpu_count // 4, 16)
+            )
         elif self.disaggregation_mode == DisaggregationMode.DECODE:
             self.start_decode_thread()
             self.connection_pool: Dict[str, Dict[str, Union[str, int]]] = {}
+            self.prefill_dp_size_table: Dict[str, int] = {}
         else:
             raise ValueError(
                 f"Unsupported DisaggregationMode: {self.disaggregation_mode}"

@@ -150,28 +184,53 @@
         dst_kv_ptrs: list[int],
         dst_kv_indices: npt.NDArray[np.int64],
     ):
-        #
+        # Group by indices
         prefill_kv_blocks, dst_kv_blocks = group_concurrent_contiguous(
             prefill_kv_indices, dst_kv_indices
         )

         num_layers = len(self.kv_args.kv_data_ptrs)
-        for layer_id in range(num_layers):
-            src_ptr = self.kv_args.kv_data_ptrs[layer_id]
-            dst_ptr = dst_kv_ptrs[layer_id]
-            item_len = self.kv_args.kv_item_lens[layer_id]
+        layers_params = [
+            (
+                self.kv_args.kv_data_ptrs[layer_id],
+                dst_kv_ptrs[layer_id],
+                self.kv_args.kv_item_lens[layer_id],
+            )
+            for layer_id in range(num_layers)
+        ]

+        # Worker function for processing a single layer
+        def process_layer(src_ptr: int, dst_ptr: int, item_len: int) -> int:
             for prefill_index, decode_index in zip(prefill_kv_blocks, dst_kv_blocks):
                 src_addr = src_ptr + int(prefill_index[0]) * item_len
                 dst_addr = dst_ptr + int(decode_index[0]) * item_len
                 length = item_len * len(prefill_index)

-                # TODO: make async later
                 status = self.engine.transfer_sync(
                     mooncake_session_id, src_addr, dst_addr, length
                 )
                 if status != 0:
                     return status
+            return 0
+
+        futures = [
+            self.executor.submit(
+                process_layer,
+                src_ptr,
+                dst_ptr,
+                item_len,
+            )
+            for (src_ptr, dst_ptr, item_len) in layers_params
+        ]
+
+        for future in concurrent.futures.as_completed(futures):
+            status = future.result()
+            if status != 0:
+                # Immediate shutdown on first error (existing tasks will finish)
+                executor.shutdown(wait=False)
+                for f in futures:
+                    f.cancel()
+                return status

         return 0

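The sequential per-layer loop becomes a fan-out: one task per layer is submitted to the shared `ThreadPoolExecutor`, and the first non-zero status aborts the remaining work. (Note that the shipped error path calls `executor.shutdown` although the pool is stored as `self.executor`.) Below is a self-contained sketch of the same fail-fast pattern, with a stub in place of `engine.transfer_sync` and a `max(1, ...)` guard added so the pool size stays positive on small machines:

```python
import concurrent.futures
import os


def transfer_layer(layer_id: int) -> int:
    """Stub for a per-layer synchronous transfer; 0 means success."""
    return 0 if layer_id != 2 else -1  # simulate a failure on layer 2


# Sizing heuristic from the diff (a quarter of the cores, capped at 16),
# guarded so ThreadPoolExecutor never receives max_workers == 0.
executor = concurrent.futures.ThreadPoolExecutor(
    max(1, min((os.cpu_count() or 1) // 4, 16))
)

futures = [executor.submit(transfer_layer, layer_id) for layer_id in range(8)]

status = 0
for future in concurrent.futures.as_completed(futures):
    status = future.result()
    if status != 0:
        # Fail fast: stop accepting work and cancel tasks not yet started;
        # tasks already running are not interrupted.
        executor.shutdown(wait=False)
        for f in futures:
            f.cancel()
        break

print("transfer status:", status)
```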
@@ -215,6 +274,13 @@ class MooncakeKVManager(BaseKVManager):
                 waiting_req_bytes = self.server_socket.recv_multipart()
                 room = waiting_req_bytes[0].decode("ascii")
                 if room == "None":
+                    mooncake_session_id = waiting_req_bytes[3].decode("ascii")
+                    self.decode_kv_args_table[mooncake_session_id] = (
+                        KVArgsRegisterInfo.from_zmq(waiting_req_bytes)
+                    )
+                    logger.debug(
+                        f"Register KVArgs from {mooncake_session_id} successfully"
+                    )
                     continue
                 room = int(room)
                 self.transfer_infos[room] = TransferInfo.from_zmq(waiting_req_bytes)

@@ -231,12 +297,12 @@ class MooncakeKVManager(BaseKVManager):
                 chunked_dst_kv_indice = req.dst_kv_indices[kv_chunk.index_slice]
                 assert len(chunked_dst_kv_indice) == len(
                     kv_chunk.prefill_kv_indices
-                )
+                ), f"len(chunked_dst_kv_indice) = {len(chunked_dst_kv_indice)}, len(kv_chunk.prefill_kv_indices) = {len(kv_chunk.prefill_kv_indices)}"

                 ret = self.send_kvcache(
                     req.mooncake_session_id,
                     kv_chunk.prefill_kv_indices,
-                    req.dst_kv_ptrs,
+                    self.decode_kv_args_table[req.mooncake_session_id].dst_kv_ptrs,
                     chunked_dst_kv_indice,
                 )
                 if ret != 0:

@@ -251,7 +317,9 @@ class MooncakeKVManager(BaseKVManager):
                     ret = self.send_aux(
                         req.mooncake_session_id,
                         kv_chunk.prefill_aux_index,
-                        req.dst_aux_ptrs,
+                        self.decode_kv_args_table[
+                            req.mooncake_session_id
+                        ].dst_aux_ptrs,
                         req.dst_aux_index,
                     )
                     self.request_status[req.room] = (

@@ -331,6 +399,8 @@ class MooncakeKVManager(BaseKVManager):
         url = f"http://{bootstrap_server_url}/route"
         payload = {
             "role": "Prefill",
+            "tp_size": self.tp_size,
+            "dp_size": self.dp_size,
             "rank_ip": get_local_ip_by_remote(),
             "rank_port": self.rank_port,
             "engine_rank": self.kv_args.engine_rank,

@@ -408,12 +478,41 @@ class MooncakeKVReceiver(BaseKVReceiver):
         self.session_id = self.kv_mgr.get_session_id()
         self.kv_mgr.update_status(bootstrap_room, KVPoll.Bootstrapping)

+        if not self.kv_mgr.enable_dp_attention:
+            # We assume dp_attention should be activated simultaneously for
+            # both prefill role and decode role. If the decode instance does
+            # not enable dp_attention, then dp_attention is not enabled on the
+            # prefill instance as well. Therefore, we should skip questioning
+            # the prefill dp size to reduce bootstrap overhead.
+            self.prefill_dp_size = 1
+        elif self.bootstrap_addr not in self.kv_mgr.prefill_dp_size_table:
+            self.prefill_dp_size, tp_size_per_dp_rank = (
+                self._get_prefill_dp_size_from_server()
+            )
+            # Currently, we don't allow prefill instance and decode instance to
+            # have different TP sizes per DP rank.
+            assert tp_size_per_dp_rank == self.kv_mgr.tp_size // self.kv_mgr.dp_size
+            if self.prefill_dp_size is None:
+                logger.error(
+                    f"Could not fetch prefill dp_size for bootstrap_addr: {self.bootstrap_addr}"
+                )
+            else:
+                self.kv_mgr.prefill_dp_size_table[self.bootstrap_addr] = (
+                    self.prefill_dp_size
+                )
+        else:
+            self.prefill_dp_size = self.kv_mgr.prefill_dp_size_table[
+                self.bootstrap_addr
+            ]
+
         # NOTE: key distinguished by bootstrap_addr and engine_rank
+        self.target_dp_group = bootstrap_room % self.prefill_dp_size
         bootstrap_key = f"{self.bootstrap_addr}_{self.kv_mgr.kv_args.engine_rank}"

         if bootstrap_key not in self.kv_mgr.connection_pool:
             self.bootstrap_info = self._get_bootstrap_info_from_server(
-                self.kv_mgr.kv_args.engine_rank
+                self.kv_mgr.kv_args.engine_rank,
+                self.target_dp_group,
             )
             if self.bootstrap_info is None:
                 logger.error(

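On the decode side, the target prefill DP group is derived deterministically from the bootstrap room, while the bootstrap server buckets each registering prefill rank into a DP group by integer division. A worked example of that arithmetic, under assumed sizes (prefill tp_size=8, dp_size=4; the values are illustrative only):

```python
# Assumed prefill parallel layout, for illustration.
prefill_tp_size = 8
prefill_dp_size = 4
tp_size_per_dp_rank = prefill_tp_size // prefill_dp_size  # 2 ranks per DP group

# Decode side: the room id (one per request) picks a target DP group.
bootstrap_room = 1234567
target_dp_group = bootstrap_room % prefill_dp_size  # -> 3

# Bootstrap-server side: engine ranks 0..7 bucket into groups of 2.
for engine_rank in range(prefill_tp_size):
    dp_group = engine_rank // tp_size_per_dp_rank
    tp_rank_in_dp_group = engine_rank % tp_size_per_dp_rank
    print(f"engine_rank {engine_rank} -> dp_group {dp_group}, tp_rank {tp_rank_in_dp_group}")
# engine_rank 0,1 -> dp_group 0; 2,3 -> 1; 4,5 -> 2; 6,7 -> 3
```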
@@ -421,16 +520,18 @@ class MooncakeKVReceiver(BaseKVReceiver):
                 )
             else:
                 self.kv_mgr.connection_pool[bootstrap_key] = self.bootstrap_info
+                # Register kv_args only once to prefill KVManager according to the info fetched from the bootstrap server
+                self._register_kv_args()
         else:
             self.bootstrap_info = self.kv_mgr.connection_pool[bootstrap_key]

         assert self.bootstrap_info is not None
         self.kv_mgr.update_status(bootstrap_room, KVPoll.WaitingForInput)

-    def _get_bootstrap_info_from_server(self, engine_rank):
+    def _get_bootstrap_info_from_server(self, engine_rank, target_dp_group):
         """Fetch the bootstrap info from the bootstrap server."""
         try:
-            url = f"http://{self.bootstrap_addr}/route?engine_rank={engine_rank}"
+            url = f"http://{self.bootstrap_addr}/route?engine_rank={engine_rank}&target_dp_group={target_dp_group}"
             response = requests.get(url)
             if response.status_code == 200:
                 bootstrap_info = response.json()

@@ -444,6 +545,49 @@ class MooncakeKVReceiver(BaseKVReceiver):
             logger.error(f"Error fetching prefill info from bootstrap: {e}")
             return None

+    def _get_prefill_dp_size_from_server(self) -> int:
+        """Fetch the prefill parallel info from the bootstrap server."""
+        try:
+            url = f"http://{self.bootstrap_addr}/route?engine_rank={-1}&target_dp_group={-1}"
+            response = requests.get(url)
+            if response.status_code == 200:
+                prefill_parallel_info = response.json()
+                return int(prefill_parallel_info["prefill_dp_size"]), int(
+                    prefill_parallel_info["tp_size_per_dp_rank"]
+                )
+            else:
+                logger.error(
+                    f"Failed to get prefill parallel info: {response.status_code}, {response.text}"
+                )
+                return None
+        except Exception as e:
+            logger.error(f"Error fetching prefill parallel info from bootstrap: {e}")
+            return None
+
+    def _register_kv_args(self):
+        self.prefill_server_url = (
+            f"{self.bootstrap_info['rank_ip']}:{self.bootstrap_info['rank_port']}"
+        )
+
+        packed_kv_data_ptrs = b"".join(
+            struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.kv_data_ptrs
+        )
+        packed_aux_data_ptrs = b"".join(
+            struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.aux_data_ptrs
+        )
+        sock, lock = self._connect("tcp://" + self.prefill_server_url)
+        with lock:
+            sock.send_multipart(
+                [
+                    "None".encode("ascii"),
+                    get_local_ip_by_remote().encode("ascii"),
+                    str(self.kv_mgr.rank_port).encode("ascii"),
+                    self.session_id.encode("ascii"),
+                    packed_kv_data_ptrs,
+                    packed_aux_data_ptrs,
+                ]
+            )
+
     @classmethod
     def _connect(cls, endpoint: str):
         with cls._global_lock:

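`_register_kv_args` reuses the transfer socket but sets the room frame to the literal string "None", which `start_prefill_thread` uses to tell a one-time registration apart from a per-request transfer. The two multipart layouts, summarized as a sketch:

```python
# Registration message (sent once per decode session by _register_kv_args):
#   [0] b"None"                room sentinel: this is a registration
#   [1] local ip (ascii)       decode endpoint
#   [2] rank port (ascii)      decode ZMQ port
#   [3] session id (ascii)     key into decode_kv_args_table
#   [4] packed kv_data_ptrs    one 8-byte uint64 per layer pointer
#   [5] packed aux_data_ptrs   one 8-byte uint64 per aux pointer
#
# Per-request message (parsed by TransferInfo.from_zmq):
#   [0] room id (ascii int)
#   [1] local ip (ascii)
#   [2] rank port (ascii)
#   [3] session id (ascii)
#   [4] kv_indices.tobytes()   raw np.int64 page indices
#   [5] aux index (ascii int)
```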
@@ -462,12 +606,6 @@ class MooncakeKVReceiver(BaseKVReceiver):
                 f"Fetched bootstrap info: {self.bootstrap_info} for engine rank: {self.kv_mgr.kv_args.engine_rank}"
             )

-        packed_kv_data_ptrs = b"".join(
-            struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.kv_data_ptrs
-        )
-        packed_aux_data_ptrs = b"".join(
-            struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.aux_data_ptrs
-        )
         sock, lock = self._connect("tcp://" + self.prefill_server_url)
         with lock:
             sock.send_multipart(

@@ -476,9 +614,7 @@ class MooncakeKVReceiver(BaseKVReceiver):
                     get_local_ip_by_remote().encode("ascii"),
                     str(self.kv_mgr.rank_port).encode("ascii"),
                     self.session_id.encode("ascii"),
-                    packed_kv_data_ptrs,
                     kv_indices.tobytes(),
-                    packed_aux_data_ptrs,
                     str(aux_index).encode("ascii"),
                 ]
             )

@@ -497,7 +633,9 @@ class MooncakeKVBootstrapServer(BaseKVBootstrapServer):
         self.store = dict()
         self.lock = asyncio.Lock()
         self._setup_routes()
-        self.
+        self.dp_size = None
+        self.tp_size_per_dp_rank = None
+        self.prefill_port_table: Dict[int, Dict[int, Dict[str, Union[str, int]]]] = {}

         # Start bootstrap server
         self.thread = threading.Thread(target=self._run_server, daemon=True)

@@ -523,35 +661,64 @@ class MooncakeKVBootstrapServer(BaseKVBootstrapServer):
     async def _handle_route_put(self, request: web.Request):
         data = await request.json()
         role = data["role"]
+        tp_size = data["tp_size"]
+        dp_size = data["dp_size"]
         rank_ip = data["rank_ip"]
         rank_port = int(data["rank_port"])
         engine_rank = int(data["engine_rank"])

+        if self.dp_size is None:
+            self.dp_size = dp_size
+
+        tp_size_per_dp_rank = tp_size // dp_size
+        if self.tp_size_per_dp_rank == None:
+            self.tp_size_per_dp_rank = tp_size_per_dp_rank
+
         # Add lock to make sure thread-safe
         if role == "Prefill":
-            self.prefill_port_table[engine_rank] = {
+            dp_group = engine_rank // tp_size_per_dp_rank
+            tp_rank_in_dp_group = engine_rank % tp_size_per_dp_rank
+
+            async with self.lock:
+                if dp_group not in self.prefill_port_table:
+                    self.prefill_port_table[dp_group] = {}
+
+                self.prefill_port_table[dp_group][tp_rank_in_dp_group] = {
                     "rank_ip": rank_ip,
                     "rank_port": rank_port,
                 }
             logger.debug(
-                f"
+                f"Register Prefill bootstrap: {engine_rank} with rank_ip: {rank_ip} and rank_port: {rank_port}"
             )

         return web.Response(text="OK", status=200)

     async def _handle_route_get(self, request: web.Request):
         engine_rank = request.query.get("engine_rank")
-        if not engine_rank:
-            return web.Response(text="Missing rank", status=400)
+        target_dp_group = request.query.get("target_dp_group")
+        if not engine_rank or not target_dp_group:
+            return web.Response(text="Missing inputs for bootstrap server.", status=400)
+
+        # Currently we use engine_rank == -1 and target_dp_group == -1 to sync dp size
+        if int(engine_rank) == -1 and int(target_dp_group) == -1:
+            prefill_parallel_info = {
+                "prefill_dp_size": self.dp_size,
+                "tp_size_per_dp_rank": self.tp_size_per_dp_rank,
+            }
+            return web.json_response(prefill_parallel_info, status=200)

         # Find corresponding prefill info
+        tp_rank_in_dp_group = int(engine_rank) % self.tp_size_per_dp_rank
+
         async with self.lock:
-            bootstrap_info = self.prefill_port_table
+            bootstrap_info = self.prefill_port_table[int(target_dp_group)][
+                tp_rank_in_dp_group
+            ]

         if bootstrap_info is not None:
             return web.json_response(bootstrap_info, status=200)
         else:
-            return web.Response(text="
+            return web.Response(text="Bootstrap info not Found", status=404)

     def _run_server(self):
         try:

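The bootstrap HTTP protocol gains a sentinel query: `engine_rank=-1&target_dp_group=-1` returns the prefill parallel layout, while a normal query resolves one prefill rank's endpoint. A client sketch of both calls (the address and the commented response values are illustrative, not taken from the diff):

```python
import requests

bootstrap_addr = "10.0.0.1:8998"  # hypothetical bootstrap server address

# Sentinel query: fetch the prefill parallel layout once per decode instance.
parallel_info = requests.get(
    f"http://{bootstrap_addr}/route?engine_rank=-1&target_dp_group=-1"
).json()
# e.g. {"prefill_dp_size": 4, "tp_size_per_dp_rank": 2}

# Normal query: resolve the endpoint of one prefill rank in a DP group.
bootstrap_info = requests.get(
    f"http://{bootstrap_addr}/route?engine_rank=5&target_dp_group=2"
).json()
# e.g. {"rank_ip": "10.0.0.2", "rank_port": 17000}
```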
sglang/srt/disaggregation/nixl/__init__.py

@@ -0,0 +1 @@
+from .conn import NixlKVBootstrapServer, NixlKVManager, NixlKVReceiver, NixlKVSender