PyPI - sglang - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl - Mend

sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

sglang/bench_latency.py +7 -2
sglang/global_config.py +5 -13
sglang/lang/interpreter.py +0 -3
sglang/srt/constrained/fsm_cache.py +5 -1
sglang/srt/layers/activation.py +12 -0
sglang/srt/layers/attention_backend.py +12 -12
sglang/srt/layers/fused_moe/layer.py +27 -7
sglang/srt/layers/layernorm.py +12 -0
sglang/srt/layers/sampler.py +32 -97
sglang/srt/lora/lora_manager.py +11 -8
sglang/srt/managers/schedule_batch.py +1 -0
sglang/srt/managers/tp_worker.py +8 -7
sglang/srt/model_executor/cuda_graph_runner.py +12 -1
sglang/srt/model_executor/model_runner.py +24 -41
sglang/srt/models/deepseek_v2.py +6 -1
sglang/srt/models/minicpm3.py +5 -1
sglang/srt/models/olmoe.py +415 -0
sglang/srt/sampling/sampling_batch_info.py +3 -50
sglang/srt/server.py +6 -1
sglang/srt/server_args.py +34 -1
sglang/srt/utils.py +7 -51
sglang/test/test_utils.py +0 -1
sglang/version.py +1 -1
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/METADATA +2 -2
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/RECORD +28 -27
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/WHEEL +1 -1
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/LICENSE +0 -0
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/top_level.txt +0 -0

sglang/srt/server.py CHANGED Viewed

@@ -78,6 +78,7 @@ from sglang.srt.utils import (
     assert_pkg_version,
     configure_logger,
     enable_show_time_cost,
+    is_hip,
     kill_child_process,
     maybe_set_triton_cache_manager,
     prepare_model,
@@ -152,7 +153,7 @@ async def flush_cache():
 async def update_weights(obj: UpdateWeightReqInput, request: Request):
     success, message = await tokenizer_manager.update_weights(obj, request)
-    content = {"message": message, "success": str(success)}
+    content = {"success": success, "message": message}
     if success:
         return JSONResponse(
             content,
@@ -434,6 +435,10 @@ def _set_envs_and_config(server_args: ServerArgs):
             "at https://docs.flashinfer.ai/installation.html.",
         )
+    if is_hip():
+        # to figure out a better method of not using fork later
+        mp.set_start_method("spawn", force=True)
 def _wait_and_warmup(server_args, pipe_finish_writer, pid):
     headers = {}

sglang/srt/server_args.py CHANGED Viewed

@@ -21,9 +21,22 @@ import logging
 import random
 from typing import List, Optional, Union
+from sglang.srt.utils import is_hip
 logger = logging.getLogger(__name__)
+class LoRAPathAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, {})
+        for lora_path in values:
+            if "=" in lora_path:
+                name, path = lora_path.split("=", 1)
+                getattr(namespace, self.dest)[name] = path
+            else:
+                getattr(namespace, self.dest)[lora_path] = lora_path
 @dataclasses.dataclass
 class ServerArgs:
     # Model and tokenizer
@@ -59,6 +72,7 @@ class ServerArgs:
     tp_size: int = 1
     stream_interval: int = 1
     random_seed: Optional[int] = None
+    constrained_json_whitespace_pattern: Optional[str] = None
     # Logging
     log_level: str = "info"
@@ -96,6 +110,7 @@ class ServerArgs:
     disable_custom_all_reduce: bool = False
     enable_mixed_chunk: bool = False
     enable_torch_compile: bool = False
+    max_torch_compile_bs: int = 32
     torchao_config: str = ""
     enable_p2p_check: bool = False
     enable_mla: bool = False
@@ -152,6 +167,11 @@ class ServerArgs:
             )
             self.sampling_backend = "pytorch"
+        # ROCm: flashinfer available later
+        if is_hip():
+            self.attention_backend = "triton"
+            self.sampling_backend = "pytorch"
         # Default kernel backends
         if self.enable_mla:
             logger.info("MLA optimization is tunred on. Use triton backend.")
@@ -359,6 +379,12 @@ class ServerArgs:
             default=ServerArgs.random_seed,
             help="The random seed.",
         )
+        parser.add_argument(
+            "--constrained-json-whitespace-pattern",
+            type=str,
+            default=ServerArgs.constrained_json_whitespace_pattern,
+            help=r"Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*",
+        )
         parser.add_argument(
             "--log-level",
             type=str,
@@ -498,6 +524,12 @@ class ServerArgs:
             action="store_true",
             help="Optimize the model with torch.compile. Experimental feature.",
         )
+        parser.add_argument(
+            "--max-torch-compile-bs",
+            type=int,
+            default=ServerArgs.max_torch_compile_bs,
+            help="Set the maximum batch size when using torch compile.",
+        )
         parser.add_argument(
             "--torchao-config",
             type=str,
@@ -532,7 +564,8 @@ class ServerArgs:
             type=str,
             nargs="*",
             default=None,
-            help="The list of LoRA adapters.",
+            action=LoRAPathAction,
+            help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}",
         )
         parser.add_argument(
             "--max-loras-per-batch",

sglang/srt/utils.py CHANGED Viewed

@@ -51,6 +51,11 @@ show_time_cost = False
 time_infos = {}
+# torch flag AMD GPU
+def is_hip() -> bool:
+    return torch.version.hip is not None
 def enable_show_time_cost():
     global show_time_cost
     show_time_cost = True
@@ -187,7 +192,7 @@ def allocate_init_ports(
         cur_port += 1
     if port is not None and ret_ports[0] != port:
-        logger.warn(
+        logger.warning(
             f"WARNING: Port {port} is not available. Use port {ret_ports[0]} instead."
         )
@@ -623,56 +628,7 @@ def set_ulimit(target_soft_limit=65535):
         try:
             resource.setrlimit(resource_type, (target_soft_limit, current_hard))
         except ValueError as e:
-            logger.warn(f"Fail to set RLIMIT_NOFILE: {e}")
-def is_llama3_405b_fp8_head_16(model_config):
-    """Return whether the model is meta-llama/Meta-Llama-3.1-405B-FP8 with 16 kv heads."""
-    if (
-        model_config.hf_config.architectures[0] == "LlamaForCausalLM"
-        and model_config.hf_config.hidden_size == 16384
-        and model_config.hf_config.intermediate_size == 53248
-        and model_config.hf_config.num_hidden_layers == 126
-        and model_config.hf_config.num_key_value_heads == 16
-        and hasattr(model_config.hf_config, "quantization_config")
-        and model_config.hf_config.quantization_config["quant_method"] == "fbgemm_fp8"
-    ):
-        return True
-    return False
-def monkey_patch_vllm_qvk_linear_loader():
-    """A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints."""
-    from vllm.model_executor.layers.linear import QKVParallelLinear
-    origin_weight_loader = QKVParallelLinear.weight_loader
-    def get_original_weight(loaded_weight, head_dim):
-        n_kv_head = loaded_weight.shape[0] // (2 * head_dim)
-        dim = loaded_weight.shape[1]
-        for i in range(n_kv_head):
-            loaded_weight[i * head_dim : (i + 1) * head_dim, :] = loaded_weight[
-                2 * i * head_dim : (2 * i + 1) * head_dim, :
-            ]
-        original_kv_weight = loaded_weight[: n_kv_head * head_dim, :]
-        assert original_kv_weight.shape == (n_kv_head * head_dim, dim)
-        return original_kv_weight
-    def weight_loader_srt(
-        self,
-        param: Parameter,
-        loaded_weight: torch.Tensor,
-        loaded_shard_id: Optional[str] = None,
-    ):
-        if (
-            loaded_shard_id in ["k", "v"]
-            and loaded_weight.shape[0] == self.head_size * self.total_num_kv_heads * 2
-        ):
-            loaded_weight = get_original_weight(loaded_weight, self.head_size)
-        origin_weight_loader(self, param, loaded_weight, loaded_shard_id)
-    setattr(QKVParallelLinear, "weight_loader", weight_loader_srt)
+            logger.warning(f"Fail to set RLIMIT_NOFILE: {e}")
 def add_api_key_middleware(app, api_key: str):

sglang/test/test_utils.py CHANGED Viewed

@@ -304,7 +304,6 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
 def select_sglang_backend(args: argparse.Namespace):
     if args.backend.startswith("srt"):
         if args.backend == "srt-no-parallel":
-            global_config.enable_parallel_decoding = False
             global_config.enable_parallel_encoding = False
         backend = RuntimeEndpoint(f"{args.host}:{args.port}")
     elif args.backend.startswith("gpt-"):

sglang/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.1.post1"

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.1
+Version: 0.3.1.post1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                                    Version 2.0, January 2004
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.3.1 https://github.com/sgl-project/sglang.git
+git clone -b v0.3.1.post1 https://github.com/sgl-project/sglang.git
 cd sglang
 pip install --upgrade pip

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
 sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
-sglang/bench_latency.py,sha256=EvmXpaREU-g25OTcOUTgAUPmA-txfnyjaqY-4hlq97w,16925
+sglang/bench_latency.py,sha256=CDMrch4QwIyb2DTH2kBIgQ6Q8sGHwtrx3Cz49qZNfpU,17078
 sglang/bench_serving.py,sha256=6OM5JIDuoxJDg-VLE4ijGGcS8-6ViaidV05lIrZmSzo,36239
 sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
-sglang/global_config.py,sha256=KWpXd4OCCWW2TRQo-dShvLs4jb15ej9Ejhxr_wggzBg,1535
+sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
 sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
 sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
 sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
-sglang/version.py,sha256=r4xAFihOf72W9TD-lpMi6ntWSTKTP2SlzKP1ytkjRbI,22
+sglang/version.py,sha256=83xK6WSmRR5ba-i5fDLUmoJT83Eg_dpsWgwcnsUhMpA,28
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
 sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
-sglang/lang/interpreter.py,sha256=M42SuOnijFaHWOe3Qyi-bNanRt-mYhSDa1wWn1J42Hw,30324
+sglang/lang/interpreter.py,sha256=rOquFbMzxry7IItZlAn5TwtQfxMy718JPxOkiXO-yrg,30234
 sglang/lang/ir.py,sha256=W3UfZikcGeT86PDDjDjw-yNzrKY2e2UYO4DTatMCfm0,17704
 sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
 sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,56 +25,56 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
 sglang/srt/conversation.py,sha256=S5w5V6G1xigNxa3UQoSxRcMpQLWWDT9EPBoHBvHkSAk,19663
 sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVvUdIMDo,6016
 sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
-sglang/srt/server.py,sha256=FNmTpX7E9fVWj_NFzp4AtE5ODaA_rg5Xm8uZ0FB0X4o,20041
-sglang/srt/server_args.py,sha256=5OHH3gaO1s5Y2UQw2_FnFxwxrsqnUQ_WNqP1R1IWUAA,21877
-sglang/srt/utils.py,sha256=pckOt7gyQfJaV3-h8FPurWyrPij5_EBUX_Xp7x6y6YM,24229
+sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
+sglang/srt/server_args.py,sha256=M1Bm9u2JRsEptne-kw-D-B_29Q-M6V4UpAM7K-JxXAc,23309
+sglang/srt/utils.py,sha256=8yxiMRttCcfswynkNPWD3yZFNAGFz2P1PzSuxHCBGns,22340
 sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
 sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
 sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
 sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5UOS_4,2070
 sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
-sglang/srt/constrained/fsm_cache.py,sha256=jItSvCu_XrAgltfejwgvdltaiT98-8lJGBe_84cSnTk,2786
+sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
 sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
-sglang/srt/layers/activation.py,sha256=JEXNTgqxoiU4N-gVm4XMjobhft4JKDcMrgTkfpsRUzM,4856
-sglang/srt/layers/attention_backend.py,sha256=39P3iMs7B1iEzCA3EHdqUp3BLafeIVFnFWGzpEhlTRk,18182
+sglang/srt/layers/activation.py,sha256=awcwOODYcVdUtC2JxJ1TGsV8Tru0eACKcxYN6cWHbl4,5148
+sglang/srt/layers/attention_backend.py,sha256=lqMsY4VaOO_szIWoTAinXf1DnP2UsbF32kzvwFySz9w,18119
 sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
-sglang/srt/layers/layernorm.py,sha256=RXuS4UyksatqTF6lSK7VYyEiUEnBiNIBlEn8q4w84UA,3404
+sglang/srt/layers/layernorm.py,sha256=-9Yph4nnMZYX_Q31MUGAimLajNclHXjgDkswpU2BTos,3694
 sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
 sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
 sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
-sglang/srt/layers/sampler.py,sha256=1BKsZbSLBGFVtTJo1LsThuoRjOSOnsL1AiwFxJNIXRs,5800
+sglang/srt/layers/sampler.py,sha256=Q4u46oYu66e34rBNzr50VoXO8FM-assYiCoROolq3Zs,3661
 sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJNLZlE,2703
 sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
 sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
-sglang/srt/layers/fused_moe/layer.py,sha256=GT3r2UPx_PAufJd0SUMOXyh76ymAeYDubd0SM0H71bo,20977
+sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
 sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
 sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
 sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
 sglang/srt/lora/lora.py,sha256=ksj866lgDul6zxO30Jm7Nrjv-mFAMrzdvP8sez3Pl6U,14938
 sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
-sglang/srt/lora/lora_manager.py,sha256=Q7rk1SMEZ75wda68rAZDGVyX_o8ZdIW2I5Fo_llaqHs,9475
+sglang/srt/lora/lora_manager.py,sha256=7J7cGmyy1Ph4HCvLdM-ViAizAbV1snZqD-S7JLWXasI,9561
 sglang/srt/managers/controller_multi.py,sha256=KolZDso2WqH1ZhQw9p1eTmlFRgo4bcvzBxE44_sNE_o,6300
 sglang/srt/managers/controller_single.py,sha256=DiZALP_iIPZQMRx09a-LwT5_Dg7p-WU8HXyMoxJ9sRA,4955
 sglang/srt/managers/detokenizer_manager.py,sha256=yQkL5gLomLiy1qc6e9HNz8hcj7JQFHm1AfIrzpXaWJE,6852
 sglang/srt/managers/io_struct.py,sha256=bqmL3NDPLqOn6Au3WLF0NOe8Dh7ECMN7BTHCkEZ_Edk,11247
 sglang/srt/managers/policy_scheduler.py,sha256=tiBUi2GJU5eQEBK6HfsO1_YjWtFkougo40954DIp4dM,13026
-sglang/srt/managers/schedule_batch.py,sha256=QfixWzh7ks60eYE52mZHfUseXqcb89h4ZO1Aur3weLU,27340
+sglang/srt/managers/schedule_batch.py,sha256=ppHYK65GP0dtuCEzpSbGm9uAne5rEoRmW8osLknXJpI,27384
 sglang/srt/managers/tokenizer_manager.py,sha256=ql-sObjl1oRigJwnLtqqTaaw-i7gPTDMoNXDEMftr40,29643
-sglang/srt/managers/tp_worker.py,sha256=Zbl_tFUAsD6Qv1fUEJCn_jyUc3JjDm33yI3Nmu1HY8w,39174
+sglang/srt/managers/tp_worker.py,sha256=4Hhla9rfGYEdQtzGmxlIEqxt_WVkn2dkLLNQZHgpkf0,39270
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
 sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
 sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
 sglang/srt/mem_cache/memory_pool.py,sha256=4br3Ea2bfA-YsF_sPOVHlF2zQzYGd8fVaYTp197yZsE,7871
 sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=LngmwtBcvobJ_9G8lD966SihjmMJlgMgHe_ZogK1kDg,10090
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZeO-8Mg4Tf0iP-L9FXcyhHfNzGWpTPEDGeUoC2lzHTE,10418
 sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
-sglang/srt/model_executor/model_runner.py,sha256=7jBSCdZxyDLWMOdwv1vRa7Oue-xbp8lA6I11ZPKFdAc,23457
+sglang/srt/model_executor/model_runner.py,sha256=LoQ7OFVwOiK_BfdpRfitss1TfJ8qrysHgWM-xXu7n2Y,22433
 sglang/srt/models/baichuan.py,sha256=NrG1rMJXhemkrUCEf8xKOSDQVsOD-nN8RQz6MWHOg84,15124
 sglang/srt/models/chatglm.py,sha256=KwxLHBEvK02McXDvBS0gnRxfIvOAu2QP7lgibrj9Nbc,13371
 sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
 sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
 sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
-sglang/srt/models/deepseek_v2.py,sha256=3D9WtPvVOu8U40x_KOksnmWBLmLIcgtV958go8NSj5Q,28307
+sglang/srt/models/deepseek_v2.py,sha256=bPaGRL8ieBCXKIf-KY7-D9Rus7Qj3VGvvtERzAXAZWs,28421
 sglang/srt/models/exaone.py,sha256=3I5ZoiLotf7U-8c9QJRubpgf6JDx9I_z-ViXQlCC-x8,13087
 sglang/srt/models/gemma.py,sha256=GkwgGFHgGlXgBZN7s7Wooz5tMyCp1YtgLahU2NOo66M,12273
 sglang/srt/models/gemma2.py,sha256=sFfCNEm0_OOWElRSTDuroRv8wNMX8v_81Uko9m546KA,14923
@@ -87,10 +87,11 @@ sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2z
 sglang/srt/models/llava.py,sha256=O4XGdl70Hh4tM_OHapFGHbReC82mbe9xLw6GELKWKhU,24881
 sglang/srt/models/llavavid.py,sha256=ou5uIuskBoBo0lXvqFFfDLBYYVfehx27n-Lu8X9gpLs,11992
 sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
-sglang/srt/models/minicpm3.py,sha256=S7bNeCAsfvL44Vn350KLaqX674SCb4CpUuDnhjLjr3U,25113
+sglang/srt/models/minicpm3.py,sha256=_C96kO3qGK0KRctXZf8LBR9s0sEW0QXWSGU0Vf6OrI8,25206
 sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
 sglang/srt/models/mixtral.py,sha256=oRC7mKBrPJhvzkWSabrbeQQQac-jtF4EV6H2Sgjc5JY,13897
 sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
+sglang/srt/models/olmoe.py,sha256=d0ECpU-IXXwGYg9tkVeMARUbqVcqEnWfpH3rrNiGKA0,15336
 sglang/srt/models/qwen.py,sha256=nqSRzkiZzpRVG6WGQ1MBUclQnXyw8jlvoOq-euM8j5s,9954
 sglang/srt/models/qwen2.py,sha256=9_M-VkHN1_T1XN-gsl_L636QMQ9BLF2WqvTcx_1L6aw,12432
 sglang/srt/models/qwen2_moe.py,sha256=s7b5XnSvsBYtZZUkjPp442m59CqPJ3HxGUIwXBVWsXw,17153
@@ -100,7 +101,7 @@ sglang/srt/models/xverse_moe.py,sha256=YR--WZ33G7XEMsS7ZJl1cQ62Q8PDo9gWqpvJBY_cb
 sglang/srt/models/yivl.py,sha256=B6MELthWIm5KdSzX3o2tbbpApY8XdjUdmcQSD4dQe_I,4835
 sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
 sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
-sglang/srt/sampling/sampling_batch_info.py,sha256=vkwy59Jt51FESYukmwDKwPbCM45WMb16dx_408B3oqc,7900
+sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
 sglang/srt/sampling/sampling_params.py,sha256=ggOXxafqfCD-xrGYcM57byLZ79CIeBP4AD5F44L_CW0,5635
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
 sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
@@ -120,10 +121,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
 sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
 sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
 sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
-sglang/test/test_utils.py,sha256=iBs07MBFxOidipTG1-s2hrCvcURFJVXo7gg10pzAQX8,17168
+sglang/test/test_utils.py,sha256=NLiJqFRWnCeQ-gdCBe0ubNFCsig1CPb1EU-Ay9CtSfU,17109
 sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
-sglang-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.3.1.dist-info/METADATA,sha256=QKZQ7PjuK22x_QlQy1LqPX6y4zLgJJ9FPoNNSkw3cEk,38125
-sglang-0.3.1.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
-sglang-0.3.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.3.1.dist-info/RECORD,,
+sglang-0.3.1.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.3.1.post1.dist-info/METADATA,sha256=zswdq5UTi5aLVmpEyjnc7SzIi60yc4w2hlMhckdxmcU,38137
+sglang-0.3.1.post1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+sglang-0.3.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.1.post1.dist-info/RECORD,,

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (74.1.2)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/LICENSE RENAMED Viewed

File without changes

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/top_level.txt RENAMED Viewed

File without changes

sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl

sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl