sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- sglang/bench_latency.py +7 -2
- sglang/global_config.py +5 -13
- sglang/lang/interpreter.py +0 -3
- sglang/srt/constrained/fsm_cache.py +5 -1
- sglang/srt/layers/activation.py +12 -0
- sglang/srt/layers/attention_backend.py +12 -12
- sglang/srt/layers/fused_moe/layer.py +27 -7
- sglang/srt/layers/layernorm.py +12 -0
- sglang/srt/layers/sampler.py +32 -97
- sglang/srt/lora/lora_manager.py +11 -8
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/managers/tp_worker.py +8 -7
- sglang/srt/model_executor/cuda_graph_runner.py +12 -1
- sglang/srt/model_executor/model_runner.py +24 -41
- sglang/srt/models/deepseek_v2.py +6 -1
- sglang/srt/models/minicpm3.py +5 -1
- sglang/srt/models/olmoe.py +415 -0
- sglang/srt/sampling/sampling_batch_info.py +3 -50
- sglang/srt/server.py +6 -1
- sglang/srt/server_args.py +34 -1
- sglang/srt/utils.py +7 -51
- sglang/test/test_utils.py +0 -1
- sglang/version.py +1 -1
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/METADATA +2 -2
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/RECORD +28 -27
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/WHEEL +1 -1
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/LICENSE +0 -0
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/top_level.txt +0 -0
    
sglang/srt/server.py CHANGED

@@ -78,6 +78,7 @@ from sglang.srt.utils import (
     assert_pkg_version,
     configure_logger,
     enable_show_time_cost,
+    is_hip,
     kill_child_process,
     maybe_set_triton_cache_manager,
     prepare_model,
@@ -152,7 +153,7 @@ async def flush_cache():
 async def update_weights(obj: UpdateWeightReqInput, request: Request):
 
     success, message = await tokenizer_manager.update_weights(obj, request)
-    content = {"
+    content = {"success": success, "message": message}
     if success:
         return JSONResponse(
             content,
@@ -434,6 +435,10 @@ def _set_envs_and_config(server_args: ServerArgs):
             "at https://docs.flashinfer.ai/installation.html.",
         )
 
+    if is_hip():
+        # to figure out a better method of not using fork later
+        mp.set_start_method("spawn", force=True)
+
 
 def _wait_and_warmup(server_args, pipe_finish_writer, pid):
     headers = {}
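The update_weights change above completes a previously truncated response body: the handler now returns both a "success" flag and a "message". A minimal client-side sketch of reading that response; the localhost URL, port, and the model_path field are illustrative assumptions, not taken from this diff:

```python
# Hypothetical client call; assumes an SGLang server on localhost:30000 and that
# UpdateWeightReqInput accepts a "model_path" field (assumption for illustration).
import requests

resp = requests.post(
    "http://localhost:30000/update_weights",
    json={"model_path": "meta-llama/Meta-Llama-3.1-8B-Instruct"},
)
body = resp.json()
# After this release the body carries both keys, matching the diff above.
print(body["success"], body["message"])
```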
    
sglang/srt/server_args.py CHANGED

@@ -21,9 +21,22 @@ import logging
 import random
 from typing import List, Optional, Union
 
+from sglang.srt.utils import is_hip
+
 logger = logging.getLogger(__name__)
 
 
+class LoRAPathAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, {})
+        for lora_path in values:
+            if "=" in lora_path:
+                name, path = lora_path.split("=", 1)
+                getattr(namespace, self.dest)[name] = path
+            else:
+                getattr(namespace, self.dest)[lora_path] = lora_path
+
+
 @dataclasses.dataclass
 class ServerArgs:
     # Model and tokenizer
@@ -59,6 +72,7 @@ class ServerArgs:
     tp_size: int = 1
     stream_interval: int = 1
     random_seed: Optional[int] = None
+    constrained_json_whitespace_pattern: Optional[str] = None
 
     # Logging
     log_level: str = "info"
@@ -96,6 +110,7 @@ class ServerArgs:
     disable_custom_all_reduce: bool = False
     enable_mixed_chunk: bool = False
     enable_torch_compile: bool = False
+    max_torch_compile_bs: int = 32
     torchao_config: str = ""
     enable_p2p_check: bool = False
     enable_mla: bool = False
@@ -152,6 +167,11 @@ class ServerArgs:
             )
             self.sampling_backend = "pytorch"
 
+        # ROCm: flashinfer available later
+        if is_hip():
+            self.attention_backend = "triton"
+            self.sampling_backend = "pytorch"
+
         # Default kernel backends
         if self.enable_mla:
             logger.info("MLA optimization is tunred on. Use triton backend.")
@@ -359,6 +379,12 @@ class ServerArgs:
             default=ServerArgs.random_seed,
             help="The random seed.",
         )
+        parser.add_argument(
+            "--constrained-json-whitespace-pattern",
+            type=str,
+            default=ServerArgs.constrained_json_whitespace_pattern,
+            help=r"Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*",
+        )
         parser.add_argument(
             "--log-level",
             type=str,
@@ -498,6 +524,12 @@ class ServerArgs:
             action="store_true",
             help="Optimize the model with torch.compile. Experimental feature.",
        )
+        parser.add_argument(
+            "--max-torch-compile-bs",
+            type=int,
+            default=ServerArgs.max_torch_compile_bs,
+            help="Set the maximum batch size when using torch compile.",
+        )
         parser.add_argument(
             "--torchao-config",
             type=str,
@@ -532,7 +564,8 @@ class ServerArgs:
             type=str,
             nargs="*",
             default=None,
-
+            action=LoRAPathAction,
+            help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}",
         )
         parser.add_argument(
             "--max-loras-per-batch",
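The new LoRAPathAction lets --lora-paths accept either bare paths or {name}={path} entries and collects them into a dict. A self-contained sketch of the parsing behavior; the standalone parser below is only for illustration and is not how sglang wires up its CLI:

```python
import argparse


class LoRAPathAction(argparse.Action):
    """Collect --lora-paths values into a dict, mirroring the class added above."""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, {})
        for lora_path in values:
            if "=" in lora_path:
                # "{name}={path}" entries are stored under the explicit name
                name, path = lora_path.split("=", 1)
                getattr(namespace, self.dest)[name] = path
            else:
                # bare paths are keyed by themselves
                getattr(namespace, self.dest)[lora_path] = lora_path


parser = argparse.ArgumentParser()
parser.add_argument("--lora-paths", type=str, nargs="*", default=None, action=LoRAPathAction)
args = parser.parse_args(["--lora-paths", "sql=/models/sql-adapter", "/models/chat-adapter"])
print(args.lora_paths)
# {'sql': '/models/sql-adapter', '/models/chat-adapter': '/models/chat-adapter'}
```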
    
sglang/srt/utils.py CHANGED

@@ -51,6 +51,11 @@ show_time_cost = False
 time_infos = {}
 
 
+# torch flag AMD GPU
+def is_hip() -> bool:
+    return torch.version.hip is not None
+
+
 def enable_show_time_cost():
     global show_time_cost
     show_time_cost = True
@@ -187,7 +192,7 @@ def allocate_init_ports(
         cur_port += 1
 
     if port is not None and ret_ports[0] != port:
-        logger.
+        logger.warning(
             f"WARNING: Port {port} is not available. Use port {ret_ports[0]} instead."
         )
 
@@ -623,56 +628,7 @@ def set_ulimit(target_soft_limit=65535):
         try:
             resource.setrlimit(resource_type, (target_soft_limit, current_hard))
         except ValueError as e:
-            logger.
-
-
-def is_llama3_405b_fp8_head_16(model_config):
-    """Return whether the model is meta-llama/Meta-Llama-3.1-405B-FP8 with 16 kv heads."""
-    if (
-        model_config.hf_config.architectures[0] == "LlamaForCausalLM"
-        and model_config.hf_config.hidden_size == 16384
-        and model_config.hf_config.intermediate_size == 53248
-        and model_config.hf_config.num_hidden_layers == 126
-        and model_config.hf_config.num_key_value_heads == 16
-        and hasattr(model_config.hf_config, "quantization_config")
-        and model_config.hf_config.quantization_config["quant_method"] == "fbgemm_fp8"
-    ):
-        return True
-    return False
-
-
-def monkey_patch_vllm_qvk_linear_loader():
-    """A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints."""
-    from vllm.model_executor.layers.linear import QKVParallelLinear
-
-    origin_weight_loader = QKVParallelLinear.weight_loader
-
-    def get_original_weight(loaded_weight, head_dim):
-        n_kv_head = loaded_weight.shape[0] // (2 * head_dim)
-        dim = loaded_weight.shape[1]
-        for i in range(n_kv_head):
-            loaded_weight[i * head_dim : (i + 1) * head_dim, :] = loaded_weight[
-                2 * i * head_dim : (2 * i + 1) * head_dim, :
-            ]
-        original_kv_weight = loaded_weight[: n_kv_head * head_dim, :]
-        assert original_kv_weight.shape == (n_kv_head * head_dim, dim)
-        return original_kv_weight
-
-    def weight_loader_srt(
-        self,
-        param: Parameter,
-        loaded_weight: torch.Tensor,
-        loaded_shard_id: Optional[str] = None,
-    ):
-        if (
-            loaded_shard_id in ["k", "v"]
-            and loaded_weight.shape[0] == self.head_size * self.total_num_kv_heads * 2
-        ):
-            loaded_weight = get_original_weight(loaded_weight, self.head_size)
-
-        origin_weight_loader(self, param, loaded_weight, loaded_shard_id)
-
-    setattr(QKVParallelLinear, "weight_loader", weight_loader_srt)
+            logger.warning(f"Fail to set RLIMIT_NOFILE: {e}")
 
 
 def add_api_key_middleware(app, api_key: str):
sglang/test/test_utils.py CHANGED

@@ -304,7 +304,6 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
 def select_sglang_backend(args: argparse.Namespace):
     if args.backend.startswith("srt"):
         if args.backend == "srt-no-parallel":
-            global_config.enable_parallel_decoding = False
             global_config.enable_parallel_encoding = False
         backend = RuntimeEndpoint(f"{args.host}:{args.port}")
     elif args.backend.startswith("gpt-"):
sglang/version.py CHANGED

@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.1.post1"
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.1
+Version: 0.3.1.post1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                                    Version 2.0, January 2004
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.3.1 https://github.com/sgl-project/sglang.git
+git clone -b v0.3.1.post1 https://github.com/sgl-project/sglang.git
 cd sglang
 
 pip install --upgrade pip
{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/RECORD CHANGED

@@ -1,18 +1,18 @@
 sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
 sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
-sglang/bench_latency.py,sha256=
+sglang/bench_latency.py,sha256=CDMrch4QwIyb2DTH2kBIgQ6Q8sGHwtrx3Cz49qZNfpU,17078
 sglang/bench_serving.py,sha256=6OM5JIDuoxJDg-VLE4ijGGcS8-6ViaidV05lIrZmSzo,36239
 sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
-sglang/global_config.py,sha256=
+sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
 sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
 sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
 sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
-sglang/version.py,sha256=
+sglang/version.py,sha256=83xK6WSmRR5ba-i5fDLUmoJT83Eg_dpsWgwcnsUhMpA,28
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
 sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
-sglang/lang/interpreter.py,sha256=
+sglang/lang/interpreter.py,sha256=rOquFbMzxry7IItZlAn5TwtQfxMy718JPxOkiXO-yrg,30234
 sglang/lang/ir.py,sha256=W3UfZikcGeT86PDDjDjw-yNzrKY2e2UYO4DTatMCfm0,17704
 sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
 sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,56 +25,56 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
 sglang/srt/conversation.py,sha256=S5w5V6G1xigNxa3UQoSxRcMpQLWWDT9EPBoHBvHkSAk,19663
 sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVvUdIMDo,6016
 sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
-sglang/srt/server.py,sha256=
-sglang/srt/server_args.py,sha256=
-sglang/srt/utils.py,sha256=
+sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
+sglang/srt/server_args.py,sha256=M1Bm9u2JRsEptne-kw-D-B_29Q-M6V4UpAM7K-JxXAc,23309
+sglang/srt/utils.py,sha256=8yxiMRttCcfswynkNPWD3yZFNAGFz2P1PzSuxHCBGns,22340
 sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
 sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
 sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
 sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5UOS_4,2070
 sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
-sglang/srt/constrained/fsm_cache.py,sha256=
+sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
 sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
-sglang/srt/layers/activation.py,sha256=
-sglang/srt/layers/attention_backend.py,sha256=
+sglang/srt/layers/activation.py,sha256=awcwOODYcVdUtC2JxJ1TGsV8Tru0eACKcxYN6cWHbl4,5148
+sglang/srt/layers/attention_backend.py,sha256=lqMsY4VaOO_szIWoTAinXf1DnP2UsbF32kzvwFySz9w,18119
 sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
-sglang/srt/layers/layernorm.py,sha256
+sglang/srt/layers/layernorm.py,sha256=-9Yph4nnMZYX_Q31MUGAimLajNclHXjgDkswpU2BTos,3694
 sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
 sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
 sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
-sglang/srt/layers/sampler.py,sha256=
+sglang/srt/layers/sampler.py,sha256=Q4u46oYu66e34rBNzr50VoXO8FM-assYiCoROolq3Zs,3661
 sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJNLZlE,2703
 sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
 sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
-sglang/srt/layers/fused_moe/layer.py,sha256=
+sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
 sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
 sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
 sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
 sglang/srt/lora/lora.py,sha256=ksj866lgDul6zxO30Jm7Nrjv-mFAMrzdvP8sez3Pl6U,14938
 sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
-sglang/srt/lora/lora_manager.py,sha256=
+sglang/srt/lora/lora_manager.py,sha256=7J7cGmyy1Ph4HCvLdM-ViAizAbV1snZqD-S7JLWXasI,9561
 sglang/srt/managers/controller_multi.py,sha256=KolZDso2WqH1ZhQw9p1eTmlFRgo4bcvzBxE44_sNE_o,6300
 sglang/srt/managers/controller_single.py,sha256=DiZALP_iIPZQMRx09a-LwT5_Dg7p-WU8HXyMoxJ9sRA,4955
 sglang/srt/managers/detokenizer_manager.py,sha256=yQkL5gLomLiy1qc6e9HNz8hcj7JQFHm1AfIrzpXaWJE,6852
 sglang/srt/managers/io_struct.py,sha256=bqmL3NDPLqOn6Au3WLF0NOe8Dh7ECMN7BTHCkEZ_Edk,11247
 sglang/srt/managers/policy_scheduler.py,sha256=tiBUi2GJU5eQEBK6HfsO1_YjWtFkougo40954DIp4dM,13026
-sglang/srt/managers/schedule_batch.py,sha256=
+sglang/srt/managers/schedule_batch.py,sha256=ppHYK65GP0dtuCEzpSbGm9uAne5rEoRmW8osLknXJpI,27384
 sglang/srt/managers/tokenizer_manager.py,sha256=ql-sObjl1oRigJwnLtqqTaaw-i7gPTDMoNXDEMftr40,29643
-sglang/srt/managers/tp_worker.py,sha256=
+sglang/srt/managers/tp_worker.py,sha256=4Hhla9rfGYEdQtzGmxlIEqxt_WVkn2dkLLNQZHgpkf0,39270
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
 sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
 sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
 sglang/srt/mem_cache/memory_pool.py,sha256=4br3Ea2bfA-YsF_sPOVHlF2zQzYGd8fVaYTp197yZsE,7871
 sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZeO-8Mg4Tf0iP-L9FXcyhHfNzGWpTPEDGeUoC2lzHTE,10418
 sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
-sglang/srt/model_executor/model_runner.py,sha256=
+sglang/srt/model_executor/model_runner.py,sha256=LoQ7OFVwOiK_BfdpRfitss1TfJ8qrysHgWM-xXu7n2Y,22433
 sglang/srt/models/baichuan.py,sha256=NrG1rMJXhemkrUCEf8xKOSDQVsOD-nN8RQz6MWHOg84,15124
 sglang/srt/models/chatglm.py,sha256=KwxLHBEvK02McXDvBS0gnRxfIvOAu2QP7lgibrj9Nbc,13371
 sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
 sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
 sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
-sglang/srt/models/deepseek_v2.py,sha256=
+sglang/srt/models/deepseek_v2.py,sha256=bPaGRL8ieBCXKIf-KY7-D9Rus7Qj3VGvvtERzAXAZWs,28421
 sglang/srt/models/exaone.py,sha256=3I5ZoiLotf7U-8c9QJRubpgf6JDx9I_z-ViXQlCC-x8,13087
 sglang/srt/models/gemma.py,sha256=GkwgGFHgGlXgBZN7s7Wooz5tMyCp1YtgLahU2NOo66M,12273
 sglang/srt/models/gemma2.py,sha256=sFfCNEm0_OOWElRSTDuroRv8wNMX8v_81Uko9m546KA,14923
@@ -87,10 +87,11 @@ sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2z
 sglang/srt/models/llava.py,sha256=O4XGdl70Hh4tM_OHapFGHbReC82mbe9xLw6GELKWKhU,24881
 sglang/srt/models/llavavid.py,sha256=ou5uIuskBoBo0lXvqFFfDLBYYVfehx27n-Lu8X9gpLs,11992
 sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
-sglang/srt/models/minicpm3.py,sha256=
+sglang/srt/models/minicpm3.py,sha256=_C96kO3qGK0KRctXZf8LBR9s0sEW0QXWSGU0Vf6OrI8,25206
 sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
 sglang/srt/models/mixtral.py,sha256=oRC7mKBrPJhvzkWSabrbeQQQac-jtF4EV6H2Sgjc5JY,13897
 sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
+sglang/srt/models/olmoe.py,sha256=d0ECpU-IXXwGYg9tkVeMARUbqVcqEnWfpH3rrNiGKA0,15336
 sglang/srt/models/qwen.py,sha256=nqSRzkiZzpRVG6WGQ1MBUclQnXyw8jlvoOq-euM8j5s,9954
 sglang/srt/models/qwen2.py,sha256=9_M-VkHN1_T1XN-gsl_L636QMQ9BLF2WqvTcx_1L6aw,12432
 sglang/srt/models/qwen2_moe.py,sha256=s7b5XnSvsBYtZZUkjPp442m59CqPJ3HxGUIwXBVWsXw,17153
@@ -100,7 +101,7 @@ sglang/srt/models/xverse_moe.py,sha256=YR--WZ33G7XEMsS7ZJl1cQ62Q8PDo9gWqpvJBY_cb
 sglang/srt/models/yivl.py,sha256=B6MELthWIm5KdSzX3o2tbbpApY8XdjUdmcQSD4dQe_I,4835
 sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
 sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
-sglang/srt/sampling/sampling_batch_info.py,sha256=
+sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
 sglang/srt/sampling/sampling_params.py,sha256=ggOXxafqfCD-xrGYcM57byLZ79CIeBP4AD5F44L_CW0,5635
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
 sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
@@ -120,10 +121,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
 sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
 sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
 sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
-sglang/test/test_utils.py,sha256=
+sglang/test/test_utils.py,sha256=NLiJqFRWnCeQ-gdCBe0ubNFCsig1CPb1EU-Ay9CtSfU,17109
 sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
-sglang-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.3.1.dist-info/METADATA,sha256=
-sglang-0.3.1.dist-info/WHEEL,sha256=
-sglang-0.3.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.3.1.dist-info/RECORD,,
+sglang-0.3.1.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.3.1.post1.dist-info/METADATA,sha256=zswdq5UTi5aLVmpEyjnc7SzIi60yc4w2hlMhckdxmcU,38137
+sglang-0.3.1.post1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+sglang-0.3.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.1.post1.dist-info/RECORD,,

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/LICENSE
    File without changes

{sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/top_level.txt
    File without changes