PyPI - sglang - Versions diffs - 0.3.5.post2__py3-none-any.whl → 0.3.6__py3-none-any.whl - Mend

sglang-0.3.5.post2-py3-none-any.whl → sglang-0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

sglang/bench_latency.py +1 -553
sglang/bench_offline_throughput.py +48 -20
sglang/bench_one_batch.py +474 -0
sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
sglang/bench_serving.py +71 -1
sglang/check_env.py +3 -6
sglang/srt/constrained/outlines_backend.py +15 -2
sglang/srt/constrained/xgrammar_backend.py +22 -14
sglang/srt/layers/activation.py +3 -0
sglang/srt/layers/attention/flashinfer_backend.py +93 -48
sglang/srt/layers/attention/triton_backend.py +9 -7
sglang/srt/layers/custom_op_util.py +26 -0
sglang/srt/layers/fused_moe/fused_moe.py +11 -4
sglang/srt/layers/layernorm.py +4 -0
sglang/srt/layers/logits_processor.py +10 -10
sglang/srt/layers/sampler.py +4 -8
sglang/srt/layers/torchao_utils.py +2 -0
sglang/srt/managers/data_parallel_controller.py +74 -9
sglang/srt/managers/detokenizer_manager.py +1 -0
sglang/srt/managers/io_struct.py +27 -0
sglang/srt/managers/schedule_batch.py +104 -38
sglang/srt/managers/schedule_policy.py +5 -1
sglang/srt/managers/scheduler.py +204 -54
sglang/srt/managers/session_controller.py +62 -0
sglang/srt/managers/tokenizer_manager.py +38 -0
sglang/srt/managers/tp_worker.py +12 -1
sglang/srt/managers/tp_worker_overlap_thread.py +49 -52
sglang/srt/model_executor/cuda_graph_runner.py +43 -6
sglang/srt/model_executor/forward_batch_info.py +109 -15
sglang/srt/model_executor/model_runner.py +99 -43
sglang/srt/model_parallel.py +98 -0
sglang/srt/models/deepseek_v2.py +147 -44
sglang/srt/models/gemma2.py +9 -8
sglang/srt/models/llava.py +1 -1
sglang/srt/models/llavavid.py +1 -1
sglang/srt/models/olmo.py +3 -3
sglang/srt/models/phi3_small.py +447 -0
sglang/srt/models/qwen2_vl.py +13 -6
sglang/srt/models/torch_native_llama.py +94 -78
sglang/srt/openai_api/adapter.py +6 -2
sglang/srt/openai_api/protocol.py +1 -1
sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
sglang/srt/sampling/sampling_batch_info.py +58 -57
sglang/srt/sampling/sampling_params.py +1 -1
sglang/srt/server.py +27 -1
sglang/srt/server_args.py +78 -62
sglang/srt/utils.py +71 -52
sglang/test/runners.py +25 -6
sglang/test/srt/sampling/penaltylib/utils.py +23 -21
sglang/test/test_utils.py +30 -19
sglang/version.py +1 -1
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/METADATA +43 -43
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/RECORD +60 -55
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/WHEEL +1 -1
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/LICENSE +0 -0
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/top_level.txt +0 -0

sglang/test/runners.py CHANGED Viewed

@@ -58,6 +58,28 @@ def get_top_logprobs(logits, k):
     return logprobs
+def _get_sentence_transformer_embedding_model(model_path, torch_dtype):
+    from sentence_transformers import SentenceTransformer
+    from sentence_transformers.util import is_sentence_transformer_model
+    if is_sentence_transformer_model(model_path):
+        model = SentenceTransformer(
+            model_path,
+            model_kwargs={"torch_dtype": torch_dtype},
+        )
+    else:  # if no pre-trained sentence-transformers model
+        from sentence_transformers import models
+        word_embedding_model = models.Transformer(model_path).to(dtype=torch_dtype)
+        pooling_model = models.Pooling(
+            word_embedding_model.get_word_embedding_dimension(),
+            pooling_mode="lasttoken",
+        )
+        model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
+    return model.cuda()
 @dataclass
 class ModelOutput:
     output_strs: List[str] = None
@@ -114,12 +136,9 @@ class HFRunner:
                 low_cpu_mem_usage=True,
             ).cuda()
         elif self.model_type == "embedding":
-            from sentence_transformers import SentenceTransformer
-            self.model = SentenceTransformer(
-                model_path,
-                model_kwargs={"torch_dtype": torch_dtype},
-            ).cuda()
+            self.model = _get_sentence_transformer_embedding_model(
+                model_path, torch_dtype
+            )
         elif self.model_type == "reward":
             from transformers import AutoModelForSequenceClassification

sglang/test/srt/sampling/penaltylib/utils.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import dataclasses
 import enum
-import typing
 import unittest
+from typing import Dict, List, Optional, Set, Tuple, Type
 import torch
@@ -16,7 +16,7 @@ from sglang.srt.sampling.penaltylib.orchestrator import (
 class MockSamplingParams:
     frequency_penalty: float = 0.0
     min_new_tokens: int = 0
-    stop_token_ids: typing.List[int] = None
+    stop_token_ids: List[int] = None
     presence_penalty: float = 0.0
     repetition_penalty: float = 1.0
@@ -24,12 +24,12 @@ class MockSamplingParams:
 @dataclasses.dataclass
 class MockTokenizer:
     eos_token_id: int
-    additional_stop_token_ids: typing.Optional[typing.List[int]] = None
+    additional_stop_token_ids: Optional[List[int]] = None
 @dataclasses.dataclass
 class MockReq:
-    origin_input_ids: typing.List[int]
+    origin_input_ids: List[int]
     sampling_params: MockSamplingParams
     tokenizer: MockTokenizer
@@ -42,8 +42,8 @@ class StepType(enum.Enum):
 @dataclasses.dataclass
 class Step:
     type: StepType
-    token_ids: typing.List[int]
-    expected_tensors: typing.Dict[str, torch.Tensor]
+    token_ids: List[int]
+    expected_tensors: Dict[str, torch.Tensor]
     # assume initial logits are all 1
     expected_logits: torch.Tensor
@@ -52,7 +52,7 @@ class Step:
 class Subject:
     sampling_params: MockSamplingParams
     # first step must be input, which will be converted to Req
-    steps: typing.List[Step]
+    steps: List[Step]
     eos_token_id: int = -1
     def __post_init__(self):
@@ -66,7 +66,7 @@ class Subject:
                     f"Expected tensors keys must be the same for all steps. Got {self.steps[i].expected_tensors.keys()} for key={i} and {self.steps[0].expected_tensors.keys()}"
                 )
-    def tensor_keys(self, i: int = 0) -> typing.Set[str]:
+    def tensor_keys(self, i: int = 0) -> Set[str]:
         return set(self.steps[i].expected_tensors.keys())
     def to_req(self) -> MockReq:
@@ -80,7 +80,7 @@ class Subject:
 @dataclasses.dataclass
 class Case:
     enabled: bool
-    test_subjects: typing.List[Subject]
+    test_subjects: List[Subject]
     def __post_init__(self):
         # each test_subjects.steps should have the same expected_tensors.keys()
@@ -90,12 +90,12 @@ class Case:
                     f"Expected tensors keys must be the same for all test_subjects. Got {self.test_subjects[i].tensor_keys()} for key={i} and {self.test_subjects[0].tensor_keys()}"
                 )
-    def tensor_keys(self, i: int = 0) -> typing.List[str]:
+    def tensor_keys(self, i: int = 0) -> List[str]:
         return set(self.test_subjects[i].tensor_keys())
 class BaseBatchedPenalizerTest(unittest.TestCase):
-    Penalizer: typing.Type[_BatchedPenalizer]
+    Penalizer: Type[_BatchedPenalizer]
     device = "cuda"
     vocab_size = 5
@@ -115,7 +115,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
         """
         return torch.tensor(data, **kwargs, device=self.device)
-    def create_test_subjects(self) -> typing.List[Subject]:
+    def create_test_subjects(self) -> List[Subject]:
         raise NotImplementedError()
     def create_test_cases(self):
@@ -127,7 +127,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
     def _create_penalizer(
         self, case: Case
-    ) -> typing.Tuple[BatchedPenalizerOrchestrator, _BatchedPenalizer]:
+    ) -> Tuple[BatchedPenalizerOrchestrator, _BatchedPenalizer]:
         orchestrator = BatchedPenalizerOrchestrator(
             vocab_size=self.vocab_size,
             batch=_BatchLike(reqs=[subject.to_req() for subject in case.test_subjects]),
@@ -287,22 +287,24 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
                         if i < len(subject.steps)
                     ]
-                    inputs: typing.List[typing.List[int]] = []
-                    outputs: typing.List[typing.List[int]] = []
+                    inputs: List[List[int]] = []
+                    outputs: List[List[int]] = []
                     for subject in filtered_subjects:
                         step = subject.steps[i]
                         if step.type == StepType.INPUT:
-                            inputs.append(step.token_ids)
-                            outputs.append([])
+                            raise NotImplementedError()
                         else:
                             inputs.append([])
                             outputs.append(step.token_ids)
-                    if any(inputs):
-                        orchestrator.cumulate_input_tokens(inputs)
                     if any(outputs):
-                        orchestrator.cumulate_output_tokens(outputs)
+                        for j in range(max(len(x) for x in outputs)):
+                            tmp_outputs = torch.tensor(
+                                [x[j] for x in outputs],
+                                dtype=torch.int32,
+                                device=orchestrator.device,
+                            )
+                            orchestrator.cumulate_output_tokens(tmp_outputs)
                     if penalizer.is_required():
                         self.assertTrue(penalizer.is_prepared())

sglang/test/test_utils.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import argparse
 import asyncio
+import copy
 import os
 import random
 import subprocess
@@ -438,18 +439,22 @@ def popen_launch_server(
         process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
     start_time = time.time()
-    while time.time() - start_time < timeout:
-        try:
-            headers = {
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {api_key}",
-            }
-            response = requests.get(f"{base_url}/health_generate", headers=headers)
-            if response.status_code == 200:
-                return process
-        except requests.RequestException:
-            pass
-        time.sleep(10)
+    with requests.Session() as session:
+        while time.time() - start_time < timeout:
+            try:
+                headers = {
+                    "Content-Type": "application/json; charset=utf-8",
+                    "Authorization": f"Bearer {api_key}",
+                }
+                response = session.get(
+                    f"{base_url}/health_generate",
+                    headers=headers,
+                )
+                if response.status_code == 200:
+                    return process
+            except requests.RequestException:
+                pass
+            time.sleep(10)
     raise TimeoutError("Server failed to start within the timeout period.")
@@ -529,6 +534,7 @@ def run_bench_serving(
     random_input_len=4096,
     random_output_len=2048,
     disable_stream=False,
+    need_warmup=False,
 ):
     # Launch the server
     base_url = DEFAULT_URL_FOR_TEST
@@ -562,9 +568,14 @@ def run_bench_serving(
         disable_stream=disable_stream,
         disable_ignore_eos=False,
         extra_request_body=None,
+        profile=None,
     )
     try:
+        if need_warmup:
+            warmup_args = copy.deepcopy(args)
+            warmup_args.num_prompts = 16
+            run_benchmark(warmup_args)
         res = run_benchmark(args)
     finally:
         kill_child_process(process.pid, include_self=True)
@@ -573,11 +584,11 @@ def run_bench_serving(
     return res
-def run_bench_latency(model, other_args):
+def run_bench_one_batch(model, other_args):
     command = [
         "python3",
         "-m",
-        "sglang.bench_latency",
+        "sglang.bench_one_batch",
         "--model-path",
         model,
         "--batch-size",
@@ -664,7 +675,7 @@ def run_and_check_memory_leak(
     workload_func,
     disable_radix_cache,
     enable_mixed_chunk,
-    enable_overlap,
+    disable_overlap,
     chunked_prefill_size,
 ):
     other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
@@ -672,8 +683,8 @@ def run_and_check_memory_leak(
         other_args += ["--disable-radix-cache"]
     if enable_mixed_chunk:
         other_args += ["--enable-mixed-chunk"]
-    if enable_overlap:
-        other_args += ["--enable-overlap-schedule"]
+    if disable_overlap:
+        other_args += ["--disable-overlap-schedule"]
     model = DEFAULT_MODEL_NAME_FOR_TEST
     port = random.randint(4000, 5000)
@@ -725,7 +736,7 @@ def run_and_check_memory_leak(
 def run_mmlu_test(
     disable_radix_cache=False,
     enable_mixed_chunk=False,
-    enable_overlap=False,
+    disable_overlap=False,
     chunked_prefill_size=32,
 ):
     def workload_func(base_url, model):
@@ -748,7 +759,7 @@ def run_mmlu_test(
         workload_func,
         disable_radix_cache,
         enable_mixed_chunk,
-        enable_overlap,
+        disable_overlap,
         chunked_prefill_size,
     )

sglang/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.3.5.post2"
+__version__ = "0.3.6"

{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/METADATA RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.5.post2
+Version: 0.3.6
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
-License: Apache License
+License:                                  Apache License
                                    Version 2.0, January 2004
                                 http://www.apache.org/licenses/
@@ -215,74 +215,74 @@ Requires-Dist: requests
 Requires-Dist: tqdm
 Requires-Dist: numpy
 Requires-Dist: IPython
-Provides-Extra: all
-Requires-Dist: sglang[srt]; extra == "all"
-Requires-Dist: sglang[openai]; extra == "all"
-Requires-Dist: sglang[anthropic]; extra == "all"
-Requires-Dist: sglang[litellm]; extra == "all"
-Provides-Extra: all_hip
-Requires-Dist: sglang[srt_hip]; extra == "all-hip"
-Requires-Dist: sglang[openai]; extra == "all-hip"
-Requires-Dist: sglang[anthropic]; extra == "all-hip"
-Requires-Dist: sglang[litellm]; extra == "all-hip"
-Provides-Extra: all_xpu
-Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
-Requires-Dist: sglang[openai]; extra == "all-xpu"
-Requires-Dist: sglang[anthropic]; extra == "all-xpu"
-Requires-Dist: sglang[litellm]; extra == "all-xpu"
-Provides-Extra: anthropic
-Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
-Provides-Extra: dev
-Requires-Dist: sglang[all]; extra == "dev"
-Requires-Dist: sglang[test]; extra == "dev"
-Provides-Extra: dev_hip
-Requires-Dist: sglang[all_hip]; extra == "dev-hip"
-Requires-Dist: sglang[test]; extra == "dev-hip"
-Provides-Extra: dev_xpu
-Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
-Requires-Dist: sglang[test]; extra == "dev-xpu"
-Provides-Extra: litellm
-Requires-Dist: litellm>=1.0.0; extra == "litellm"
-Provides-Extra: openai
-Requires-Dist: openai>=1.0; extra == "openai"
-Requires-Dist: tiktoken; extra == "openai"
-Provides-Extra: runtime_common
+Provides-Extra: runtime-common
 Requires-Dist: aiohttp; extra == "runtime-common"
 Requires-Dist: decord; extra == "runtime-common"
 Requires-Dist: fastapi; extra == "runtime-common"
-Requires-Dist: hf-transfer; extra == "runtime-common"
-Requires-Dist: huggingface-hub; extra == "runtime-common"
+Requires-Dist: hf_transfer; extra == "runtime-common"
+Requires-Dist: huggingface_hub; extra == "runtime-common"
 Requires-Dist: interegular; extra == "runtime-common"
 Requires-Dist: orjson; extra == "runtime-common"
+Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
 Requires-Dist: packaging; extra == "runtime-common"
 Requires-Dist: pillow; extra == "runtime-common"
 Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
 Requires-Dist: psutil; extra == "runtime-common"
 Requires-Dist: pydantic; extra == "runtime-common"
 Requires-Dist: python-multipart; extra == "runtime-common"
+Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
 Requires-Dist: torchao; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
-Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
 Requires-Dist: modelscope; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
 Requires-Dist: torch; extra == "srt"
-Requires-Dist: vllm==0.6.3.post1; extra == "srt"
-Provides-Extra: srt_hip
+Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
+Provides-Extra: srt-hip
 Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
 Requires-Dist: torch; extra == "srt-hip"
 Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
-Provides-Extra: srt_xpu
+Provides-Extra: srt-xpu
 Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
+Provides-Extra: litellm
+Requires-Dist: litellm>=1.0.0; extra == "litellm"
 Provides-Extra: test
 Requires-Dist: jsonlines; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
 Requires-Dist: pandas; extra == "test"
-Requires-Dist: sentence-transformers; extra == "test"
+Requires-Dist: sentence_transformers; extra == "test"
 Requires-Dist: accelerate; extra == "test"
 Requires-Dist: peft; extra == "test"
+Provides-Extra: all
+Requires-Dist: sglang[srt]; extra == "all"
+Requires-Dist: sglang[openai]; extra == "all"
+Requires-Dist: sglang[anthropic]; extra == "all"
+Requires-Dist: sglang[litellm]; extra == "all"
+Provides-Extra: all-hip
+Requires-Dist: sglang[srt_hip]; extra == "all-hip"
+Requires-Dist: sglang[openai]; extra == "all-hip"
+Requires-Dist: sglang[anthropic]; extra == "all-hip"
+Requires-Dist: sglang[litellm]; extra == "all-hip"
+Provides-Extra: all-xpu
+Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
+Requires-Dist: sglang[openai]; extra == "all-xpu"
+Requires-Dist: sglang[anthropic]; extra == "all-xpu"
+Requires-Dist: sglang[litellm]; extra == "all-xpu"
+Provides-Extra: dev
+Requires-Dist: sglang[all]; extra == "dev"
+Requires-Dist: sglang[test]; extra == "dev"
+Provides-Extra: dev-hip
+Requires-Dist: sglang[all_hip]; extra == "dev-hip"
+Requires-Dist: sglang[test]; extra == "dev-hip"
+Provides-Extra: dev-xpu
+Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
+Requires-Dist: sglang[test]; extra == "dev-xpu"
 <div align="center"  id="sglangtop">
 <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
@@ -323,7 +323,7 @@ The core features include:
 - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
 - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
-- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
+- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
 - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
 ## Getting Started

{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,16 @@
 sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
 sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
-sglang/bench_latency.py,sha256=SSqZjcCNO88ExpT94qBZ5CmuA5o0T8wMTBnxLsNMqik,18259
-sglang/bench_offline_throughput.py,sha256=xBr7gI_ZbrpXXD72Nzu1F228oNyz1jggcblZCeUWJgw,9975
-sglang/bench_server_latency.py,sha256=N1MODIzcMk74yOWmY19d36aih3ewtHOemLxoieKtdhw,5866
-sglang/bench_serving.py,sha256=ytef89P9bqKRaMGXAqq69SmLTlNXWyHyhEraISLKYME,47975
-sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
+sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
+sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
+sglang/bench_one_batch.py,sha256=Ww5Qd1ATaY8zw0mDEGoTYjwxMtxPKmpaHrIdjvS9iVE,15706
+sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
+sglang/bench_serving.py,sha256=hn5mihMey8Cik2nvwV30DUQ8C4Goxyt6BWm4YtyjIrI,50511
+sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
 sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
 sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
 sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
 sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
-sglang/version.py,sha256=NlX-QUNR7ogIH-GcgzllsyHox7ItJoycFEUM_EYuhW4,28
+sglang/version.py,sha256=W_9dCm49nLvZulVAvvsafxLJjVBSKDBHz9K7szFZllo,22
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -27,38 +28,40 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
 sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
 sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
 sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
-sglang/srt/server.py,sha256=JUYAE8MDGYou_HbmuR10QFZfg319fGt9VamskvBkpFo,28776
-sglang/srt/server_args.py,sha256=V8sx2oY0yphHC_uATwv4UTiLUFnvMQl85o6y5AyaoXM,30086
-sglang/srt/utils.py,sha256=jGSlxbvI50xEybdupDQNHpsCaF1U_5buADrD149766g,27013
+sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
+sglang/srt/server.py,sha256=caZPEoP3zdbEnQJnGzOEqvSdzSjsVUX8opSc-SplH2A,29709
+sglang/srt/server_args.py,sha256=1VhWGvMOtr7ozW2BJV8KInPyptzfh2UiBN4jqdDJYS8,30714
+sglang/srt/utils.py,sha256=5YIElk7hP1Zr7ff-jFXBUfM-acurnh5HR1ofC18FOTU,27540
 sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
 sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
 sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
 sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
 sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
 sglang/srt/constrained/base_grammar_backend.py,sha256=OPuBSd_F_fRwjVj6YFWBQuGeikj7UQtkTvc-JgEYt4I,2259
-sglang/srt/constrained/outlines_backend.py,sha256=J03QQiT9pkdXyoYGw3Rj6taEyWlIr4VCBvxQ3aMiB8A,5786
+sglang/srt/constrained/outlines_backend.py,sha256=i4dhg3hP406YHzEyP8x2FQmLlGEn8Uby51KNLAcdhak,6353
 sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
-sglang/srt/constrained/xgrammar_backend.py,sha256=wMWqkLN5KhnJXL6GBqbcrhxvAAMx60nG88KIBU1bFSc,4505
-sglang/srt/layers/activation.py,sha256=7VEkCrx2dvl629Lz0fkJcJfVoZA-ykEdkpTzKEc_drQ,5225
-sglang/srt/layers/layernorm.py,sha256=HCj8Y_X6MNNdtQU2sWKgyjIqVERxl9dqrmjbBbyJjpE,3796
+sglang/srt/constrained/xgrammar_backend.py,sha256=r11pWwtctbaBJGdjhQbaD_SN8n9qw902CUDh1I3ZPqo,4738
+sglang/srt/layers/activation.py,sha256=Yi2xdh7jmHUlRgERQFmStz9JwWvzT-kDmZbuf8yqy2I,5375
+sglang/srt/layers/custom_op_util.py,sha256=sE0dTU00Mkzu7RiWS0h1OvPzFey_m-StbkeR6grpY7o,827
+sglang/srt/layers/layernorm.py,sha256=1ceN6DLenmmKdxiif2uecplSUhc58qfd6s-6KWmXS9A,3943
 sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
-sglang/srt/layers/logits_processor.py,sha256=1l-hJoeZUfrPPmCWcyscl0ThgKWpprUELiL1mVDfbPE,12556
+sglang/srt/layers/logits_processor.py,sha256=FFW8gVvEFxhUqDFaUPRYf3I5wA9HKsSa2IbDk7TjZZU,12575
 sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
 sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
 sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
-sglang/srt/layers/sampler.py,sha256=3zfth1Kz24X4sUq7Z_cjZwHgPVivI-rgPtIeUbsiiWU,4589
-sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
+sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
+sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
 sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
 sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
 sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
-sglang/srt/layers/attention/flashinfer_backend.py,sha256=843CbZsRfzWp5FTusNXXL1o4N3jd0hoCNpsoUR6Qjxk,23306
-sglang/srt/layers/attention/triton_backend.py,sha256=DKUEzxQE8iBvJPNHmQwP1pyx2wXmSsLqzBhLjJznIUk,6482
+sglang/srt/layers/attention/flashinfer_backend.py,sha256=9V5xVyx4CnT_vN8MPBOfREePgYonwzGa_PesdZClVuI,24619
+sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
 sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
 sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
 sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
 sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
 sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
-sglang/srt/layers/fused_moe/fused_moe.py,sha256=N15tWTm2SGuesJxDIJAdV5FsDUpE-15sb_AIgr4swlw,23656
+sglang/srt/layers/fused_moe/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
 sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
 sglang/srt/layers/fused_moe/patch.py,sha256=K5CNLnFVxRPd8_jlY4hW6bj7pAACeCFZQA8y5loqqM4,4029
 sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
@@ -66,16 +69,17 @@ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87M
 sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
 sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
 sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
-sglang/srt/managers/data_parallel_controller.py,sha256=_XB6Ianc8TiqwLTW-7DH6gGjVYBeBU_6WjjaDk0snIY,5686
-sglang/srt/managers/detokenizer_manager.py,sha256=erRgf8RijFrGnYjZawu9an1u2mFPRY3tnxzF9PbKc80,7295
+sglang/srt/managers/data_parallel_controller.py,sha256=7Y3YOYJDe2GUyBBHJXUxDdoz24fuaO-5IGM0TwKxzFw,7895
+sglang/srt/managers/detokenizer_manager.py,sha256=ovux4AwPPTQ-JpPof7ClSTiA1sphY7IkAxPocCa1ZIs,7349
 sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
-sglang/srt/managers/io_struct.py,sha256=O_oHnikwmOexNqH4HP6bwAI5d_jG_C96JGapkLg8B7c,12289
-sglang/srt/managers/schedule_batch.py,sha256=4BgocYdKFTDCrrBkSXCT75EALBx-3RYnoN3SgtdsHlU,39595
-sglang/srt/managers/schedule_policy.py,sha256=LH0rh1PiI5LK-dSd3dar8_po6FidiBUuj0Xcp_yNQAA,12295
-sglang/srt/managers/scheduler.py,sha256=ty1sJ9U6JxifIGF4uzZX6CANMJtbjNWPe2k8aRPS6aI,48133
-sglang/srt/managers/tokenizer_manager.py,sha256=n_XCsCOwLZWCLv1ZJLGjyKgrAWCAQDyEhjnkxOptSa8,24436
-sglang/srt/managers/tp_worker.py,sha256=S5oim5xrkg1j68hYq6LfC8T533JYmQX9Kabt6U8ZXn4,5726
-sglang/srt/managers/tp_worker_overlap_thread.py,sha256=j5J4yHyR7w2HgAbN7S__299ADvsoyap5HK63SWMNavQ,7546
+sglang/srt/managers/io_struct.py,sha256=tp7RckbDklXW8YW03xXTX3Nv0DpZGjviGPx_iljoQdI,12885
+sglang/srt/managers/schedule_batch.py,sha256=kJvzb75Jmlo1iJvw1IWmLvKnBRuaUxok3MNOv-t5w18,41928
+sglang/srt/managers/schedule_policy.py,sha256=zPk5Um5-E65p0cLZ_ZwCCk7DO8dE6pWJAX9_SyfPUvw,12432
+sglang/srt/managers/scheduler.py,sha256=djbeXw7cfZBEu0uBOsQ-Wz4RCyvSWJ8ulpgaO6cSFyU,54711
+sglang/srt/managers/session_controller.py,sha256=vf2nQrxIu_14PO5xqVBhcw3WdqbdmufBOcIwnFpuyrc,2308
+sglang/srt/managers/tokenizer_manager.py,sha256=v1iCmFPhkT5IzK_LMJ-O0UPcov7pwjT49StRflBBK7Y,25882
+sglang/srt/managers/tp_worker.py,sha256=P8QQ9kAqPi7RYXkXVjFIWaZW2F5ezxQtYTJA6gJleBE,6082
+sglang/srt/managers/tp_worker_overlap_thread.py,sha256=f-zsbb6FcDrxNhLoRp2jjqSJE-tyAzZo0HAKVnx1PUY,7527
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
 sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
 sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -83,18 +87,18 @@ sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47
 sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
 sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
 sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZMkyfZpWgDXfBpJ4cenh1TxXtt1O2xqeiXhDkq6E5pU,12936
-sglang/srt/model_executor/forward_batch_info.py,sha256=61TVExbiXDQRvZ6oevNz9AIxG7e-KVddgj4I6MTivLg,9426
-sglang/srt/model_executor/model_runner.py,sha256=QdFjQRnxZU8r7-MP-NdsnFnPWMRfxa-zTUmKOYmM8HE,26879
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=Rm4yt4RSbFf2Dee4gI5UrbJKWgGk4quomRlVJ90TaH4,14521
+sglang/srt/model_executor/forward_batch_info.py,sha256=4PGHIQM-ZckRosIFF987xhTlotEHkt9dTMKrZQUUKqU,12397
+sglang/srt/model_executor/model_runner.py,sha256=iUKjnn0oaa2KMJgeRm4rUYrDYhg35Eg7DlBnB8OUPSw,29116
 sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
 sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
 sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
 sglang/srt/models/dbrx.py,sha256=IiVIk_rVd0RlvfIJGIThPOPkoYT3U649PrduThiKRzg,14545
 sglang/srt/models/deepseek.py,sha256=DjW2B21isWE6A2C8A3VGZ-G0k1DkhWHO3dZZjcOVG50,15828
-sglang/srt/models/deepseek_v2.py,sha256=z6532MRN1tBltFNteFJfimnaGpyNmK6g_sdNmTzsVmk,28230
+sglang/srt/models/deepseek_v2.py,sha256=irh-2TE5PpwjsCojxpdDQCmBTuF016BTNKD673Gf4dY,32171
 sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
 sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
-sglang/srt/models/gemma2.py,sha256=iE56CYzPn-QCis4kcU7Yi0jvJ04KeU2deuZH2DaS2lM,14768
+sglang/srt/models/gemma2.py,sha256=6B999ZZBMl5twr_DMK9lnSmxwZAvVavpFHaOat71ANg,14783
 sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
 sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
 sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
@@ -105,39 +109,40 @@ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16
 sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
 sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
 sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
-sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
-sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
+sglang/srt/models/llava.py,sha256=URAPE0xB878s_pNacA4Z2t4lAxMuzzMjLZu5gf5MseA,24847
+sglang/srt/models/llavavid.py,sha256=bqFZ0qIBlOqp-mDsBFB-QGVSemYmN6wftUKcff3r3MM,12127
 sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
 sglang/srt/models/minicpm3.py,sha256=O6092exfoq8iHLmyfpVCubyQEzcfp4SmqtZJs7x4A8s,25014
 sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
 sglang/srt/models/mixtral.py,sha256=b6AqEgL4y9wQpKKAGnhgzBtUypYo3dca5UOpGNLEt_A,13949
 sglang/srt/models/mixtral_quant.py,sha256=2ND-aOHjcyeQMUvqLLqhXwOdlR_bEftMFk3hc3lnpvc,13969
 sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
-sglang/srt/models/olmo.py,sha256=eWPmo5AAnBhNGdMwklh1of3JnRzAszgQp4opeiiYidI,11887
+sglang/srt/models/olmo.py,sha256=OPEZCpFrwy47IGiwLZFYxX7UXpE5PP3KdC7UKxRhngE,11884
 sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15242
+sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
 sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
 sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
 sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
-sglang/srt/models/qwen2_vl.py,sha256=jb0RYMo0ShPIt4NtPCEcFGciZKstM-gYwVKND_LK7Ls,26052
+sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
 sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
-sglang/srt/models/torch_native_llama.py,sha256=d8gVNurlVVZ-tD3Uc_aHyGCVUUp1gR8awOH4fLRZHDE,19145
+sglang/srt/models/torch_native_llama.py,sha256=RTIO2qp1SitOwNZNVzMBz8i0Gbud3t1nxTCImTguVQg,19362
 sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
 sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
 sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
-sglang/srt/openai_api/adapter.py,sha256=xYBmBLZ_JxfMt_m8LtVe_OB70GV4S9zBOL8e5g_VRvs,53432
-sglang/srt/openai_api/protocol.py,sha256=Mou5JUMKJkxVxoj4n8R4_sgnYY3OcwniiAi2TEM3hfY,10070
-sglang/srt/sampling/sampling_batch_info.py,sha256=7uoHypbbp4o71DfPmF22R_LeyM_Q9BTxBFg8O4lkd9w,7648
-sglang/srt/sampling/sampling_params.py,sha256=zzWVm8DxcUDdPwV1MIh5q76mmLwtkun0E08T6U3ZyWA,5192
+sglang/srt/openai_api/adapter.py,sha256=10jD3QLOAlbxTUO4-PnhgoaiNtWxbadUfb9bWyqN6gw,53540
+sglang/srt/openai_api/protocol.py,sha256=dRundxpM2kutsz-03u2nPfd3jVA0zJKmPYGAEY93t8c,10078
+sglang/srt/sampling/sampling_batch_info.py,sha256=8bQ1UvsJooPEBq_t6BXSocDAcm8OqivSUYXm4mBtnUQ,8379
+sglang/srt/sampling/sampling_params.py,sha256=u9RL8yTXYSPD6OZPvGdKvD1hmmRDY2_dg6cs2CaJhbg,5192
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
-sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
-sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
-sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=MmfqRqJ-leSoY9iO5Hg_ILlX-M0M0tObYrxrb_quStg,3717
-sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
-sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
+sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
+sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
+sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
+sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
+sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
 sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
 sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
 sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
-sglang/test/runners.py,sha256=JxfsGEW9L3cz87fHYmWqb3Vnbk6K1csLLLftR3LogxU,14297
+sglang/test/runners.py,sha256=31tkr6ZZ4WksLXZglAil05E1JiO71kftlg9dBiHq_u0,15034
 sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
 sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
 sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
@@ -147,10 +152,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
 sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
 sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
 sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
-sglang/test/test_utils.py,sha256=XvIAMeLXr4D7uLxCUSLTKP5Upc1EJd0JX2egL897Jfo,23100
-sglang/test/srt/sampling/penaltylib/utils.py,sha256=q98pQDikkmvvvvAG-AXMYaYte1iHHW2TFhKGtAeGvdE,12802
-sglang-0.3.5.post2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.3.5.post2.dist-info/METADATA,sha256=ajoktPOWOAmE37TcZw562A22FmxntBUWO4zLOShVKpQ,21568
-sglang-0.3.5.post2.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
-sglang-0.3.5.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.3.5.post2.dist-info/RECORD,,
+sglang/test/test_utils.py,sha256=lBwINKlekJx03zJbnjEcO_KIkCMcBnfFa22LNt5Mwy4,23462
+sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
+sglang-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.3.6.dist-info/METADATA,sha256=Xqs3Fv5BkPx7ROZyCxhEBfIJzESsYz4PzjihzkA-ZZ8,21602
+sglang-0.3.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+sglang-0.3.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.6.dist-info/RECORD,,

sglang 0.3.5.post2__py3-none-any.whl → 0.3.6__py3-none-any.whl

sglang 0.3.5.post2py3-none-any.whl → 0.3.6py3-none-any.whl