PyPI - sglang - Versions diffs - 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

sglang 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

sglang/bench_latency.py +114 -63
sglang/check_env.py +1 -0
sglang/lang/backend/runtime_endpoint.py +0 -11
sglang/srt/hf_transformers_utils.py +2 -2
sglang/srt/layers/extend_attention.py +59 -7
sglang/srt/layers/radix_attention.py +22 -9
sglang/srt/layers/token_attention.py +28 -2
sglang/srt/managers/io_struct.py +9 -4
sglang/srt/managers/schedule_batch.py +15 -11
sglang/srt/managers/tokenizer_manager.py +28 -13
sglang/srt/mem_cache/memory_pool.py +65 -24
sglang/srt/model_config.py +11 -0
sglang/srt/model_executor/model_runner.py +46 -17
sglang/srt/models/deepseek_v2.py +198 -16
sglang/srt/openai_api/adapter.py +120 -20
sglang/srt/openai_api/protocol.py +1 -1
sglang/srt/server.py +87 -78
sglang/srt/server_args.py +8 -2
sglang/srt/utils.py +25 -20
sglang/test/run_eval.py +21 -10
sglang/test/runners.py +237 -0
sglang/test/simple_eval_common.py +12 -12
sglang/test/simple_eval_gpqa.py +92 -0
sglang/test/simple_eval_humaneval.py +5 -5
sglang/test/simple_eval_math.py +72 -0
sglang/test/test_utils.py +94 -13
sglang/utils.py +15 -37
sglang/version.py +1 -1
{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/METADATA +29 -28
{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/RECORD +33 -30
{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/LICENSE +0 -0
{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/WHEEL +0 -0
{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/top_level.txt +0 -0

sglang/test/test_utils.py CHANGED Viewed

@@ -1,9 +1,14 @@
 """Common utilities for testing and benchmarking"""
+import argparse
 import asyncio
+import multiprocessing
 import subprocess
+import threading
 import time
+import unittest
 from functools import partial
+from typing import Callable, List, Optional
 import numpy as np
 import requests
@@ -247,7 +252,7 @@ async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=
     return choices.index(answer)
-def add_common_other_args_and_parse(parser):
+def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
     parser.add_argument("--parallel", type=int, default=64)
     parser.add_argument("--host", type=str, default="http://127.0.0.1")
     parser.add_argument("--port", type=int, default=None)
@@ -286,7 +291,7 @@ def add_common_other_args_and_parse(parser):
     return args
-def add_common_sglang_args_and_parse(parser):
+def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
     parser.add_argument("--parallel", type=int, default=64)
     parser.add_argument("--host", type=str, default="http://127.0.0.1")
     parser.add_argument("--port", type=int, default=30000)
@@ -296,7 +301,7 @@ def add_common_sglang_args_and_parse(parser):
     return args
-def select_sglang_backend(args):
+def select_sglang_backend(args: argparse.Namespace):
     if args.backend.startswith("srt"):
         if args.backend == "srt-no-parallel":
             global_config.enable_parallel_decoding = False
@@ -309,7 +314,7 @@ def select_sglang_backend(args):
     return backend
-def _get_call_generate(args):
+def _get_call_generate(args: argparse.Namespace):
     if args.backend == "lightllm":
         return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
     elif args.backend == "vllm":
@@ -336,7 +341,7 @@ def _get_call_generate(args):
         raise ValueError(f"Invalid backend: {args.backend}")
-def _get_call_select(args):
+def _get_call_select(args: argparse.Namespace):
     if args.backend == "lightllm":
         return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
     elif args.backend == "vllm":
@@ -359,7 +364,7 @@ def _get_call_select(args):
         raise ValueError(f"Invalid backend: {args.backend}")
-def get_call_generate(args):
+def get_call_generate(args: argparse.Namespace):
     call_generate = _get_call_generate(args)
     def func(*args, **kwargs):
@@ -372,7 +377,7 @@ def get_call_generate(args):
     return func
-def get_call_select(args):
+def get_call_select(args: argparse.Namespace):
     call_select = _get_call_select(args)
     def func(*args, **kwargs):
@@ -385,7 +390,16 @@ def get_call_select(args):
     return func
-def popen_launch_server(model, port, timeout, *args):
+def popen_launch_server(
+    model: str,
+    base_url: str,
+    timeout: float,
+    api_key: Optional[str] = None,
+    other_args: tuple = (),
+):
+    _, host, port = base_url.split(":")
+    host = host[2:]
     command = [
         "python3",
         "-m",
@@ -393,21 +407,88 @@ def popen_launch_server(model, port, timeout, *args):
         "--model-path",
         model,
         "--host",
-        "localhost",
+        host,
         "--port",
-        str(port),
-        *args,
+        port,
+        *other_args,
     ]
+    if api_key:
+        command += ["--api-key", api_key]
     process = subprocess.Popen(command, stdout=None, stderr=None)
-    base_url = f"http://localhost:{port}/v1"
     start_time = time.time()
     while time.time() - start_time < timeout:
         try:
-            response = requests.get(f"{base_url}/models")
+            headers = {
+                "Content-Type": "application/json; charset=utf-8",
+                "Authorization": f"Bearer {api_key}",
+            }
+            response = requests.get(f"{base_url}/v1/models", headers=headers)
             if response.status_code == 200:
                 return process
         except requests.RequestException:
             pass
         time.sleep(10)
     raise TimeoutError("Server failed to start within the timeout period.")
+def run_with_timeout(
+    func: Callable,
+    args: tuple = (),
+    kwargs: Optional[dict] = None,
+    timeout: float = None,
+):
+    """Run a function with timeout."""
+    ret_value = []
+    def _target_func():
+        ret_value.append(func(*args, **(kwargs or {})))
+    t = threading.Thread(target=_target_func)
+    t.start()
+    t.join(timeout=timeout)
+    if t.is_alive():
+        raise TimeoutError()
+    if not ret_value:
+        raise RuntimeError()
+    return ret_value[0]
+def run_unittest_files(files: List[str], timeout_per_file: float):
+    tic = time.time()
+    success = True
+    for filename in files:
+        def func():
+            print(f"\n\nRun {filename}\n\n")
+            ret = unittest.main(module=None, argv=["", "-vb"] + [filename])
+        p = multiprocessing.Process(target=func)
+        def run_one_file():
+            p.start()
+            p.join()
+        try:
+            run_with_timeout(run_one_file, timeout=timeout_per_file)
+            if p.exitcode != 0:
+                success = False
+                break
+        except TimeoutError:
+            p.terminate()
+            time.sleep(5)
+            print(
+                "\nTimeout after {timeout_per_file} seconds when running {filename}\n"
+            )
+            return False
+    if success:
+        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
+    else:
+        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")
+    return 0 if success else -1

sglang/utils.py CHANGED Viewed

@@ -12,6 +12,7 @@ import urllib.request
 from concurrent.futures import ThreadPoolExecutor
 from io import BytesIO
 from json import dumps
+from typing import Union
 import numpy as np
 import requests
@@ -25,7 +26,7 @@ def get_exception_traceback():
     return err_str
-def is_same_type(values):
+def is_same_type(values: list):
     """Return whether the elements in values are of the same type."""
     if len(values) <= 1:
         return True
@@ -45,7 +46,7 @@ def read_jsonl(filename: str):
     return rets
-def dump_state_text(filename, states, mode="w"):
+def dump_state_text(filename: str, states: list, mode: str = "w"):
     """Dump program state in a text file."""
     from sglang.lang.interpreter import ProgramState
@@ -75,19 +76,13 @@ class HttpResponse:
         return self.resp.status
-def http_request(
-    url, json=None, stream=False, auth_token=None, api_key=None, verify=None
-):
+def http_request(url, json=None, stream=False, api_key=None, verify=None):
     """A faster version of requests.post with low-level urllib API."""
     headers = {"Content-Type": "application/json; charset=utf-8"}
-    # add the Authorization header if an auth token is provided
-    if auth_token is not None:
-        headers["Authorization"] = f"Bearer {auth_token}"
-    # add the API Key header if an API key is provided
+    # add the Authorization header if an api key is provided
     if api_key is not None:
-        headers["X-API-Key"] = api_key
+        headers["Authorization"] = f"Bearer {api_key}"
     if stream:
         return requests.post(url, json=json, stream=True, headers=headers)
@@ -105,7 +100,7 @@ def http_request(
             return HttpResponse(e)
-def encode_image_base64(image_path):
+def encode_image_base64(image_path: Union[str, bytes]):
     """Encode an image in base64."""
     if isinstance(image_path, str):
         with open(image_path, "rb") as image_file:
@@ -144,7 +139,7 @@ def encode_frame(frame):
     return frame_bytes
-def encode_video_base64(video_path, num_frames=16):
+def encode_video_base64(video_path: str, num_frames: int = 16):
     import cv2  # pip install opencv-python-headless
     cap = cv2.VideoCapture(video_path)
@@ -190,7 +185,7 @@ def encode_video_base64(video_path, num_frames=16):
     return video_base64
-def _is_chinese_char(cp):
+def _is_chinese_char(cp: int):
     """Checks whether CP is the codepoint of a CJK character."""
     # This defines a "chinese character" as anything in the CJK Unicode block:
     #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
@@ -215,7 +210,7 @@ def _is_chinese_char(cp):
     return False
-def find_printable_text(text):
+def find_printable_text(text: str):
     """Returns the longest printable substring of text that contains only entire words."""
     # Borrowed from https://github.com/huggingface/transformers/blob/061580c82c2db1de9139528243e105953793f7a2/src/transformers/generation/streamers.py#L99
@@ -234,26 +229,7 @@ def find_printable_text(text):
         return text[: text.rfind(" ") + 1]
-def run_with_timeout(func, args=(), kwargs=None, timeout=None):
-    """Run a function with timeout."""
-    ret_value = []
-    def _target_func():
-        ret_value.append(func(*args, **(kwargs or {})))
-    t = threading.Thread(target=_target_func)
-    t.start()
-    t.join(timeout=timeout)
-    if t.is_alive():
-        raise TimeoutError()
-    if not ret_value:
-        raise RuntimeError()
-    return ret_value[0]
-def graceful_registry(sub_module_name):
+def graceful_registry(sub_module_name: str):
     def graceful_shutdown(signum, frame):
         logger.info(
             f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
@@ -265,7 +241,9 @@ def graceful_registry(sub_module_name):
 class LazyImport:
-    def __init__(self, module_name, class_name):
+    """Lazy import to make `import sglang` run faster."""
+    def __init__(self, module_name: str, class_name: str):
         self.module_name = module_name
         self.class_name = class_name
         self._module = None
@@ -276,7 +254,7 @@ class LazyImport:
             self._module = getattr(module, self.class_name)
         return self._module
-    def __getattr__(self, name):
+    def __getattr__(self, name: str):
         module = self._load()
         return getattr(module, name)

sglang/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.2.9.post1"
1	+ __version__ = "0.2.10"

{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.2.9.post1
+Version: 0.2.10
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                                    Version 2.0, January 2004
@@ -215,34 +215,35 @@ Requires-Dist: requests
 Requires-Dist: tqdm
 Requires-Dist: numpy
 Provides-Extra: all
-Requires-Dist: sglang[srt] ; extra == 'all'
-Requires-Dist: sglang[openai] ; extra == 'all'
-Requires-Dist: sglang[anthropic] ; extra == 'all'
-Requires-Dist: sglang[litellm] ; extra == 'all'
+Requires-Dist: sglang[srt]; extra == "all"
+Requires-Dist: sglang[openai]; extra == "all"
+Requires-Dist: sglang[anthropic]; extra == "all"
+Requires-Dist: sglang[litellm]; extra == "all"
 Provides-Extra: anthropic
-Requires-Dist: anthropic >=0.20.0 ; extra == 'anthropic'
+Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
 Provides-Extra: litellm
-Requires-Dist: litellm >=1.0.0 ; extra == 'litellm'
+Requires-Dist: litellm>=1.0.0; extra == "litellm"
 Provides-Extra: openai
-Requires-Dist: openai >=1.0 ; extra == 'openai'
-Requires-Dist: tiktoken ; extra == 'openai'
+Requires-Dist: openai>=1.0; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Provides-Extra: srt
-Requires-Dist: aiohttp ; extra == 'srt'
-Requires-Dist: fastapi ; extra == 'srt'
-Requires-Dist: hf-transfer ; extra == 'srt'
-Requires-Dist: huggingface-hub ; extra == 'srt'
-Requires-Dist: interegular ; extra == 'srt'
-Requires-Dist: packaging ; extra == 'srt'
-Requires-Dist: pillow ; extra == 'srt'
-Requires-Dist: psutil ; extra == 'srt'
-Requires-Dist: pydantic ; extra == 'srt'
-Requires-Dist: torch ; extra == 'srt'
-Requires-Dist: uvicorn ; extra == 'srt'
-Requires-Dist: uvloop ; extra == 'srt'
-Requires-Dist: zmq ; extra == 'srt'
-Requires-Dist: vllm ==0.5.3.post1 ; extra == 'srt'
-Requires-Dist: outlines >=0.0.44 ; extra == 'srt'
-Requires-Dist: python-multipart ; extra == 'srt'
+Requires-Dist: aiohttp; extra == "srt"
+Requires-Dist: fastapi; extra == "srt"
+Requires-Dist: hf-transfer; extra == "srt"
+Requires-Dist: huggingface-hub; extra == "srt"
+Requires-Dist: interegular; extra == "srt"
+Requires-Dist: jsonlines; extra == "srt"
+Requires-Dist: packaging; extra == "srt"
+Requires-Dist: pillow; extra == "srt"
+Requires-Dist: psutil; extra == "srt"
+Requires-Dist: pydantic; extra == "srt"
+Requires-Dist: python-multipart; extra == "srt"
+Requires-Dist: torch; extra == "srt"
+Requires-Dist: uvicorn; extra == "srt"
+Requires-Dist: uvloop; extra == "srt"
+Requires-Dist: zmq; extra == "srt"
+Requires-Dist: vllm==0.5.3.post1; extra == "srt"
+Requires-Dist: outlines>=0.0.44; extra == "srt"
 <div align="center">
 <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400"></img>
@@ -300,8 +301,8 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
 ### Method 2: From source
 ```
-# Use the stable v0.2.9.post1 branch
-git clone -b v0.2.9.post1 https://github.com/sgl-project/sglang.git
+# Use the last release branch
+git clone -b v0.2.10 https://github.com/sgl-project/sglang.git
 cd sglang
 pip install --upgrade pip
@@ -453,7 +454,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
 ### Benchmark Performance
-- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as those for `launch_server.py`. This is not a dynamic batching server, so it may run out of memory for a batch size that can run successfully with a real server. This is because a real server will truncate the prefill into several batches/chunks, while this unit test does not do this.
+- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`.
   ```
   python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
   ```

{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
 sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
-sglang/bench_latency.py,sha256=JPatRvstM3nXb-ViVgtR-TaRrFHpcHzqoDG7BQmRYK8,10539
+sglang/bench_latency.py,sha256=lHk9C3XM1e-UQd6HY2qn-njr2rG5AFQ_sNVD5hcF5Vc,12162
 sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
-sglang/check_env.py,sha256=M4hWWl9IAmrhVATj6nbPlGiZ4FtLI3K9ETL9hFzxh8Q,4138
+sglang/check_env.py,sha256=XlVou81XC20tPFVTuKDSKqDqLQJoO2QvlnReWMf-Ho4,4152
 sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
 sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
 sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
-sglang/utils.py,sha256=r0Z7hY_bFFk-b6WeQJir9br-hCW2-p7n5E7Et2WziaQ,8776
-sglang/version.py,sha256=xiigmrV7lCZAjfifluudBrG9T_WlhKAusKo0iARTZm0,28
+sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
+sglang/version.py,sha256=waXgc7p-jgGCsUjdVfO_KjlVZblnCvrzf4A0dsBj_lg,23
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
 sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
@@ -19,51 +19,51 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
 sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
 sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
 sglang/lang/backend/openai.py,sha256=6ww2rwKouWgtmjaCf4hk-kXXJ6bY6n9Xnbm3UTFZvl4,14808
-sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
+sglang/lang/backend/runtime_endpoint.py,sha256=n78pyBWTCMYmDAS-0yZVFvzQYCiACz8Usj7FTDfdVKE,8763
 sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
 sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
-sglang/srt/hf_transformers_utils.py,sha256=Fg-3panb6lsqOhHmAYA0ivkXyBjdnvY5mqvilDv2xF4,11919
+sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
 sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
-sglang/srt/model_config.py,sha256=DO7m84WiT3dzPWmyKz_UXDAHEdqEjq8Lq5wCjzjYMME,6023
+sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
 sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
-sglang/srt/server.py,sha256=cDHUmLqj7MjF-3L9WcfA-4z9dRl55cwF5ygXuncMl-Q,15852
-sglang/srt/server_args.py,sha256=wdRlxR-509RfNYuMQoxUAefMwoc5eme6sYwEMyRBHmk,16034
-sglang/srt/utils.py,sha256=5wgGe6kI59JAmf8kxLsItulJ4xQaOJHHYaWWd6_WWmo,23384
+sglang/srt/server.py,sha256=ur_fDb-nEmlzz1mSKwWa87XFJdQM1gxFz4cahMcMatA,16028
+sglang/srt/server_args.py,sha256=oUMzSSBrJ5_g0yeBapABUv2MlhDNWEfWLdLVROgqZOU,16305
+sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
 sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
 sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
 sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
 sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
 sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
-sglang/srt/layers/extend_attention.py,sha256=zuNnAdL_wF6BX0Mwn1dgDJvh3YJjYwqa5Fbzp8muOVc,12573
+sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
 sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
 sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
 sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
-sglang/srt/layers/radix_attention.py,sha256=tdA-kdd9LQY1wbw3iYuy-9cikVJYmy3EctwAlUfN-Uo,6945
-sglang/srt/layers/token_attention.py,sha256=ylUqUnozJCCohxTGAiiP3sxgUrcXfEVic8-qgcHYDj4,7968
+sglang/srt/layers/radix_attention.py,sha256=cNSQWO74DcXgpAMKSMaHzfpy5IcLORUnWe5gOwATLrw,7466
+sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
 sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
 sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
 sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
 sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
 sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
-sglang/srt/managers/io_struct.py,sha256=Rz7Ur9Yw6prDGdy6XjsSiUmVBccS6cef-G_9TW7HA_4,7105
+sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
 sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
-sglang/srt/managers/schedule_batch.py,sha256=LIoVCPNivh0u1dOrrWRgFD6a4ywq3nrG_4dNgCK0kIw,37697
-sglang/srt/managers/tokenizer_manager.py,sha256=rtZ44aiZOMHLHkXDhMgj0HDR3gExpeGjWfoCD0PfG_o,20574
+sglang/srt/managers/schedule_batch.py,sha256=yIjiiMcaYYN9iaEOGQZoPUpFviDptMVh9hMwRRnDAco,37896
+sglang/srt/managers/tokenizer_manager.py,sha256=kxkoAa8VbQt9FJPX1fN-7IzAD8RIcIvz3AGR8uEMYjk,21202
 sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
 sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
 sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
 sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
-sglang/srt/mem_cache/memory_pool.py,sha256=wkhjyYLbAZrl2FB5i4ODkxgMufBuDpe4N0kbXhu6ZO0,4509
+sglang/srt/mem_cache/memory_pool.py,sha256=8N4eHybhtBuwIwYyeNSvrZI90LGgMG8sA3OrXdXZAZs,5496
 sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
 sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
-sglang/srt/model_executor/model_runner.py,sha256=RadluuL2Ou_BTOgo2IrLfMMEX7Z_DRgPL3JVBIr2KaU,16189
+sglang/srt/model_executor/model_runner.py,sha256=tJHlqk_JH3RJDaPAiSljaDI951LUu9AYW679eCKMJXs,17404
 sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
 sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
 sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
 sglang/srt/models/commandr.py,sha256=gaTI77hgitPlcUNyxMEdGu_XZQj2DuAMnh3KbZQ9HFg,14166
 sglang/srt/models/dbrx.py,sha256=LQu7I2KH-XzY9iBlaK7IQsM1o3kzsuI1vTCspK2C19o,14655
 sglang/srt/models/deepseek.py,sha256=adr57ZX6aPOBOpmvm7YIvoqo6u0jdrKJPZ8SGcVXAh8,16014
-sglang/srt/models/deepseek_v2.py,sha256=9CORl-IroSguYPX3wz_aGe7mFoUE7cQRMs7CgbkBYLk,20087
+sglang/srt/models/deepseek_v2.py,sha256=jaVaQlL1aPCTu8nLcvtAW_rmtvHe6y2CviIOjXzh4q4,26962
 sglang/srt/models/gemma.py,sha256=PMPI1-WLuLdk6e7u6I9d_LoCkauLkWY3aOP8MFEZ-sI,12279
 sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,16420
 sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
@@ -82,16 +82,19 @@ sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12
 sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
 sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
 sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
-sglang/srt/openai_api/adapter.py,sha256=h6TIU0Fu3jU361pye4J12vcDug7UJJRPiBAY_HfFUuE,32599
-sglang/srt/openai_api/protocol.py,sha256=JXLnnQ63I-bJv93ICPfP0cBpyomQA5IYE_mkUg5X4Es,8177
-sglang/test/run_eval.py,sha256=WvMLSi70G9fhruP8cPLOfDJ9XEKL7yNn2pylx-7tNsQ,3054
-sglang/test/simple_eval_common.py,sha256=Qh1-iEXJCKfJmgpAzNSp28fcP1TUJzt3s9i1FjvemHY,12340
-sglang/test/simple_eval_humaneval.py,sha256=IW0ZC6D4SXu06IJiMoAY9DK9SMsTOlDPAwu4cfbJco0,5826
+sglang/srt/openai_api/adapter.py,sha256=p2HeYO9Qgl7EERXutwpsQ659NvZhFnkQmTZX5s-x-oI,37444
+sglang/srt/openai_api/protocol.py,sha256=q1MuDUhwSM-8G2uGnWUMeEk87aZxei8lCcaP6VuA8So,8200
+sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
+sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
+sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
+sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
+sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
+sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
 sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
 sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
-sglang/test/test_utils.py,sha256=PndOL1zdseMrpHTHGmgsHHepxqYBn__eNLrlsSXLy6k,11905
-sglang-0.2.9.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.2.9.post1.dist-info/METADATA,sha256=BY728JRrlJkEdmiLJAvi_qfuUY2LDBqcKqqtooU9dxk,33281
-sglang-0.2.9.post1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-sglang-0.2.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.2.9.post1.dist-info/RECORD,,
+sglang/test/test_utils.py,sha256=p-G6iiT5-Vkg6LMYgvDheomLJ6IYMLsYHCp3tkatiy8,13983
+sglang-0.2.10.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.2.10.dist-info/METADATA,sha256=Lt9wnP2134unvF88fDj2PfQIf2YaeYJ6xZdfmMAJkoM,33303
+sglang-0.2.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+sglang-0.2.10.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.2.10.dist-info/RECORD,,

{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/LICENSE RENAMED Viewed

File without changes

{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/top_level.txt RENAMED Viewed

File without changes

sglang 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl

sglang 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl