sglang 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/lang/backend/runtime_endpoint.py +4 -4
- sglang/lang/interpreter.py +4 -4
- sglang/srt/constrained/fsm_cache.py +21 -1
- sglang/srt/hf_transformers_utils.py +3 -1
- sglang/srt/layers/logits_processor.py +70 -61
- sglang/srt/layers/radix_attention.py +5 -2
- sglang/srt/layers/token_attention.py +1 -1
- sglang/srt/managers/controller/cuda_graph_runner.py +26 -17
- sglang/srt/managers/controller/infer_batch.py +54 -13
- sglang/srt/managers/controller/model_runner.py +22 -7
- sglang/srt/managers/controller/tp_worker.py +47 -41
- sglang/srt/managers/io_struct.py +2 -2
- sglang/srt/managers/tokenizer_manager.py +62 -43
- sglang/srt/model_config.py +5 -0
- sglang/srt/models/deepseek_v2.py +517 -0
- sglang/srt/models/llama_classification.py +3 -3
- sglang/srt/openai_api/adapter.py +33 -33
- sglang/srt/openai_api/protocol.py +1 -1
- sglang/srt/sampling_params.py +5 -4
- sglang/srt/server.py +2 -15
- sglang/srt/server_args.py +28 -7
- sglang/test/test_programs.py +5 -1
- sglang/version.py +1 -1
- {sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/METADATA +9 -7
- {sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/RECORD +28 -27
- {sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/LICENSE +0 -0
- {sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/WHEEL +0 -0
- {sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/top_level.txt +0 -0
sglang/srt/server.py
CHANGED
@@ -65,9 +65,6 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 app = FastAPI()
 tokenizer_manager = None
 
-# Put some args for easily access
-global_server_args_dict = {}
-
 
 @app.get("/health")
 async def health() -> Response:
@@ -150,14 +147,6 @@ def available_models():
     return ModelList(data=model_cards)
 
 
-def _set_global_server_args(server_args: ServerArgs):
-    global global_server_args_dict
-    global_server_args_dict = {
-        "disable_flashinfer": server_args.disable_flashinfer,
-        "attention_reduce_in_fp32": server_args.attention_reduce_in_fp32,
-    }
-
-
 def _set_torch_compile_config():
     # The following configurations are for torch compile optimizations
     import torch._dynamo.config
@@ -176,6 +165,8 @@ def launch_server(
     model_overide_args: Optional[dict] = None,
     pipe_finish_writer: Optional[mp.connection.Connection] = None,
 ):
+    server_args.check_server_args()
+
     """Launch an HTTP server."""
     global tokenizer_manager
 
@@ -211,8 +202,6 @@ def launch_server(
     if server_args.enable_torch_compile:
         _set_torch_compile_config()
 
-    _set_global_server_args(server_args)
-
     # Allocate ports
     server_args.port, server_args.additional_ports = allocate_init_ports(
        server_args.port,
@@ -230,8 +219,6 @@ def launch_server(
 
     # Handle multi-node tensor parallelism
     if server_args.nnodes > 1:
-        assert server_args.dp_size == 1, "Multi-node dp is not supported."
-
         if server_args.node_rank != 0:
             tp_size_local = server_args.tp_size // server_args.nnodes
             gpu_ids = [
sglang/srt/server_args.py
CHANGED
@@ -28,6 +28,7 @@ class ServerArgs:
     mem_fraction_static: Optional[float] = None
     max_prefill_tokens: Optional[int] = None
     max_running_requests: Optional[int] = None
+    max_num_reqs: Optional[int] = None
     schedule_heuristic: str = "lpm"
     schedule_conservativeness: float = 1.0
 
@@ -51,13 +52,14 @@ class ServerArgs:
 
     # Optimization/debug options
     disable_flashinfer: bool = False
+    disable_flashinfer_sampling: bool = False
     disable_radix_cache: bool = False
     disable_regex_jump_forward: bool = False
     disable_cuda_graph: bool = False
     disable_disk_cache: bool = False
     enable_torch_compile: bool = False
-    attention_reduce_in_fp32: bool = False
     enable_p2p_check: bool = False
+    attention_reduce_in_fp32: bool = False
     efficient_weight_load: bool = False
 
     # Distributed args
@@ -203,6 +205,12 @@ class ServerArgs:
             default=ServerArgs.max_running_requests,
             help="The maximum number of running requests.",
         )
+        parser.add_argument(
+            "--max-num-reqs",
+            type=int,
+            default=None,
+            help="The maximum number of requests to serve in the memory pool. If the model have a large context length, you may need to decrease this value to avoid out-of-memory errors.",
+        )
         parser.add_argument(
             "--schedule-heuristic",
             type=str,
@@ -296,7 +304,12 @@ class ServerArgs:
         parser.add_argument(
             "--disable-flashinfer",
             action="store_true",
-            help="Disable flashinfer
+            help="Disable flashinfer attention kernels.",
+        )
+        parser.add_argument(
+            "--disable-flashinfer-sampling",
+            action="store_true",
+            help="Disable flashinfer sampling kernels.",
         )
         parser.add_argument(
             "--disable-radix-cache",
@@ -324,15 +337,15 @@ class ServerArgs:
             help="Optimize the model with torch.compile, experimental feature.",
         )
         parser.add_argument(
-            "--
+            "--enable-p2p-check",
             action="store_true",
-            help="
-            "This only affects Triton attention kernels",
+            help="Enable P2P check for GPU access, otherwise the p2p access is allowed by default.",
         )
         parser.add_argument(
-            "--
+            "--attention-reduce-in-fp32",
             action="store_true",
-            help="
+            help="Cast the intermidiate attention results to fp32 to avoid possible crashes related to fp16."
+            "This only affects Triton attention kernels",
         )
         parser.add_argument(
             "--efficient-weight-load",
@@ -357,6 +370,14 @@ class ServerArgs:
             f"disable_disk_cache={self.disable_disk_cache}, "
         )
 
+    def check_server_args(self):
+        assert (
+            self.tp_size % self.nnodes == 0
+        ), "tp_size must be divisible by number of nodes"
+        assert not (
+            self.dp_size > 1 and self.node_rank is not None
+        ), "multi-node data parallel is not supported"
+
 
 @dataclasses.dataclass
 class PortArgs:
sglang/test/test_programs.py
CHANGED
@@ -118,7 +118,11 @@ def test_decode_json_regex():
     s += "}"
 
     ret = decode_json.run()
-    js_obj = json.loads(ret["json_output"])
+    try:
+        js_obj = json.loads(ret["json_output"])
+    except json.decoder.JSONDecodeError:
+        print(ret["json_output"])
+        raise
     assert isinstance(js_obj["name"], str)
     assert isinstance(js_obj["population"], int)
 
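The test change wraps the JSON parse so that a malformed constrained-decoding result is printed before the exception propagates. The same print-then-re-raise pattern is easy to reuse outside the test suite; a small sketch (the `loads_or_dump` helper is hypothetical, not part of sglang):

```python
import json


def loads_or_dump(raw: str):
    """Parse raw JSON; on failure, show the offending text, then re-raise."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        print(f"could not parse model output:\n{raw}")
        raise


print(loads_or_dump('{"name": "Paris", "population": 2102650}'))
```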
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.2.5"
+__version__ = "0.2.6"
{sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.2.5
+Version: 0.2.6
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                         Version 2.0, January 2004
@@ -249,7 +249,7 @@ Requires-Dist: outlines >=0.0.44 ; extra == 'srt'
 
 --------------------------------------------------------------------------------
 
-| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) |
+| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) |
 
 SGLang is a fast serving framework for large language models and vision language models.
 It makes your interaction with models faster and more controllable by co-designing the backend runtime and frontend language.
@@ -404,16 +404,17 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 ### Run Llama 3.1 405B
 
 ```bash
-
+## Run 405B (fp8) on a single node
+python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
+
+## Run 405B (fp16) on two nodes
 # replace the `172.16.4.52:20000` with your own first node ip address and port, disable CUDA Graph temporarily
+
 # on the first node
 GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
 
 # on the second
 GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
-
-# single node run 405B fp8
-python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
 ```
 
 ### Supported Models
@@ -422,6 +423,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instr
 - Mistral / Mixtral
 - Gemma / Gemma 2
 - Qwen / Qwen 2 / Qwen 2 MoE
+- DeepSeek / DeepSeek 2
 - LLaVA 1.5 / 1.6
   - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000`
   - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000`
@@ -442,7 +444,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
 
 ### Benchmark Performance
 
-- Benchmark a single static batch
+- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as those for `launch_server.py`. This is not a dynamic batching server, so it may run out of memory for a batch size that can run successfully with a real server. This is because a real server will truncate the prefill into several batches/chunks, while this unit test does not do this.
 ```
 python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
 ```
{sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/RECORD
CHANGED
@@ -7,11 +7,11 @@ sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
 sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
 sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
 sglang/utils.py,sha256=arJuwOAEX445M2NL9SAOi6jBNu0-cfU04PLAr-hIH3U,8168
-sglang/version.py,sha256=
+sglang/version.py,sha256=Oz5HbwHMyE87nmwV80AZzpkJPf-wBg7eDuJr_BXZkhU,22
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
 sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
-sglang/lang/interpreter.py,sha256=
+sglang/lang/interpreter.py,sha256=fbPrKF_SDpVPsiV2WbmlMfwRA7C9T9_IyVmGnpaXa0A,29687
 sglang/lang/ir.py,sha256=5VVK2JnbspdysrhcGgkmp_JlAprd2XqqRnS_GfP_XWc,16645
 sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
 sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,55 +19,56 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
 sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
 sglang/lang/backend/litellm.py,sha256=QsaLRh0KVyuaxRZGAvLOdCCSStIMs-V0XyMX0PR6y0w,2452
 sglang/lang/backend/openai.py,sha256=-ScfI2TFALB_FTYBur9ab0gNYxK1ogHkhdLxX19t6-Y,14808
-sglang/lang/backend/runtime_endpoint.py,sha256=
+sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
 sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
 sglang/srt/conversation.py,sha256=Il7JJuu4o42k2xdBWVfONNmstTsAM-4idX6AcEOnrXQ,15526
 sglang/srt/flush_cache.py,sha256=SJsbZnmDhH-gb9ch3hIwnI_nuwaOLlKvlXADyLBGENk,403
-sglang/srt/hf_transformers_utils.py,sha256=
+sglang/srt/hf_transformers_utils.py,sha256=RnyxC1_OmOf-QzdPBziqAUOIQXyRzrb4RNlqFB1ArEc,11354
 sglang/srt/memory_pool.py,sha256=FhJk5GtYortO3MJIsMMQ-o49agwDHVX1aEQH2LITq6c,3949
 sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
-sglang/srt/model_config.py,sha256=
-sglang/srt/sampling_params.py,sha256=
-sglang/srt/server.py,sha256=
-sglang/srt/server_args.py,sha256=
+sglang/srt/model_config.py,sha256=9VF7ET0CGKEY-zdiU7kGv8Cg7H_9Q1fmqtI3C0z22S0,5458
+sglang/srt/sampling_params.py,sha256=WjJ_sOhbJVMKIBH8gJWQKhzeK5Ipu9XRNV7soWnLtak,3122
+sglang/srt/server.py,sha256=IKSTgp6FJN6TE9anog47zh9GJYXoyMjEKBNXUZ89Cuk,14197
+sglang/srt/server_args.py,sha256=RfWoipSUURmv5NqT4L_YF9qJ6gOkZ8omRUFC_5fmgts,14043
 sglang/srt/utils.py,sha256=HvKkGbut8sOxMpGIzYsJ9NEZJg48LOnxyGESaGZmANs,22385
 sglang/srt/constrained/__init__.py,sha256=5LB3_mDTMW6wcRkFA5J2Rd5HPHHEKRyiELhe4gtlBYM,1472
 sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
-sglang/srt/constrained/fsm_cache.py,sha256=
+sglang/srt/constrained/fsm_cache.py,sha256=HlzFs9TXvMFmeZhTpXmJU3UNQ_Kix4Ir-SwpqXGhX8k,2061
 sglang/srt/constrained/jump_forward.py,sha256=s60jZ7Ue8zaodgQm7gDpN6pSedpvpUck_waJALUMj60,5615
 sglang/srt/layers/context_flashattention_nopad.py,sha256=7ps_9W_ia9zikL9HqsSUwWHyBVotywosE-dOiPtaGY8,4615
 sglang/srt/layers/extend_attention.py,sha256=aYAAL9HZJpaSASp-ulMvbmSmyMcqdYUsgVQC-Lbm7_U,12008
 sglang/srt/layers/fused_moe.py,sha256=uyrbCaIHioq3G00xQUrCo53hYDoHzk5rep3Eji3oQiQ,20258
 sglang/srt/layers/linear.py,sha256=qLwFkOiRAljzE7LkAkLRdcCdVMk-t7b56jEjwQAuYDM,33953
-sglang/srt/layers/logits_processor.py,sha256=
-sglang/srt/layers/radix_attention.py,sha256=
-sglang/srt/layers/token_attention.py,sha256=
+sglang/srt/layers/logits_processor.py,sha256=VjP6T582K64X0mfyPUkhcIEZxsqJNu6ziqR3V82N_jE,10118
+sglang/srt/layers/radix_attention.py,sha256=to6w0kIq6dtaOYJtqIZcqR3t1yf05qBH1LWnFlE-jEQ,6374
+sglang/srt/layers/token_attention.py,sha256=uBtk3I6KeFjBRKRuQoG5BEZtVJsX4p7UOtJoej6ILZI,7411
 sglang/srt/layers/quantization/__init__.py,sha256=PQFzdPpul98DvywBA6YMBOnrMjtHE1LMlMpJ7FM8J3I,1971
 sglang/srt/layers/quantization/fp8.py,sha256=jaqgRFnHC--IL8iqB6Qygi-KXYPYBKKqt_j4Rk55_h4,24946
 sglang/srt/managers/detokenizer_manager.py,sha256=8rN2cdMr61LWy07lingEqLnNy0W5Rebdn14IsTQ9PCs,5049
-sglang/srt/managers/io_struct.py,sha256=
-sglang/srt/managers/tokenizer_manager.py,sha256=
-sglang/srt/managers/controller/cuda_graph_runner.py,sha256=
-sglang/srt/managers/controller/infer_batch.py,sha256=
+sglang/srt/managers/io_struct.py,sha256=WmBGrWR8R6X2zh2p1FkfPZtJzuGSlNW8cmIDm0EEqMA,5528
+sglang/srt/managers/tokenizer_manager.py,sha256=2it1o4dKd7nFzfZflOw1cT03gFktqC2sVPICbBSR4c0,19594
+sglang/srt/managers/controller/cuda_graph_runner.py,sha256=KEqX4Tc1yEWW52LzzFb4THb-guYIaft2pxxH8rWchSA,8808
+sglang/srt/managers/controller/infer_batch.py,sha256=3DixMdSW0odH5I6p7h8_xtRlHx4q76ArR6YZW8Gkqzg,35888
 sglang/srt/managers/controller/manager_multi.py,sha256=DT8Y9RF5OyTxlrLEZYz4claNWir3UrVztdOZaVPiA6g,6077
 sglang/srt/managers/controller/manager_single.py,sha256=2xO_iWK6tWvc0B31nKbe2N3klxwQBJmPTnFhNjzhVSI,4566
-sglang/srt/managers/controller/model_runner.py,sha256=
+sglang/srt/managers/controller/model_runner.py,sha256=9o4xWnfI9-FJU6-S7WfEFlGMjWA2YesAhUKpuq8urhk,14854
 sglang/srt/managers/controller/radix_cache.py,sha256=tx8LEQpqLxipw9UUVj4D1YQLMMDmWnjDYv8oDlOl-co,8210
 sglang/srt/managers/controller/schedule_heuristic.py,sha256=SQAGzPS3aB_TPj7rnPBhewwyR6W1sVwW4D3zG3JUY00,2714
-sglang/srt/managers/controller/tp_worker.py,sha256=
+sglang/srt/managers/controller/tp_worker.py,sha256=VYhO3xcJrcDQwonGLWSWKHq4T7BvFmb6-L5LxY3-fhE,30607
 sglang/srt/model_loader/model_loader.py,sha256=VS8VQL5ITN3akZ9eU_-uHWMan1axLMNG2_O12HzGysA,10132
 sglang/srt/model_loader/utils.py,sha256=I2PS5HIH5Cg-p7xKO_Cw_foK2vQ61xVc3zQv7CbeGEw,10120
 sglang/srt/models/chatglm.py,sha256=pH8g2Dj8qQLGPYpWVTb-IONfXsdfmpWi0-IEYNdSi4s,13296
 sglang/srt/models/commandr.py,sha256=hHsNQWi0X8rNL7_gpcoUxQxdhxtvx5_RVx8u6cLzqYQ,13606
 sglang/srt/models/dbrx.py,sha256=rRxOusGPu670ommeqXg62AllwB1apzE4yZoWc1fcr2M,14095
 sglang/srt/models/deepseek.py,sha256=YtoPmv4fKmiH_jsRMSab9Wxq3aOZga9pCPGnkCs3Vvs,15457
+sglang/srt/models/deepseek_v2.py,sha256=1FqLe6tSENFpYgcEkmMr2-M4qksgne2glU3kZhSBB0Q,19527
 sglang/srt/models/gemma.py,sha256=DweoalfWYhLL-ZWLAO5gl4SCZflWmejVeDG3Vky_WNo,11719
 sglang/srt/models/gemma2.py,sha256=x3Dua-TVwRm5fJjo5UDekdoWqwt9xYbMuB-ogfXyiT8,15860
 sglang/srt/models/gpt_bigcode.py,sha256=XHO1naPdXfiKYQRQ6uZe1fN3PBDhKH3-bchsaaZvfE4,9637
 sglang/srt/models/grok.py,sha256=611zrlIchvFaVfztRdBY7z97oU3KB-anykbOZy1hK6M,27295
 sglang/srt/models/internlm2.py,sha256=8MNcwxU5Th9IxWa314HqqmbCRlPUFScnfneBDs0riIU,11659
 sglang/srt/models/llama2.py,sha256=OyAf_lun5aZEsT80WmrIYBF8QXTXRpW8sUlylr4AZIc,14204
-sglang/srt/models/llama_classification.py,sha256=
+sglang/srt/models/llama_classification.py,sha256=Z2dvZAdOwCnN-lGFZRcwU0rNreE1gKwLefeWzEH36Uw,4366
 sglang/srt/models/llava.py,sha256=vBI6EEeOG_9o23Shi9h8k58rxTOHZnSKMmPl3B3Q3uc,17924
 sglang/srt/models/llavavid.py,sha256=SrNQ-U2wekHvP_up-ZXRkCSros2NzheHpPfXHrp0YBU,13050
 sglang/srt/models/minicpm.py,sha256=9uE8D-NopAj-sfaKJ7d-0x-PuCTEevQPoHPZvZlwstA,13277
@@ -79,14 +80,14 @@ sglang/srt/models/qwen2.py,sha256=87Tt1Bti-Py3AGudcf7k5ni-OHhtDKPj_Hke44YGw4U,11
 sglang/srt/models/qwen2_moe.py,sha256=oHNoo45myV5kitkls2GWVzuGt1Q4pRHN2nLlXEltFI8,17581
 sglang/srt/models/stablelm.py,sha256=Z_XCDSHY_QMz3lZwwkZdIZjEOizZjLYJU9GDi8o08qQ,10802
 sglang/srt/models/yivl.py,sha256=55KPrQ-dVplI0hh2WCSugjc1luE0J2UAafjZxu_7Xuc,4367
-sglang/srt/openai_api/adapter.py,sha256=
-sglang/srt/openai_api/protocol.py,sha256=
+sglang/srt/openai_api/adapter.py,sha256=DVZ2niAEOgE8GQdYnuvwjrGiFRkAu5YtOB-yxOlF_Eg,15868
+sglang/srt/openai_api/protocol.py,sha256=jTb22jv5caB7k7Ub2ltYEbTtDheZjwwWAAUdvjiLTR0,5741
 sglang/test/test_conversation.py,sha256=gF_AyOxQgpPQBPnA57-kq-M0p_zFu-rBDMFgAq655Rw,1596
 sglang/test/test_openai_protocol.py,sha256=DVx3r6hrb8oRqbo5AYIleldxbqMBTtb-gtORM6t_Y1c,1661
-sglang/test/test_programs.py,sha256=
+sglang/test/test_programs.py,sha256=s4WGpTmYP4Yx5g8JYZpbkeF9RN5iUnlKdi8FGAZovTc,13756
 sglang/test/test_utils.py,sha256=kD_fQe3WroZ9Kc3NBRKPiZOFJ_JD2uEE9XIvPp6AD9Y,11048
-sglang-0.2.5.dist-info/LICENSE,sha256=
-sglang-0.2.5.dist-info/METADATA,sha256=
-sglang-0.2.5.dist-info/WHEEL,sha256=
-sglang-0.2.5.dist-info/top_level.txt,sha256=
-sglang-0.2.5.dist-info/RECORD,,
+sglang-0.2.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.2.6.dist-info/METADATA,sha256=g_G_XHbWCNSY9F6RieXV43svnNzq1wonwrArNxX0VNA,32095
+sglang-0.2.6.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+sglang-0.2.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.2.6.dist-info/RECORD,,
{sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/LICENSE
File without changes
{sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/WHEEL
File without changes
{sglang-0.2.5.dist-info → sglang-0.2.6.dist-info}/top_level.txt
File without changes