PyPI - sglang - Versions diffs - 0.3.5.post2__py3-none-any.whl → 0.3.6.post1__py3-none-any.whl - Mend

sglang-0.3.5.post2-py3-none-any.whl → sglang-0.3.6.post1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

sglang/__init__.py +2 -2
sglang/api.py +2 -2
sglang/bench_latency.py +1 -553
sglang/bench_offline_throughput.py +48 -20
sglang/bench_one_batch.py +472 -0
sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
sglang/bench_serving.py +125 -6
sglang/check_env.py +3 -6
sglang/lang/backend/base_backend.py +1 -1
sglang/lang/backend/runtime_endpoint.py +2 -2
sglang/srt/configs/model_config.py +13 -14
sglang/srt/constrained/__init__.py +13 -14
sglang/srt/constrained/base_grammar_backend.py +13 -15
sglang/srt/constrained/outlines_backend.py +28 -17
sglang/srt/constrained/outlines_jump_forward.py +13 -15
sglang/srt/constrained/xgrammar_backend.py +47 -58
sglang/srt/conversation.py +13 -15
sglang/srt/hf_transformers_utils.py +13 -15
sglang/srt/layers/activation.py +16 -13
sglang/srt/layers/attention/flashinfer_backend.py +106 -54
sglang/srt/layers/attention/triton_backend.py +9 -7
sglang/srt/layers/attention/triton_ops/decode_attention.py +51 -55
sglang/srt/layers/attention/triton_ops/extend_attention.py +16 -16
sglang/srt/layers/attention/triton_ops/prefill_attention.py +13 -15
sglang/srt/layers/custom_op_util.py +25 -0
sglang/srt/layers/fused_moe_grok/__init__.py +1 -0
sglang/srt/layers/{fused_moe → fused_moe_grok}/fused_moe.py +11 -4
sglang/srt/layers/{fused_moe → fused_moe_grok}/layer.py +4 -9
sglang/srt/layers/{fused_moe/patch.py → fused_moe_patch.py} +5 -0
sglang/srt/layers/fused_moe_triton/__init__.py +44 -0
sglang/srt/layers/fused_moe_triton/fused_moe.py +861 -0
sglang/srt/layers/fused_moe_triton/layer.py +633 -0
sglang/srt/layers/layernorm.py +17 -15
sglang/srt/layers/logits_processor.py +23 -25
sglang/srt/layers/quantization/__init__.py +77 -17
sglang/srt/layers/radix_attention.py +13 -15
sglang/srt/layers/rotary_embedding.py +13 -13
sglang/srt/layers/sampler.py +4 -8
sglang/srt/layers/torchao_utils.py +2 -0
sglang/srt/lora/lora.py +13 -14
sglang/srt/lora/lora_config.py +13 -14
sglang/srt/lora/lora_manager.py +22 -24
sglang/srt/managers/data_parallel_controller.py +98 -27
sglang/srt/managers/detokenizer_manager.py +13 -15
sglang/srt/managers/io_struct.py +63 -21
sglang/srt/managers/schedule_batch.py +154 -59
sglang/srt/managers/schedule_policy.py +18 -16
sglang/srt/managers/scheduler.py +278 -109
sglang/srt/managers/session_controller.py +61 -0
sglang/srt/managers/tokenizer_manager.py +63 -18
sglang/srt/managers/tp_worker.py +25 -16
sglang/srt/managers/tp_worker_overlap_thread.py +62 -67
sglang/srt/metrics/collector.py +13 -15
sglang/srt/metrics/func_timer.py +13 -15
sglang/srt/mm_utils.py +13 -14
sglang/srt/model_executor/cuda_graph_runner.py +63 -25
sglang/srt/model_executor/forward_batch_info.py +128 -32
sglang/srt/model_executor/model_runner.py +132 -64
sglang/srt/model_parallel.py +98 -0
sglang/srt/models/chatglm.py +15 -16
sglang/srt/models/commandr.py +15 -16
sglang/srt/models/dbrx.py +15 -16
sglang/srt/models/deepseek.py +15 -15
sglang/srt/models/deepseek_v2.py +162 -59
sglang/srt/models/exaone.py +14 -15
sglang/srt/models/gemma.py +14 -14
sglang/srt/models/gemma2.py +31 -25
sglang/srt/models/gemma2_reward.py +13 -14
sglang/srt/models/gpt_bigcode.py +14 -14
sglang/srt/models/grok.py +15 -15
sglang/srt/models/internlm2.py +13 -15
sglang/srt/models/internlm2_reward.py +13 -14
sglang/srt/models/llama.py +21 -21
sglang/srt/models/llama_classification.py +13 -14
sglang/srt/models/llama_reward.py +13 -14
sglang/srt/models/llava.py +14 -16
sglang/srt/models/llavavid.py +14 -16
sglang/srt/models/minicpm.py +13 -15
sglang/srt/models/minicpm3.py +13 -15
sglang/srt/models/mistral.py +13 -15
sglang/srt/models/mixtral.py +15 -15
sglang/srt/models/mixtral_quant.py +14 -14
sglang/srt/models/olmo.py +22 -20
sglang/srt/models/olmoe.py +23 -20
sglang/srt/models/phi3_small.py +447 -0
sglang/srt/models/qwen.py +14 -14
sglang/srt/models/qwen2.py +22 -19
sglang/srt/models/qwen2_moe.py +17 -18
sglang/srt/models/qwen2_vl.py +13 -6
sglang/srt/models/stablelm.py +18 -16
sglang/srt/models/torch_native_llama.py +107 -93
sglang/srt/models/xverse.py +13 -14
sglang/srt/models/xverse_moe.py +15 -16
sglang/srt/models/yivl.py +13 -15
sglang/srt/openai_api/adapter.py +19 -17
sglang/srt/openai_api/protocol.py +14 -16
sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
sglang/srt/sampling/sampling_batch_info.py +61 -57
sglang/srt/sampling/sampling_params.py +14 -16
sglang/srt/server.py +86 -35
sglang/srt/server_args.py +96 -80
sglang/srt/utils.py +266 -68
sglang/test/few_shot_gsm8k.py +8 -4
sglang/test/runners.py +38 -20
sglang/test/srt/sampling/penaltylib/utils.py +23 -21
sglang/test/test_utils.py +31 -20
sglang/version.py +1 -1
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/LICENSE +1 -1
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/METADATA +66 -57
sglang-0.3.6.post1.dist-info/RECORD +164 -0
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/WHEEL +1 -1
sglang/srt/layers/fused_moe/__init__.py +0 -1
sglang-0.3.5.post2.dist-info/RECORD +0 -156
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/top_level.txt +0 -0

sglang/test/test_utils.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import argparse
 import asyncio
+import copy
 import os
 import random
 import subprocess
@@ -43,7 +44,7 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8
 def is_in_ci():
     """Return whether it is in CI runner."""
-    return os.getenv("SGLANG_IS_IN_CI", "false") == "true"
+    return os.getenv("SGLANG_IS_IN_CI", "false").lower() == "true"
 if is_in_ci():
@@ -438,18 +439,22 @@ def popen_launch_server(
         process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
     start_time = time.time()
-    while time.time() - start_time < timeout:
-        try:
-            headers = {
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {api_key}",
-            }
-            response = requests.get(f"{base_url}/health_generate", headers=headers)
-            if response.status_code == 200:
-                return process
-        except requests.RequestException:
-            pass
-        time.sleep(10)
+    with requests.Session() as session:
+        while time.time() - start_time < timeout:
+            try:
+                headers = {
+                    "Content-Type": "application/json; charset=utf-8",
+                    "Authorization": f"Bearer {api_key}",
+                }
+                response = session.get(
+                    f"{base_url}/health_generate",
+                    headers=headers,
+                )
+                if response.status_code == 200:
+                    return process
+            except requests.RequestException:
+                pass
+            time.sleep(10)
     raise TimeoutError("Server failed to start within the timeout period.")
@@ -529,6 +534,7 @@ def run_bench_serving(
     random_input_len=4096,
     random_output_len=2048,
     disable_stream=False,
+    need_warmup=False,
 ):
     # Launch the server
     base_url = DEFAULT_URL_FOR_TEST
@@ -562,9 +568,14 @@ def run_bench_serving(
         disable_stream=disable_stream,
         disable_ignore_eos=False,
         extra_request_body=None,
+        profile=None,
     )
     try:
+        if need_warmup:
+            warmup_args = copy.deepcopy(args)
+            warmup_args.num_prompts = 16
+            run_benchmark(warmup_args)
         res = run_benchmark(args)
     finally:
         kill_child_process(process.pid, include_self=True)
@@ -573,11 +584,11 @@ def run_bench_serving(
     return res
-def run_bench_latency(model, other_args):
+def run_bench_one_batch(model, other_args):
     command = [
         "python3",
         "-m",
-        "sglang.bench_latency",
+        "sglang.bench_one_batch",
         "--model-path",
         model,
         "--batch-size",
@@ -664,7 +675,7 @@ def run_and_check_memory_leak(
     workload_func,
     disable_radix_cache,
     enable_mixed_chunk,
-    enable_overlap,
+    disable_overlap,
     chunked_prefill_size,
 ):
     other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
@@ -672,8 +683,8 @@ def run_and_check_memory_leak(
         other_args += ["--disable-radix-cache"]
     if enable_mixed_chunk:
         other_args += ["--enable-mixed-chunk"]
-    if enable_overlap:
-        other_args += ["--enable-overlap-schedule"]
+    if disable_overlap:
+        other_args += ["--disable-overlap-schedule"]
     model = DEFAULT_MODEL_NAME_FOR_TEST
     port = random.randint(4000, 5000)
@@ -725,7 +736,7 @@ def run_and_check_memory_leak(
 def run_mmlu_test(
     disable_radix_cache=False,
     enable_mixed_chunk=False,
-    enable_overlap=False,
+    disable_overlap=False,
     chunked_prefill_size=32,
 ):
     def workload_func(base_url, model):
@@ -748,7 +759,7 @@ def run_mmlu_test(
         workload_func,
         disable_radix_cache,
         enable_mixed_chunk,
-        enable_overlap,
+        disable_overlap,
         chunked_prefill_size,
     )

sglang/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.3.5.post2"
+__version__ = "0.3.6.post1"

{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/LICENSE RENAMED Viewed

@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2023-2024 SGLang Team
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/METADATA RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.5.post2
+Version: 0.3.6.post1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
-License: Apache License
+License:                                  Apache License
                                    Version 2.0, January 2004
                                 http://www.apache.org/licenses/
@@ -190,7 +190,7 @@ License: Apache License
               same "printed page" as the copyright notice for easier
               identification within third-party archives.
-           Copyright [yyyy] [name of copyright owner]
+           Copyright 2023-2024 SGLang Team
            Licensed under the Apache License, Version 2.0 (the "License");
            you may not use this file except in compliance with the License.
@@ -215,74 +215,85 @@ Requires-Dist: requests
 Requires-Dist: tqdm
 Requires-Dist: numpy
 Requires-Dist: IPython
-Provides-Extra: all
-Requires-Dist: sglang[srt]; extra == "all"
-Requires-Dist: sglang[openai]; extra == "all"
-Requires-Dist: sglang[anthropic]; extra == "all"
-Requires-Dist: sglang[litellm]; extra == "all"
-Provides-Extra: all_hip
-Requires-Dist: sglang[srt_hip]; extra == "all-hip"
-Requires-Dist: sglang[openai]; extra == "all-hip"
-Requires-Dist: sglang[anthropic]; extra == "all-hip"
-Requires-Dist: sglang[litellm]; extra == "all-hip"
-Provides-Extra: all_xpu
-Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
-Requires-Dist: sglang[openai]; extra == "all-xpu"
-Requires-Dist: sglang[anthropic]; extra == "all-xpu"
-Requires-Dist: sglang[litellm]; extra == "all-xpu"
-Provides-Extra: anthropic
-Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
-Provides-Extra: dev
-Requires-Dist: sglang[all]; extra == "dev"
-Requires-Dist: sglang[test]; extra == "dev"
-Provides-Extra: dev_hip
-Requires-Dist: sglang[all_hip]; extra == "dev-hip"
-Requires-Dist: sglang[test]; extra == "dev-hip"
-Provides-Extra: dev_xpu
-Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
-Requires-Dist: sglang[test]; extra == "dev-xpu"
-Provides-Extra: litellm
-Requires-Dist: litellm>=1.0.0; extra == "litellm"
-Provides-Extra: openai
-Requires-Dist: openai>=1.0; extra == "openai"
-Requires-Dist: tiktoken; extra == "openai"
-Provides-Extra: runtime_common
+Provides-Extra: runtime-common
 Requires-Dist: aiohttp; extra == "runtime-common"
 Requires-Dist: decord; extra == "runtime-common"
 Requires-Dist: fastapi; extra == "runtime-common"
-Requires-Dist: hf-transfer; extra == "runtime-common"
-Requires-Dist: huggingface-hub; extra == "runtime-common"
+Requires-Dist: hf_transfer; extra == "runtime-common"
+Requires-Dist: huggingface_hub; extra == "runtime-common"
 Requires-Dist: interegular; extra == "runtime-common"
+Requires-Dist: modelscope; extra == "runtime-common"
 Requires-Dist: orjson; extra == "runtime-common"
+Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
 Requires-Dist: packaging; extra == "runtime-common"
 Requires-Dist: pillow; extra == "runtime-common"
 Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
 Requires-Dist: psutil; extra == "runtime-common"
 Requires-Dist: pydantic; extra == "runtime-common"
 Requires-Dist: python-multipart; extra == "runtime-common"
+Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
 Requires-Dist: torchao; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
-Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
-Requires-Dist: modelscope; extra == "runtime-common"
+Requires-Dist: xgrammar>=0.1.4; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
 Requires-Dist: torch; extra == "srt"
-Requires-Dist: vllm==0.6.3.post1; extra == "srt"
-Provides-Extra: srt_hip
+Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
+Provides-Extra: srt-hip
 Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
 Requires-Dist: torch; extra == "srt-hip"
 Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
-Provides-Extra: srt_xpu
+Provides-Extra: srt-xpu
 Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
+Provides-Extra: srt-hpu
+Requires-Dist: sglang[runtime_common]; extra == "srt-hpu"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
+Provides-Extra: litellm
+Requires-Dist: litellm>=1.0.0; extra == "litellm"
 Provides-Extra: test
 Requires-Dist: jsonlines; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
 Requires-Dist: pandas; extra == "test"
-Requires-Dist: sentence-transformers; extra == "test"
+Requires-Dist: sentence_transformers; extra == "test"
 Requires-Dist: accelerate; extra == "test"
 Requires-Dist: peft; extra == "test"
+Provides-Extra: all
+Requires-Dist: sglang[srt]; extra == "all"
+Requires-Dist: sglang[openai]; extra == "all"
+Requires-Dist: sglang[anthropic]; extra == "all"
+Requires-Dist: sglang[litellm]; extra == "all"
+Provides-Extra: all-hip
+Requires-Dist: sglang[srt_hip]; extra == "all-hip"
+Requires-Dist: sglang[openai]; extra == "all-hip"
+Requires-Dist: sglang[anthropic]; extra == "all-hip"
+Requires-Dist: sglang[litellm]; extra == "all-hip"
+Provides-Extra: all-xpu
+Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
+Requires-Dist: sglang[openai]; extra == "all-xpu"
+Requires-Dist: sglang[anthropic]; extra == "all-xpu"
+Requires-Dist: sglang[litellm]; extra == "all-xpu"
+Provides-Extra: all-hpu
+Requires-Dist: sglang[srt_hpu]; extra == "all-hpu"
+Requires-Dist: sglang[openai]; extra == "all-hpu"
+Requires-Dist: sglang[anthropic]; extra == "all-hpu"
+Requires-Dist: sglang[litellm]; extra == "all-hpu"
+Provides-Extra: dev
+Requires-Dist: sglang[all]; extra == "dev"
+Requires-Dist: sglang[test]; extra == "dev"
+Provides-Extra: dev-hip
+Requires-Dist: sglang[all_hip]; extra == "dev-hip"
+Requires-Dist: sglang[test]; extra == "dev-hip"
+Provides-Extra: dev-xpu
+Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
+Requires-Dist: sglang[test]; extra == "dev-xpu"
+Provides-Extra: dev-hpu
+Requires-Dist: sglang[all_hpu]; extra == "dev-hpu"
+Requires-Dist: sglang[test]; extra == "dev-hpu"
 <div align="center"  id="sglangtop">
 <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
@@ -321,21 +332,16 @@ SGLang is a fast serving framework for large language models and vision language
 It makes your interaction with models faster and more controllable by co-designing the backend runtime and frontend language.
 The core features include:
-- **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
+- **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, overhead-free CPU scheduler, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (FP8/INT4/AWQ/GPTQ).
 - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
-- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
+- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
 - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
 ## Getting Started
-Install SGLang: See [https://sgl-project.github.io/start/install.html](https://sgl-project.github.io/start/install.html)
-Send requests: See [https://sgl-project.github.io/start/send_request.html](https://sgl-project.github.io/start/send_request.html)
-## Backend: SGLang Runtime (SRT)
-See [https://sgl-project.github.io/backend/backend.html](https://sgl-project.github.io/backend/backend.html)
-## Frontend: Structured Generation Language (SGLang)
-See [https://sgl-project.github.io/frontend/frontend.html](https://sgl-project.github.io/frontend/frontend.html)
+- [Install SGLang](https://sgl-project.github.io/start/install.html)
+- [Send requests](https://sgl-project.github.io/start/send_request.html)
+- [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
+- [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
 ## Benchmark And Performance
 Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)
@@ -343,6 +349,9 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
 ## Roadmap
 [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
-## Citation And Acknowledgment
+## Adoption and Sponsorship
+The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, and xAI.
+## Acknowledgment and Citation
+We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
 Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
-We also learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).

sglang-0.3.6.post1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,164 @@
+sglang/__init__.py,sha256=3M0oz0ZA8fULhV5LwQ4hxh-MRdHsOJRD1D63C60pdG4,1616
+sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
+sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
+sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
+sglang/bench_one_batch.py,sha256=WxrQUkMcxz5GV8OEHj0ckHgpC76HgO6YxmDvJFRDeyU,15670
+sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
+sglang/bench_serving.py,sha256=hI7FjaERyqKBrYtKewDU6E4rSufKxqsUPyUgtWtTKSI,52545
+sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
+sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
+sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
+sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
+sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
+sglang/version.py,sha256=YrfhKDmn6rTAj_qREKEXk2FahHCqSbHd4BNoD7wlIi0,28
+sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
+sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
+sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
+sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
+sglang/lang/ir.py,sha256=zpzzAO1YVldhE95Vwz5hU_TQltu-xt8A6rfFr0PuIDA,18410
+sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
+sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
+sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
+sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
+sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
+sglang/lang/backend/runtime_endpoint.py,sha256=IWbrAKrUkzNOvwV6V9_y6pkTr2SUYEkKBT-3kirgad0,10514
+sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
+sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
+sglang/srt/hf_transformers_utils.py,sha256=sUUCpjbTHuYDMuwOaz00nH5fataXKjliD8gCxXU64sw,6712
+sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
+sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
+sglang/srt/server.py,sha256=7PSxAUhiS796yQFeiQxiilRhLQ3FpV0wL53CfDgkCIk,30851
+sglang/srt/server_args.py,sha256=CfmpU6_EDnxJzpJiRx2n6AhOPCtrHPOf-7wEtTF__L0,30834
+sglang/srt/utils.py,sha256=APZEUancLC0jRI1JMbv7e5bIZy3OEySGyZspxGA60yQ,33509
+sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
+sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
+sglang/srt/configs/model_config.py,sha256=dQ58mYKN3M5IwldFZkwIb4CCBa6dREb5Om4Kg2kffOE,9565
+sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
+sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
+sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
+sglang/srt/constrained/outlines_backend.py,sha256=IDpyzXJS-ydRXYOHHzx1bO9VjiMRF8E5knn4CLFwPU8,6447
+sglang/srt/constrained/outlines_jump_forward.py,sha256=IGg6mThDepugfez0jnQ6HfLSHtiUl_Mq7bsPFppb3DA,6196
+sglang/srt/constrained/xgrammar_backend.py,sha256=4ZCQgcjWEY2Lg4r2V9sAiYJJblkQ_uVbEnvsjqhR1Pc,4548
+sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwxBQ,5471
+sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
+sglang/srt/layers/fused_moe_patch.py,sha256=dxjcBMY_zAqA0pnmy5KDUZZJSd5Q64Xlxhxyb33cdMk,4240
+sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
+sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
+sglang/srt/layers/logits_processor.py,sha256=V8fHxeQK8lzUhGD2Xc7MY1Y9qBhzFyh6hqp31RJVefg,12669
+sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
+sglang/srt/layers/radix_attention.py,sha256=C_mK4mfmKlxMRNeKYP9E5R3PRd3eT-OcE_g3mo36dJM,2058
+sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
+sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
+sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
+sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
+sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
+sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
+sglang/srt/layers/attention/flashinfer_backend.py,sha256=oblYMbmYzK94H3EA9lMhKWaKdi8HLH5NqAiZmjzj4Es,24875
+sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
+sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=BE63WhKiutSNkhJLsRwvfsRy-ExvuAv7FZyoWv73ul8,18744
+sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
+sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=Gfct-0_l-S2ZrP4F-zkzNiFbmd3C3f7uJovacOuDxaA,11472
+sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
+sglang/srt/layers/fused_moe_grok/__init__.py,sha256=rj_JBzcP--eaaM6LGQ-u580uQvqLisp5JtGBAs1fVYc,80
+sglang/srt/layers/fused_moe_grok/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
+sglang/srt/layers/fused_moe_grok/layer.py,sha256=v-o5YHYEU2HIEZwouyuc3UyfNj7YQrEYOO_BXKELU7Y,23453
+sglang/srt/layers/fused_moe_triton/__init__.py,sha256=PHKFqd2hPOO-g9kSMseg2g76lpg9OGXQDThWU6bt9vs,902
+sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=qwfRBOeY5DT48Q6z71Eh9cjFehvs_K6eLIVWNL044Ug,28363
+sglang/srt/layers/fused_moe_triton/layer.py,sha256=URDkTt8xEqnqpO5tb_3L7JlhlO53VWfqDDNSRYEu-LY,21545
+sglang/srt/layers/quantization/__init__.py,sha256=f9tCC_9sHjp7JCPvyZIvuoTB4KooIucGA9S2w7ADevw,4849
+sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
+sglang/srt/lora/lora.py,sha256=KhhO9aKCyFWvJnhI07lZKANIvNjtt882HrTYFNBZMv0,15065
+sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
+sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
+sglang/srt/managers/data_parallel_controller.py,sha256=JxRtJJTVn1FU2iD292rLZPftAsR4_8j4d3yF8j0dvBc,8327
+sglang/srt/managers/detokenizer_manager.py,sha256=nWBn54pz3aQ8tzVvViwwL2k0V4WATi0qw11H0Bzua-Q,7389
+sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
+sglang/srt/managers/io_struct.py,sha256=WLXz-tyn0jR7zNO9feRBXgyjphVa8qR55OoEOUdzoVI,13751
+sglang/srt/managers/schedule_batch.py,sha256=-5oYdkStPiYjPWl0tCkUVRjTGB7fjA0wIngK-09da7w,43111
+sglang/srt/managers/schedule_policy.py,sha256=ayFz4iPLIlG8mx5i1glTCAMHJPGpFedMP9UgRtqkNhA,12526
+sglang/srt/managers/scheduler.py,sha256=8owHPXG6fxZtsCWSJ6K7EOlFDcPxYinZC1DwKMJcEVM,55930
+sglang/srt/managers/session_controller.py,sha256=jXoPHxMGh8T1iYWIEjSXoPVwaL6NEjv3QtqlsrvPE1c,2355
+sglang/srt/managers/tokenizer_manager.py,sha256=zYbKEKNuM1B3PXzA7jnDpxew-0rZXSX-7dHmVLWG3e4,26477
+sglang/srt/managers/tp_worker.py,sha256=1SQJ60iKS9e5vGY555fT1iZ4OtLumXzeWfB08fSWKbk,6176
+sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7vhPebaOS4JamaS08CGf_hwxnUO7Gy_SXZXEPwNHKoY,7621
+sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
+sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
+sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
+sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
+sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
+sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
+sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=4hbCtE3gt5kvMNHrnxkE8YPRFcgmVo0Bwz3lgbYZw_E,14805
+sglang/srt/model_executor/forward_batch_info.py,sha256=n5yk927COTU0klDAkQuwrFzamMygfkHxmDp1I6bJYD8,12612
+sglang/srt/model_executor/model_runner.py,sha256=AafFWd_EDWbOe0o5etAyutGum5O8_9tO55KRcaAWDW4,29680
+sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
+sglang/srt/models/chatglm.py,sha256=OikygdK8Mi6F2QPPhAr2E_P4l2V0yWQjDJOdnBAApPE,13216
+sglang/srt/models/commandr.py,sha256=XkzpfsdDPDx-W5oOac8nFIe39JJZvmv65K5GIpgJTz0,14212
+sglang/srt/models/dbrx.py,sha256=ucn3UJ1s4nx2qa5hUb8VhJmfVrDZ59e9oNetMU5EWq8,14624
+sglang/srt/models/deepseek.py,sha256=B5OuW--kDIPfZesOhvGGUhHQNWh0pMPNCYmdsv9lv5U,15922
+sglang/srt/models/deepseek_v2.py,sha256=shdHVtZGmLEZMZwGlIPz8NPoSb1c_n6hQxWKG45WahE,32265
+sglang/srt/models/exaone.py,sha256=6LJ1Mr9MbHOXdH_nK9Dba3SR28LMCJvdH1k53w9M9Vg,13081
+sglang/srt/models/gemma.py,sha256=079CfoQqBnrLIbW0LWcLp-nmb1aPVN1Tw6PxMQQ3Lsk,12289
+sglang/srt/models/gemma2.py,sha256=lbfQhQpUhf1MAEB_00Uo6rp20k4Hr353UbPKKuMsxec,15020
+sglang/srt/models/gemma2_reward.py,sha256=cQawatbsfBuWQTueivYHl_17ZoQUHEelI1sr1y5pvfY,2556
+sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
+sglang/srt/models/gpt_bigcode.py,sha256=lYo4ajy49VvvPkaduaFtOaCRT_ItqyNUE158S-BI5QA,10136
+sglang/srt/models/grok.py,sha256=rDIH_SFzauuEHcL_vCOSrYLjdBC3i3o_AcceL3amsJw,14927
+sglang/srt/models/internlm2.py,sha256=DxbA15d9QR0tLOczpC6DkB8QyNHXJRdZatY6Nskwv1k,12170
+sglang/srt/models/internlm2_reward.py,sha256=Lr-JA0vfTQJt9q5oDMiopGuoXAevyEv5PAoDe2rsTJk,2425
+sglang/srt/models/llama.py,sha256=FSGuM3BamhuT5h2jedh5cSFwFYduOJwkAZJJ672awRw,16423
+sglang/srt/models/llama_classification.py,sha256=c8WZ1ADa3f6s2IJVoP10ouVgeCwv_ndns_qMgLrC6QI,3413
+sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
+sglang/srt/models/llama_reward.py,sha256=prhHDPpf1k6tlQtGE6zq5gx0uSZAD3W5v7W28bdgy4U,4619
+sglang/srt/models/llava.py,sha256=72DnZXIwu78zYqU8YIElq_AaSIFO_icYOPTHXE0_-YQ,24941
+sglang/srt/models/llavavid.py,sha256=DeWqGSmXgIYGuLyy2ZrxjM9WqbRjueP4chNmXt7Bnus,12221
+sglang/srt/models/minicpm.py,sha256=KbiTf-kaDAJxSo9Z4IGMTrs9WrYYji1KXO1kA2iy-as,13816
+sglang/srt/models/minicpm3.py,sha256=C43mTr2Qjccj4sXuTDgzbfZhvCNbsEHNggMRXQ7SrWs,25108
+sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
+sglang/srt/models/mixtral.py,sha256=E3d8I7V3Dp1nCEHRbhh-PKBG8UaVK5XOHwl9QyIjcX0,14043
+sglang/srt/models/mixtral_quant.py,sha256=o-oTG8BGtWuNu-o6muHSarMNBQwrjQowyBFOQhuclZ8,14065
+sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
+sglang/srt/models/olmo.py,sha256=DEUPNDM0z83N-Qdhkj2WJMtbiz5JNbSBMIjUaYZN9RM,12068
+sglang/srt/models/olmoe.py,sha256=jVKrjqQQrWLdlkGSGUaMPdT9PHzNH4X-RVwON29eaGw,15412
+sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
+sglang/srt/models/qwen.py,sha256=P9zcFnz_Tsz73tVtLRwZ8uWzCtMxWOrzlv2o9Ys_Gck,9947
+sglang/srt/models/qwen2.py,sha256=ApFFASNwvrkDXi-KkCNA7fTk4uLMuJWoMg15zCaAKdA,12514
+sglang/srt/models/qwen2_moe.py,sha256=1oxDsKDq3jlHKx9jMi1SfHOqCRVyN5n76uw3M-CUODE,17048
+sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
+sglang/srt/models/stablelm.py,sha256=jpmsyWMJo_9JapOESnuV7ObNCh78BRznXY0iFvvIbZE,11354
+sglang/srt/models/torch_native_llama.py,sha256=vNQxsnbVAY1bdyMCCWDZAtWdbaFIiJXhmVxHjk5BB9Y,19400
+sglang/srt/models/xverse.py,sha256=LGe0ma0wOir3x-OLBT_cRocw8JEo9d3AYNxgA2OcLrk,13659
+sglang/srt/models/xverse_moe.py,sha256=YqbzkSsnTFt-8-aI8YobF9qJA70qrBjbS1Kjn1KNqVY,15766
+sglang/srt/models/yivl.py,sha256=yj4aWsOBVGQBLurSrLmYXVC7zGIPH7EYHHtAaAZ7Liw,4859
+sglang/srt/openai_api/adapter.py,sha256=MhOcWZjcLv4_OuvLvDMcAu6K_u2joJvhaZxaKm0hi3M,53634
+sglang/srt/openai_api/protocol.py,sha256=vBgrbTqtECsZ5dG0rgP1FHsTBt4eR9zbDX3FBIN-rz4,10172
+sglang/srt/sampling/sampling_batch_info.py,sha256=YC-KPyDWyLGNPL4YVcst4xwP8Wlz2zcCNJHB_5zljXQ,8470
+sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
+sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
+sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
+sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
+sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
+sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
+sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
+sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
+sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
+sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
+sglang/test/runners.py,sha256=ANzjrHkT_1E0G3UcD47O8XEKst3Si4AOfx-uErbFS7o,15129
+sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
+sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
+sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
+sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
+sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
+sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
+sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
+sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
+sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
+sglang/test/test_utils.py,sha256=ULF7C3pLXkMevXgE_Dodt29OBfvvXKUnRvwKhaBg1ys,23470
+sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
+sglang-0.3.6.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
+sglang-0.3.6.post1.dist-info/METADATA,sha256=XwhCEL8SbEVcT7LQLk26g6tzduS6mByBE7dDqZYpQxo,22073
+sglang-0.3.6.post1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+sglang-0.3.6.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.6.post1.dist-info/RECORD,,

{sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.5.0)
+Generator: setuptools (75.6.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

sglang/srt/layers/fused_moe/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- from sglang.srt.layers.fused_moe.layer import FusedMoE, FusedMoEMethodBase

sglang 0.3.5.post2__py3-none-any.whl → 0.3.6.post1__py3-none-any.whl

sglang 0.3.5.post2py3-none-any.whl → 0.3.6.post1py3-none-any.whl