sglang 0.4.3.post3__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +2 -2
- sglang/lang/chat_template.py +29 -0
- sglang/srt/_custom_ops.py +19 -17
- sglang/srt/configs/__init__.py +2 -0
- sglang/srt/configs/janus_pro.py +629 -0
- sglang/srt/configs/model_config.py +24 -14
- sglang/srt/conversation.py +80 -2
- sglang/srt/custom_op.py +64 -3
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +18 -17
- sglang/srt/distributed/parallel_state.py +10 -1
- sglang/srt/entrypoints/engine.py +5 -3
- sglang/srt/entrypoints/http_server.py +1 -1
- sglang/srt/hf_transformers_utils.py +16 -1
- sglang/srt/layers/attention/flashinfer_backend.py +95 -49
- sglang/srt/layers/attention/flashinfer_mla_backend.py +317 -57
- sglang/srt/layers/attention/triton_backend.py +5 -5
- sglang/srt/layers/attention/triton_ops/decode_attention.py +6 -6
- sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +3 -3
- sglang/srt/layers/attention/triton_ops/extend_attention.py +4 -4
- sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py +3 -3
- sglang/srt/layers/attention/vision.py +43 -62
- sglang/srt/layers/linear.py +1 -1
- sglang/srt/layers/moe/ep_moe/kernels.py +2 -1
- sglang/srt/layers/moe/ep_moe/layer.py +25 -9
- sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +63 -23
- sglang/srt/layers/moe/fused_moe_triton/layer.py +16 -4
- sglang/srt/layers/parameter.py +10 -0
- sglang/srt/layers/quantization/__init__.py +90 -68
- sglang/srt/layers/quantization/blockwise_int8.py +1 -2
- sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/fp8.py +174 -106
- sglang/srt/layers/quantization/fp8_kernel.py +210 -38
- sglang/srt/layers/quantization/fp8_utils.py +156 -15
- sglang/srt/layers/quantization/modelopt_quant.py +5 -1
- sglang/srt/layers/quantization/w8a8_fp8.py +128 -0
- sglang/srt/layers/quantization/w8a8_int8.py +152 -3
- sglang/srt/layers/rotary_embedding.py +5 -3
- sglang/srt/layers/sampler.py +29 -35
- sglang/srt/layers/vocab_parallel_embedding.py +0 -1
- sglang/srt/lora/backend/__init__.py +9 -12
- sglang/srt/managers/cache_controller.py +72 -8
- sglang/srt/managers/image_processor.py +37 -631
- sglang/srt/managers/image_processors/base_image_processor.py +219 -0
- sglang/srt/managers/image_processors/janus_pro.py +79 -0
- sglang/srt/managers/image_processors/llava.py +152 -0
- sglang/srt/managers/image_processors/minicpmv.py +86 -0
- sglang/srt/managers/image_processors/mlama.py +60 -0
- sglang/srt/managers/image_processors/qwen_vl.py +161 -0
- sglang/srt/managers/io_struct.py +33 -15
- sglang/srt/managers/multi_modality_padding.py +134 -0
- sglang/srt/managers/schedule_batch.py +212 -117
- sglang/srt/managers/schedule_policy.py +40 -8
- sglang/srt/managers/scheduler.py +258 -782
- sglang/srt/managers/scheduler_output_processor_mixin.py +611 -0
- sglang/srt/managers/tokenizer_manager.py +7 -6
- sglang/srt/managers/tp_worker_overlap_thread.py +4 -1
- sglang/srt/mem_cache/base_prefix_cache.py +6 -8
- sglang/srt/mem_cache/chunk_cache.py +12 -44
- sglang/srt/mem_cache/hiradix_cache.py +63 -34
- sglang/srt/mem_cache/memory_pool.py +112 -46
- sglang/srt/mem_cache/paged_allocator.py +283 -0
- sglang/srt/mem_cache/radix_cache.py +117 -36
- sglang/srt/metrics/collector.py +8 -0
- sglang/srt/model_executor/cuda_graph_runner.py +10 -11
- sglang/srt/model_executor/forward_batch_info.py +12 -8
- sglang/srt/model_executor/model_runner.py +153 -134
- sglang/srt/model_loader/loader.py +2 -1
- sglang/srt/model_loader/weight_utils.py +1 -1
- sglang/srt/models/deepseek_janus_pro.py +2127 -0
- sglang/srt/models/deepseek_nextn.py +23 -3
- sglang/srt/models/deepseek_v2.py +25 -19
- sglang/srt/models/minicpmv.py +28 -89
- sglang/srt/models/mllama.py +1 -1
- sglang/srt/models/qwen2.py +0 -1
- sglang/srt/models/qwen2_5_vl.py +25 -50
- sglang/srt/models/qwen2_vl.py +33 -49
- sglang/srt/openai_api/adapter.py +37 -15
- sglang/srt/openai_api/protocol.py +8 -1
- sglang/srt/sampling/penaltylib/frequency_penalty.py +0 -1
- sglang/srt/sampling/penaltylib/presence_penalty.py +0 -1
- sglang/srt/server_args.py +19 -20
- sglang/srt/speculative/build_eagle_tree.py +6 -1
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +1 -11
- sglang/srt/speculative/eagle_utils.py +2 -1
- sglang/srt/speculative/eagle_worker.py +109 -38
- sglang/srt/utils.py +104 -9
- sglang/test/runners.py +104 -10
- sglang/test/test_block_fp8.py +106 -16
- sglang/test/test_custom_ops.py +88 -0
- sglang/test/test_utils.py +20 -4
- sglang/utils.py +0 -4
- sglang/version.py +1 -1
- {sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/METADATA +9 -9
- {sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/RECORD +128 -83
- {sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/WHEEL +1 -1
- {sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -28,6 +28,10 @@ from sglang.test.run_eval import run_eval
 from sglang.utils import get_exception_traceback
 
 DEFAULT_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/Meta-Llama-3.1-8B-FP8"
+DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST = "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
+DEFAULT_FP8_MODEL_NAME_FOR_DYNAMIC_QUANT_ACCURACY_TEST = (
+    "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamic"
+)
 DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
 DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -36,12 +40,15 @@ DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instru
 DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
 DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
 DEFAULT_REASONING_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
+DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST = (
+    "hugging-quants/Mixtral-8x7B-Instruct-v0.1-AWQ-INT4"
+)
 DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 1000
 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it"
 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct"
 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8"
 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
-DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4,hugging-quants/Mixtral-8x7B-Instruct-v0.1-AWQ-INT4"
 DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
 DEFAULT_SMALL_VLM_MODEL_NAME = "Qwen/Qwen2-VL-2B"
 
@@ -446,22 +453,31 @@ def run_with_timeout(
     return ret_value[0]
 
 
-def run_unittest_files(files: List
+def run_unittest_files(files: List, timeout_per_file: float):
     tic = time.time()
     success = True
 
-    for
+    for file in files:
+        filename, estimated_time = file.name, file.estimated_time
         process = None
 
         def run_one_file(filename):
            nonlocal process
 
            filename = os.path.join(os.getcwd(), filename)
-            print(f"
+            print(f".\n.\nBegin:\npython3 {filename}\n.\n.\n", flush=True)
+            tic = time.time()
+
            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
+            elapsed = time.time() - tic
+
+            print(
+                f".\n.\nEnd:\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
+                flush=True,
+            )
            return process.returncode
 
        try:
sglang/utils.py
CHANGED
@@ -24,14 +24,10 @@ import requests
 from IPython.display import HTML, display
 from tqdm import tqdm
 
-from sglang.srt.openai_api.protocol import ChatCompletionMessageContentPart
 from sglang.srt.utils import kill_process_tree
 
 logger = logging.getLogger(__name__)
 
-# type of content fields, can be only prompts or with images/videos
-MsgContent = Union[str, List[ChatCompletionMessageContentPart]]
-
 
 def get_exception_traceback():
     etype, value, tb = sys.exc_info()
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.4.3.post3"
+__version__ = "0.4.4"
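To confirm which side of the bump an environment is on, the module touched by this one-line diff can be read directly, with the installed distribution metadata as a cross-check:

```python
# Read the version string shipped in sglang/version.py and the wheel metadata.
from importlib.metadata import version

from sglang.version import __version__

print(__version__)        # "0.4.4" after this upgrade, "0.4.3.post3" before
print(version("sglang"))  # version recorded in the installed METADATA
```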
{sglang-0.4.3.post3.dist-info → sglang-0.4.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: sglang
-Version: 0.4.3.post3
+Version: 0.4.4
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                            Version 2.0, January 2004
@@ -211,19 +211,22 @@ Classifier: License :: OSI Approved :: Apache Software License
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: aiohttp
 Requires-Dist: requests
 Requires-Dist: tqdm
 Requires-Dist: numpy
 Requires-Dist: IPython
 Requires-Dist: setproctitle
 Provides-Extra: runtime-common
-Requires-Dist: 
+Requires-Dist: datasets; extra == "runtime-common"
 Requires-Dist: decord; extra == "runtime-common"
 Requires-Dist: fastapi; extra == "runtime-common"
 Requires-Dist: hf_transfer; extra == "runtime-common"
 Requires-Dist: huggingface_hub; extra == "runtime-common"
 Requires-Dist: interegular; extra == "runtime-common"
+Requires-Dist: llguidance>=0.6.15; extra == "runtime-common"
 Requires-Dist: modelscope; extra == "runtime-common"
+Requires-Dist: ninja; extra == "runtime-common"
 Requires-Dist: orjson; extra == "runtime-common"
 Requires-Dist: packaging; extra == "runtime-common"
 Requires-Dist: pillow; extra == "runtime-common"
@@ -233,23 +236,20 @@ Requires-Dist: pydantic; extra == "runtime-common"
 Requires-Dist: python-multipart; extra == "runtime-common"
 Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
 Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
+Requires-Dist: transformers==4.48.3; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: xgrammar==0.1.
-Requires-Dist: ninja; extra == "runtime-common"
-Requires-Dist: transformers==4.48.3; extra == "runtime-common"
-Requires-Dist: llguidance>=0.6.15; extra == "runtime-common"
+Requires-Dist: xgrammar==0.1.15; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
-Requires-Dist: sgl-kernel==0.0.
-Requires-Dist: flashinfer_python==0.2.
+Requires-Dist: sgl-kernel==0.0.5; extra == "srt"
+Requires-Dist: flashinfer_python==0.2.3; extra == "srt"
 Requires-Dist: torch==2.5.1; extra == "srt"
 Requires-Dist: vllm<=0.7.2,>=0.6.4.post1; extra == "srt"
 Requires-Dist: cuda-python; extra == "srt"
 Requires-Dist: outlines<=0.1.11,>=0.0.44; extra == "srt"
 Provides-Extra: srt-hip
 Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
-Requires-Dist: sgl-kernel==0.0.3.post6; extra == "srt-hip"
 Requires-Dist: torch; extra == "srt-hip"
 Requires-Dist: vllm==0.6.7.dev2; extra == "srt-hip"
 Requires-Dist: outlines==0.1.11; extra == "srt-hip"
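The 0.4.4 METADATA pins xgrammar==0.1.15 and transformers==4.48.3 under the runtime-common extra and, for the srt extra, sgl-kernel==0.0.5 and flashinfer_python==0.2.3. A small sketch for spot-checking an installed environment against those pins; the distribution names are taken verbatim from the METADATA above, and dash/underscore normalization of names may vary across Python versions:

```python
# Compare installed versions against pins carried in sglang 0.4.4's METADATA.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "xgrammar": "0.1.15",          # runtime-common extra
    "transformers": "4.48.3",      # runtime-common extra
    "sgl-kernel": "0.0.5",         # srt extra
    "flashinfer_python": "0.2.3",  # srt extra
}

for name, pinned in PINS.items():
    try:
        installed = version(name)
        status = "ok" if installed == pinned else "mismatch"
        print(f"{name}: installed {installed}, wheel pins {pinned} ({status})")
    except PackageNotFoundError:
        print(f"{name}: not installed (pulled in only via the corresponding extra)")
```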