PyPI - sglang - Versions diffs - 0.4.3__tar.gz → 0.4.3.post2__tar.gz - Mend

sglang 0.4.3__tar.gz → 0.4.3.post2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (449) hide show

{sglang-0.4.3/sglang.egg-info → sglang-0.4.3.post2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: sglang
-Version: 0.4.3
+Version: 0.4.3.post2
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -235,7 +235,7 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
 Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: xgrammar>=0.1.10; extra == "runtime-common"
+Requires-Dist: xgrammar==0.1.10; extra == "runtime-common"
 Requires-Dist: ninja; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
@@ -243,7 +243,7 @@ Requires-Dist: cuda-python; extra == "srt"
 Requires-Dist: sgl-kernel>=0.0.3.post6; extra == "srt"
 Requires-Dist: torch; extra == "srt"
 Requires-Dist: vllm<=0.7.2,>=0.6.4.post1; extra == "srt"
-Requires-Dist: flashinfer_python>=0.2.1.post1; extra == "srt"
+Requires-Dist: flashinfer_python>=0.2.1.post2; extra == "srt"
 Requires-Dist: outlines<=0.1.11,>=0.0.44; extra == "srt"
 Provides-Extra: srt-hip
 Requires-Dist: sglang[runtime_common]; extra == "srt-hip"

{sglang-0.4.3 → sglang-0.4.3.post2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "sglang"
-version = "0.4.3"
+version = "0.4.3.post2"
 description = "SGLang is yet another fast serving framework for large language models and vision language models."
 readme = "README.md"
 requires-python = ">=3.8"
@@ -21,12 +21,12 @@ runtime_common = [
     "hf_transfer", "huggingface_hub", "interegular", "modelscope",
     "orjson", "packaging", "pillow", "prometheus-client>=0.20.0",
     "psutil", "pydantic", "python-multipart", "pyzmq>=25.1.2",
-    "torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar>=0.1.10", "ninja"
+    "torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar==0.1.10", "ninja"
 ]
 srt = [
     "sglang[runtime_common]", "cuda-python",
     "sgl-kernel>=0.0.3.post6", "torch", "vllm>=0.6.4.post1,<=0.7.2",
-    "flashinfer_python>=0.2.1.post1",
+    "flashinfer_python>=0.2.1.post2",
     "outlines>=0.0.44,<=0.1.11",
 ]

{sglang-0.4.3 → sglang-0.4.3.post2}/sglang/lang/backend/openai.py RENAMED Viewed

@@ -161,6 +161,10 @@ class OpenAI(BaseBackend):
                 prompt = s.text_
             kwargs = sampling_params.to_openai_kwargs()
+            if self.model_name.startswith("o1") or self.model_name.startswith("o3"):
+                kwargs.pop("max_tokens", None)
+            else:
+                kwargs.pop("max_completion_tokens", None)
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
@@ -175,6 +179,7 @@ class OpenAI(BaseBackend):
             ), "constrained type not supported on chat model"
             kwargs = sampling_params.to_openai_kwargs()
             kwargs.pop("stop")
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,

{sglang-0.4.3 → sglang-0.4.3.post2}/sglang/lang/chat_template.py RENAMED Viewed

@@ -353,7 +353,6 @@ register_chat_template(
     )
 )
 register_chat_template(
     ChatTemplate(
         name="deepseek-v3",
@@ -428,12 +427,15 @@ def match_chat_ml(model_path: str):
     if "tinyllama" in model_path:
         return get_chat_template("chatml")
     # Now the suffix for qwen2 chat model is "instruct"
-    if (
-        "qwen" in model_path
-        and ("chat" in model_path or "instruct" in model_path)
-        and ("llava" not in model_path)
-    ):
-        return get_chat_template("qwen")
+    if "qwen" in model_path and "vl" in model_path:
+        return get_chat_template("qwen2-vl")
+    if "qwen" in model_path:
+        if "vl" in model_path:
+            return get_chat_template("qwen2-vl")
+        if ("chat" in model_path or "instruct" in model_path) and (
+            "llava" not in model_path
+        ):
+            return get_chat_template("qwen")
     if (
         "llava-v1.6-34b" in model_path
         or "llava-v1.6-yi-34b" in model_path
@@ -443,6 +445,12 @@ def match_chat_ml(model_path: str):
         return get_chat_template("chatml-llava")
+@register_chat_template_matching_function
+def match_chat_minicpm(model_path: str):
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
 @register_chat_template_matching_function
 def match_chat_yi(model_path: str):
     model_path = model_path.lower()
@@ -459,6 +467,13 @@ def match_gemma_it(model_path: str):
         return get_chat_template("gemma-it")
+@register_chat_template_matching_function
+def match_openbmb_minicpm(model_path: str):
+    model_path = model_path.lower()
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
 @register_chat_template_matching_function
 def match_c4ai_command_r(model_path: str):
     model_path = model_path.lower()

{sglang-0.4.3 → sglang-0.4.3.post2}/sglang/lang/ir.py RENAMED Viewed

@@ -63,6 +63,7 @@ class SglSamplingParams:
             warnings.warn("Regular expression is not supported in the OpenAI backend.")
         return {
             "max_tokens": self.max_new_tokens,
+            "max_completion_tokens": self.max_new_tokens,
             "stop": self.stop or None,
             "temperature": self.temperature,
             "top_p": self.top_p,

{sglang-0.4.3 → sglang-0.4.3.post2}/sglang/srt/configs/__init__.py RENAMED Viewed

@@ -1,12 +1,15 @@
 from sglang.srt.configs.chatglm import ChatGLMConfig
 from sglang.srt.configs.dbrx import DbrxConfig
 from sglang.srt.configs.exaone import ExaoneConfig
-from sglang.srt.configs.qwen2vl import Qwen2VLConfig, Qwen2VLVisionConfig
+from sglang.srt.configs.qwen2_5_vl_config import (
+    Qwen2_5_VLConfig,
+    Qwen2_5_VLVisionConfig,
+)
 __all__ = [
     "ExaoneConfig",
-    "Qwen2VLConfig",
-    "Qwen2VLVisionConfig",
     "ChatGLMConfig",
     "DbrxConfig",
+    "Qwen2_5_VLConfig",
+    "Qwen2_5_VLVisionConfig",
 ]

{sglang-0.4.3 → sglang-0.4.3.post2}/sglang/srt/configs/model_config.py RENAMED Viewed

@@ -98,6 +98,7 @@ class ModelConfig:
         if (
             "DeepseekV2ForCausalLM" in self.hf_config.architectures
             or "DeepseekV3ForCausalLM" in self.hf_config.architectures
+            or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
         ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
@@ -402,6 +403,7 @@ def is_multimodal_model(model_architectures: List[str]):
         or "LlavaVidForCausalLM" in model_architectures
         or "MllamaForConditionalGeneration" in model_architectures
         or "Qwen2VLForConditionalGeneration" in model_architectures
+        or "Qwen2_5_VLForConditionalGeneration" in model_architectures
         or "MiniCPMV" in model_architectures
     ):
         return True

sglang 0.4.3__tar.gz → 0.4.3.post2__tar.gz

sglang 0.4.3__tar.gz → 0.4.3.post2__tar.gz