sglang 0.4.3__py3-none-any.whl → 0.4.3.post1__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +16 -1
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +4 -1
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/METADATA +2 -2
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/RECORD +26 -24
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/top_level.txt +0 -0
sglang/lang/backend/openai.py
CHANGED
@@ -161,6 +161,10 @@ class OpenAI(BaseBackend):
                 prompt = s.text_
 
             kwargs = sampling_params.to_openai_kwargs()
+            if self.model_name.startswith("o1") or self.model_name.startswith("o3"):
+                kwargs.pop("max_tokens", None)
+            else:
+                kwargs.pop("max_completion_tokens", None)
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
@@ -175,6 +179,7 @@ class OpenAI(BaseBackend):
             ), "constrained type not supported on chat model"
             kwargs = sampling_params.to_openai_kwargs()
             kwargs.pop("stop")
+
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
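The first hunk means exactly one token-limit key survives, chosen by model family: OpenAI's o1/o3 reasoning models reject max_tokens and expect max_completion_tokens, while earlier models only accept max_tokens. A minimal standalone sketch of that selection step (the function name and bare dicts below are illustrative, not sglang's actual objects):

def select_token_limit(model_name: str, kwargs: dict) -> dict:
    # o1/o3 reasoning models reject max_tokens and expect
    # max_completion_tokens; earlier chat/completion models are the reverse.
    if model_name.startswith("o1") or model_name.startswith("o3"):
        kwargs.pop("max_tokens", None)
    else:
        kwargs.pop("max_completion_tokens", None)
    return kwargs

# Both keys start out present (see the ir.py change below); one is dropped.
print(select_token_limit("o3-mini", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_completion_tokens': 64}
print(select_token_limit("gpt-4o-mini", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_tokens': 64}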
sglang/lang/chat_template.py
CHANGED
@@ -353,7 +353,6 @@ register_chat_template(
     )
 )
 
-
 register_chat_template(
     ChatTemplate(
         name="deepseek-v3",
@@ -428,12 +427,15 @@ def match_chat_ml(model_path: str):
     if "tinyllama" in model_path:
         return get_chat_template("chatml")
     # Now the suffix for qwen2 chat model is "instruct"
-    if (
-        "qwen" in model_path
-        and ("chat" in model_path or "instruct" in model_path)
-        and ("llava" not in model_path)
-    ):
-        return get_chat_template("qwen")
+    if "qwen" in model_path and "vl" in model_path:
+        return get_chat_template("qwen2-vl")
+    if "qwen" in model_path:
+        if "vl" in model_path:
+            return get_chat_template("qwen2-vl")
+        if ("chat" in model_path or "instruct" in model_path) and (
+            "llava" not in model_path
+        ):
+            return get_chat_template("qwen")
     if (
         "llava-v1.6-34b" in model_path
         or "llava-v1.6-yi-34b" in model_path
@@ -443,6 +445,12 @@ def match_chat_ml(model_path: str):
         return get_chat_template("chatml-llava")
 
 
+@register_chat_template_matching_function
+def match_chat_minicpm(model_path: str):
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_chat_yi(model_path: str):
     model_path = model_path.lower()
@@ -459,6 +467,13 @@ def match_gemma_it(model_path: str):
         return get_chat_template("gemma-it")
 
 
+@register_chat_template_matching_function
+def match_openbmb_minicpm(model_path: str):
+    model_path = model_path.lower()
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_c4ai_command_r(model_path: str):
     model_path = model_path.lower()
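Taken together, the match_chat_ml changes route any qwen + vl model path to the qwen2-vl template before the text-only qwen branch can claim it. A trimmed, self-contained re-implementation of just that ordering, returning template names instead of ChatTemplate objects (this helper is illustrative, not part of sglang):

from typing import Optional

def match_qwen_template(model_path: str) -> Optional[str]:
    # Mirrors the branch order added above; returns names, not templates.
    model_path = model_path.lower()
    if "qwen" in model_path and "vl" in model_path:
        return "qwen2-vl"  # vision-language checkpoints are matched first
    if "qwen" in model_path:
        if ("chat" in model_path or "instruct" in model_path) and (
            "llava" not in model_path
        ):
            return "qwen"  # text-only chat/instruct checkpoints
    return None

assert match_qwen_template("Qwen/Qwen2.5-VL-7B-Instruct") == "qwen2-vl"
assert match_qwen_template("Qwen/Qwen2.5-7B-Instruct") == "qwen"
assert match_qwen_template("llava-qwen-chat") is None  # llava stays excluded

Note that in the actual diff the inner `if "vl" in model_path` branch is unreachable (the first check already returned), and match_chat_minicpm and match_openbmb_minicpm both map minicpm paths to the same minicpmv template.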
sglang/lang/ir.py
CHANGED
@@ -63,6 +63,7 @@ class SglSamplingParams:
             warnings.warn("Regular expression is not supported in the OpenAI backend.")
         return {
             "max_tokens": self.max_new_tokens,
+            "max_completion_tokens": self.max_new_tokens,
             "stop": self.stop or None,
             "temperature": self.temperature,
             "top_p": self.top_p,
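This one-line addition makes to_openai_kwargs() emit both spellings of the token budget; the backend change above then pops whichever one the target model rejects, so the OpenAI client never receives both. A sketch of the resulting dict shape (values illustrative, field subset only):

max_new_tokens = 128
openai_kwargs = {
    "max_tokens": max_new_tokens,             # accepted by pre-o1 models
    "max_completion_tokens": max_new_tokens,  # required by o1/o3 models
    "stop": None,
    "temperature": 0.7,
    "top_p": 1.0,
}
# Exactly one of the two token-limit keys is removed before the API call.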
sglang/srt/configs/__init__.py
CHANGED
@@ -1,12 +1,15 @@
 from sglang.srt.configs.chatglm import ChatGLMConfig
 from sglang.srt.configs.dbrx import DbrxConfig
 from sglang.srt.configs.exaone import ExaoneConfig
-from sglang.srt.configs.qwen2vl import Qwen2VLConfig, Qwen2VLVisionConfig
+from sglang.srt.configs.qwen2_5_vl_config import (
+    Qwen2_5_VLConfig,
+    Qwen2_5_VLVisionConfig,
+)
 
 __all__ = [
     "ExaoneConfig",
-    "Qwen2VLConfig",
-    "Qwen2VLVisionConfig",
     "ChatGLMConfig",
     "DbrxConfig",
+    "Qwen2_5_VLConfig",
+    "Qwen2_5_VLVisionConfig",
 ]
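With the re-export in place, the Qwen2.5-VL configs are importable from the package root, replacing the deleted qwen2vl module. A minimal smoke test, assuming an environment with sglang==0.4.3.post1 installed (attribute names follow the Hugging Face config conventions that qwen2_5_vl_config.py mirrors, so treat them as assumptions):

from sglang.srt.configs import Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig

# Hugging Face-style configs construct with defaults; the top-level config
# is assumed to nest a vision config, per the HF Qwen2.5-VL port.
config = Qwen2_5_VLConfig()
print(config.model_type)                                         # e.g. "qwen2_5_vl"
print(isinstance(config.vision_config, Qwen2_5_VLVisionConfig))  # expected: True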
sglang/srt/configs/model_config.py
CHANGED
@@ -98,6 +98,7 @@ class ModelConfig:
         if (
             "DeepseekV2ForCausalLM" in self.hf_config.architectures
             or "DeepseekV3ForCausalLM" in self.hf_config.architectures
+            or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
         ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
@@ -402,6 +403,7 @@ def is_multimodal_model(model_architectures: List[str]):
         or "LlavaVidForCausalLM" in model_architectures
         or "MllamaForConditionalGeneration" in model_architectures
         or "Qwen2VLForConditionalGeneration" in model_architectures
+        or "Qwen2_5_VLForConditionalGeneration" in model_architectures
         or "MiniCPMV" in model_architectures
     ):
         return True
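The first hunk routes the new DeepSeek NextN draft architecture (added in sglang/srt/models/deepseek_nextn.py) down the same MLA attention path as DeepSeek-V2/V3. The second hunk just extends a membership test; a trimmed re-implementation for illustration (the real function in model_config.py checks more architectures than listed here):

from typing import List

def is_multimodal_model(model_architectures: List[str]) -> bool:
    # Subset of the architectures recognized after this diff.
    multimodal_archs = {
        "LlavaVidForCausalLM",
        "MllamaForConditionalGeneration",
        "Qwen2VLForConditionalGeneration",
        "Qwen2_5_VLForConditionalGeneration",  # newly added in 0.4.3.post1
        "MiniCPMV",
    }
    return any(arch in multimodal_archs for arch in model_architectures)

assert is_multimodal_model(["Qwen2_5_VLForConditionalGeneration"])
assert not is_multimodal_model(["Qwen2ForCausalLM"])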