PyPI - sglang - Versions diffs - 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl - Mend

sglang 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (358) hide show

sglang/srt/model_loader/utils.py CHANGED Viewed

@@ -2,12 +2,17 @@
 """Utilities for selecting and loading models."""
 import contextlib
+import logging
 from typing import Tuple, Type
 import torch
+import transformers
 from torch import nn
+from transformers.dynamic_module_utils import get_class_from_dynamic_module
-from sglang.srt.configs.model_config import ModelConfig
+from sglang.srt.configs.model_config import ModelConfig, ModelImpl
+logger = logging.getLogger(__name__)
 @contextlib.contextmanager
@@ -19,6 +24,61 @@ def set_default_torch_dtype(dtype: torch.dtype):
     torch.set_default_dtype(old_dtype)
+def resolve_transformers_arch(model_config: ModelConfig, architectures: list[str]):
+    for i, arch in enumerate(architectures):
+        if arch == "TransformersForCausalLM":
+            continue
+        auto_map: dict[str, str] = (
+            getattr(model_config.hf_config, "auto_map", None) or dict()
+        )
+        # Make sure that config class is always initialized before model class,
+        # otherwise the model class won't be able to access the config class,
+        # the expected auto_map should have correct order like:
+        # "auto_map": {
+        #     "AutoConfig": "<your-repo-name>--<config-name>",
+        #     "AutoModel": "<your-repo-name>--<config-name>",
+        #     "AutoModelFor<Task>": "<your-repo-name>--<config-name>",
+        # },
+        auto_modules = {
+            name: get_class_from_dynamic_module(
+                module, model_config.model_path, revision=model_config.revision
+            )
+            for name, module in sorted(auto_map.items(), key=lambda x: x[0])
+        }
+        model_module = getattr(transformers, arch, None)
+        if model_module is None:
+            if "AutoModel" not in auto_map:
+                raise ValueError(
+                    f"Cannot find model module. '{arch}' is not a registered "
+                    "model in the Transformers library (only relevant if the "
+                    "model is meant to be in Transformers) and 'AutoModel' is "
+                    "not present in the model config's 'auto_map' (relevant "
+                    "if the model is custom)."
+                )
+            model_module = auto_modules["AutoModel"]
+        if model_config.impl == ModelImpl.TRANSFORMERS:
+            if not model_module.is_backend_compatible():
+                raise ValueError(
+                    f"The Transformers implementation of {arch} is not "
+                    "compatible with vLLM."
+                )
+            architectures[i] = "TransformersForCausalLM"
+        if model_config.impl == ModelImpl.AUTO:
+            if not model_module.is_backend_compatible():
+                raise ValueError(
+                    f"{arch} has no SGlang implementation and the Transformers "
+                    "implementation is not compatible with SGLang."
+                )
+            logger.warning(
+                "%s has no SGLang implementation, falling back to Transformers "
+                "implementation. Some features may not be supported and "
+                "performance may not be optimal.",
+                arch,
+            )
+            architectures[i] = "TransformersForCausalLM"
+    return architectures
 def get_model_architecture(model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
     from sglang.srt.models.registry import ModelRegistry
@@ -34,6 +94,12 @@ def get_model_architecture(model_config: ModelConfig) -> Tuple[Type[nn.Module],
     ):
         architectures = ["QuantMixtralForCausalLM"]
+    supported_archs = ModelRegistry.get_supported_archs()
+    is_native_supported = any(arch in supported_archs for arch in architectures)
+    if not is_native_supported or model_config.impl == ModelImpl.TRANSFORMERS:
+        architectures = resolve_transformers_arch(model_config, architectures)
     return ModelRegistry.resolve_model_cls(architectures)

sglang/srt/models/clip.py CHANGED Viewed

@@ -168,7 +168,7 @@ class CLIPEncoderLayer(nn.Module):
             softmax_in_single_precision=softmax_in_single_precision,
             flatten_batch=True,
             quant_config=quant_config,
-            prefix=add_prefix("attn", prefix),
+            prefix=add_prefix("self_attn", prefix),
         )
         self.mlp = CLIPMLP(
             config,
@@ -395,6 +395,10 @@ class CLIPVisionModel(nn.Module):
             config, quant_config, prefix=add_prefix("vision_model", prefix)
         )
+    @property
+    def device(self) -> torch.device:
+        return self.vision_model.device
     def forward(self, pixel_values: torch.Tensor):
         return self.vision_model(pixel_values)

sglang/srt/models/deepseek_nextn.py CHANGED Viewed

@@ -122,7 +122,7 @@ class DeepseekV3ForCausalLMNextN(DeepseekV3ForCausalLM):
         self.config = config
         self.tp_size = get_tensor_model_parallel_world_size()
         self.quant_config = quant_config
-        self.determine_n_share_experts_fusion("DeepseekV3ForCausalLMNextN")
+        self.determine_num_fused_shared_experts("DeepseekV3ForCausalLMNextN")
         self.model = DeepseekModelNextN(
             config, quant_config, prefix=add_prefix("model", prefix)

sglang 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl

sglang 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl