sglang 0.4.3__py3-none-any.whl → 0.4.3.post2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +17 -2
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/layers/attention/flashinfer_backend.py +101 -30
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/model_executor/model_runner.py +1 -0
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +9 -3
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/METADATA +3 -3
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/RECORD +43 -27
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/top_level.txt +0 -0
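The headline change in this post-release is Qwen2.5-VL support: sglang/srt/configs/qwen2_5_vl_config.py and sglang/srt/models/qwen2_5_vl.py are new, while the vendored Qwen2-VL config (sglang/srt/configs/qwen2vl.py, diffed below) is deleted, presumably because equivalent classes ship upstream in transformers (4.45 and later export Qwen2VLConfig directly). A minimal sketch of obtaining the same defaults without the vendored copy, under that assumption:

# Sketch only: assumes transformers >= 4.45, which exports Qwen2VLConfig
# with the same defaults as the vendored copy deleted below.
from transformers import Qwen2VLConfig

cfg = Qwen2VLConfig()
print(cfg.hidden_size)               # 8192
print(cfg.vision_config.patch_size)  # 14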
sglang/srt/configs/qwen2vl.py
DELETED
@@ -1,130 +0,0 @@
-# coding=utf-8
-# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
-# All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Qwen2VL model configuration"""
-
-import os
-from typing import Union
-
-from transformers import PretrainedConfig
-
-
-class Qwen2VLVisionConfig(PretrainedConfig):
-    model_type = "qwen2_vl"
-
-    def __init__(
-        self,
-        depth=32,
-        embed_dim=1280,
-        hidden_size=3584,
-        hidden_act="quick_gelu",
-        mlp_ratio=4,
-        num_heads=16,
-        in_channels=3,
-        patch_size=14,
-        spatial_merge_size=2,
-        temporal_patch_size=2,
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-
-        self.depth = depth
-        self.embed_dim = embed_dim
-        self.hidden_size = hidden_size
-        self.hidden_act = hidden_act
-        self.mlp_ratio = mlp_ratio
-        self.num_heads = num_heads
-        self.in_channels = in_channels
-        self.patch_size = patch_size
-        self.spatial_merge_size = spatial_merge_size
-        self.temporal_patch_size = temporal_patch_size
-
-    @classmethod
-    def from_pretrained(
-        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
-    ) -> "PretrainedConfig":
-        cls._set_token_in_kwargs(kwargs)
-
-        config_dict, kwargs = cls.get_config_dict(
-            pretrained_model_name_or_path, **kwargs
-        )
-
-        if config_dict.get("model_type") == "qwen2_vl":
-            config_dict = config_dict["vision_config"]
-
-        return cls.from_dict(config_dict, **kwargs)
-
-
-class Qwen2VLConfig(PretrainedConfig):
-    model_type = "qwen2_vl"
-
-    def __init__(
-        self,
-        vocab_size=152064,
-        hidden_size=8192,
-        intermediate_size=29568,
-        num_hidden_layers=80,
-        num_attention_heads=64,
-        num_key_value_heads=8,
-        hidden_act="silu",
-        max_position_embeddings=32768,
-        initializer_range=0.02,
-        rms_norm_eps=1e-05,
-        use_cache=True,
-        tie_word_embeddings=False,
-        rope_theta=1000000.0,
-        use_sliding_window=False,
-        sliding_window=4096,
-        max_window_layers=80,
-        attention_dropout=0.0,
-        vision_config=None,
-        rope_scaling=None,
-        **kwargs,
-    ):
-        if isinstance(vision_config, dict):
-            self.vision_config = Qwen2VLVisionConfig(**vision_config)
-        elif vision_config is None:
-            self.vision_config = Qwen2VLVisionConfig()
-
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.use_sliding_window = use_sliding_window
-        self.sliding_window = sliding_window
-        self.max_window_layers = max_window_layers
-
-        # for backward compatibility
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.attention_dropout = attention_dropout
-        self.rope_scaling = rope_scaling
-
-        # NOTE(HandH1998): This is necessary for configuring the `rope_type` of qwen2vl models after removing dependencies on vllm.
-        if self.rope_scaling is not None and "type" in self.rope_scaling:
-            if self.rope_scaling["type"] == "mrope":
-                self.rope_scaling["type"] = "default"
-            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
-
-        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
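The only non-boilerplate logic in the deleted file is the rope_scaling normalization flagged in the NOTE(HandH1998) comment: "mrope" is rewritten to "default" and mirrored into a rope_type key, so rotary-embedding code can read rope_type without depending on vllm. A standalone sketch of that transformation (the mrope_section value is illustrative, not taken from this diff):

# Standalone sketch of the rope_scaling normalization performed by the
# deleted Qwen2VLConfig.__init__; mrope_section here is illustrative.
rope_scaling = {"type": "mrope", "mrope_section": [16, 24, 24]}

if rope_scaling is not None and "type" in rope_scaling:
    if rope_scaling["type"] == "mrope":
        rope_scaling["type"] = "default"
    rope_scaling["rope_type"] = rope_scaling["type"]

print(rope_scaling)
# {'type': 'default', 'mrope_section': [16, 24, 24], 'rope_type': 'default'}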