sglang 0.4.3__py3-none-any.whl → 0.4.3.post1__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +16 -1
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +4 -1
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/METADATA +2 -2
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/RECORD +26 -24
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/top_level.txt +0 -0
sglang/lang/backend/openai.py
CHANGED
@@ -161,6 +161,10 @@ class OpenAI(BaseBackend):
                 prompt = s.text_
 
             kwargs = sampling_params.to_openai_kwargs()
+            if self.model_name.startswith("o1") or self.model_name.startswith("o3"):
+                kwargs.pop("max_tokens", None)
+            else:
+                kwargs.pop("max_completion_tokens", None)
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
@@ -175,6 +179,7 @@ class OpenAI(BaseBackend):
             ), "constrained type not supported on chat model"
             kwargs = sampling_params.to_openai_kwargs()
             kwargs.pop("stop")
+
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
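The first hunk means exactly one token-limit key survives, chosen by model family: OpenAI's o1/o3 reasoning models reject max_tokens and expect max_completion_tokens, while earlier models only accept max_tokens. A minimal standalone sketch of that selection step (the function name and bare dicts below are illustrative, not sglang's actual objects):

def select_token_limit(model_name: str, kwargs: dict) -> dict:
    # o1/o3 reasoning models reject max_tokens and expect
    # max_completion_tokens; earlier chat/completion models are the reverse.
    if model_name.startswith("o1") or model_name.startswith("o3"):
        kwargs.pop("max_tokens", None)
    else:
        kwargs.pop("max_completion_tokens", None)
    return kwargs

# Both keys start out present (see the ir.py change below); one is dropped.
print(select_token_limit("o3-mini", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_completion_tokens': 64}
print(select_token_limit("gpt-4o-mini", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_tokens': 64}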
sglang/lang/chat_template.py
CHANGED
@@ -353,7 +353,6 @@ register_chat_template(
     )
 )
 
-
 register_chat_template(
     ChatTemplate(
         name="deepseek-v3",
@@ -428,12 +427,15 @@ def match_chat_ml(model_path: str):
     if "tinyllama" in model_path:
         return get_chat_template("chatml")
     # Now the suffix for qwen2 chat model is "instruct"
-    if (
-        "qwen" in model_path
-        and ("chat" in model_path or "instruct" in model_path)
-        and ("llava" not in model_path)
-    ):
-        return get_chat_template("qwen")
+    if "qwen" in model_path and "vl" in model_path:
+        return get_chat_template("qwen2-vl")
+    if "qwen" in model_path:
+        if "vl" in model_path:
+            return get_chat_template("qwen2-vl")
+        if ("chat" in model_path or "instruct" in model_path) and (
+            "llava" not in model_path
+        ):
+            return get_chat_template("qwen")
     if (
         "llava-v1.6-34b" in model_path
         or "llava-v1.6-yi-34b" in model_path
@@ -443,6 +445,12 @@ def match_chat_ml(model_path: str):
         return get_chat_template("chatml-llava")
 
 
+@register_chat_template_matching_function
+def match_chat_minicpm(model_path: str):
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_chat_yi(model_path: str):
     model_path = model_path.lower()
@@ -459,6 +467,13 @@ def match_gemma_it(model_path: str):
         return get_chat_template("gemma-it")
 
 
+@register_chat_template_matching_function
+def match_openbmb_minicpm(model_path: str):
+    model_path = model_path.lower()
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_c4ai_command_r(model_path: str):
     model_path = model_path.lower()
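Taken together, the match_chat_ml changes route any qwen + vl model path to the qwen2-vl template before the text-only qwen branch can claim it. A trimmed, self-contained re-implementation of just that ordering, returning template names instead of ChatTemplate objects (this helper is illustrative, not part of sglang):

from typing import Optional

def match_qwen_template(model_path: str) -> Optional[str]:
    # Mirrors the branch order added above; returns names, not templates.
    model_path = model_path.lower()
    if "qwen" in model_path and "vl" in model_path:
        return "qwen2-vl"  # vision-language checkpoints are matched first
    if "qwen" in model_path:
        if ("chat" in model_path or "instruct" in model_path) and (
            "llava" not in model_path
        ):
            return "qwen"  # text-only chat/instruct checkpoints
    return None

assert match_qwen_template("Qwen/Qwen2.5-VL-7B-Instruct") == "qwen2-vl"
assert match_qwen_template("Qwen/Qwen2.5-7B-Instruct") == "qwen"
assert match_qwen_template("llava-qwen-chat") is None  # llava stays excluded

Note that in the actual diff the inner `if "vl" in model_path` branch is unreachable (the first check already returned), and match_chat_minicpm and match_openbmb_minicpm both map minicpm paths to the same minicpmv template.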
sglang/lang/ir.py
CHANGED
@@ -63,6 +63,7 @@ class SglSamplingParams:
             warnings.warn("Regular expression is not supported in the OpenAI backend.")
         return {
             "max_tokens": self.max_new_tokens,
+            "max_completion_tokens": self.max_new_tokens,
             "stop": self.stop or None,
             "temperature": self.temperature,
             "top_p": self.top_p,
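This one-line addition makes to_openai_kwargs() emit both spellings of the token budget; the backend change above then pops whichever one the target model rejects, so the OpenAI client never receives both. A sketch of the resulting dict shape (values illustrative, field subset only):

max_new_tokens = 128
openai_kwargs = {
    "max_tokens": max_new_tokens,             # accepted by pre-o1 models
    "max_completion_tokens": max_new_tokens,  # required by o1/o3 models
    "stop": None,
    "temperature": 0.7,
    "top_p": 1.0,
}
# Exactly one of the two token-limit keys is removed before the API call.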
sglang/srt/configs/__init__.py
CHANGED
@@ -1,12 +1,15 @@
 from sglang.srt.configs.chatglm import ChatGLMConfig
 from sglang.srt.configs.dbrx import DbrxConfig
 from sglang.srt.configs.exaone import ExaoneConfig
-from sglang.srt.configs.qwen2vl import Qwen2VLConfig, Qwen2VLVisionConfig
+from sglang.srt.configs.qwen2_5_vl_config import (
+    Qwen2_5_VLConfig,
+    Qwen2_5_VLVisionConfig,
+)
 
 __all__ = [
     "ExaoneConfig",
-    "Qwen2VLConfig",
-    "Qwen2VLVisionConfig",
     "ChatGLMConfig",
     "DbrxConfig",
+    "Qwen2_5_VLConfig",
+    "Qwen2_5_VLVisionConfig",
 ]
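With the re-export in place, the Qwen2.5-VL configs are importable from the package root, replacing the deleted qwen2vl module. A minimal smoke test, assuming an environment with sglang==0.4.3.post1 installed (attribute names follow the Hugging Face config conventions that qwen2_5_vl_config.py mirrors, so treat them as assumptions):

from sglang.srt.configs import Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig

# Hugging Face-style configs construct with defaults; the top-level config
# is assumed to nest a vision config, per the HF Qwen2.5-VL port.
config = Qwen2_5_VLConfig()
print(config.model_type)                                         # e.g. "qwen2_5_vl"
print(isinstance(config.vision_config, Qwen2_5_VLVisionConfig))  # expected: True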
sglang/srt/configs/model_config.py
CHANGED
@@ -98,6 +98,7 @@ class ModelConfig:
         if (
             "DeepseekV2ForCausalLM" in self.hf_config.architectures
             or "DeepseekV3ForCausalLM" in self.hf_config.architectures
+            or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
         ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
@@ -402,6 +403,7 @@ def is_multimodal_model(model_architectures: List[str]):
         or "LlavaVidForCausalLM" in model_architectures
         or "MllamaForConditionalGeneration" in model_architectures
         or "Qwen2VLForConditionalGeneration" in model_architectures
+        or "Qwen2_5_VLForConditionalGeneration" in model_architectures
         or "MiniCPMV" in model_architectures
     ):
         return True
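The first hunk routes the new DeepSeek NextN draft architecture (added in sglang/srt/models/deepseek_nextn.py) down the same MLA attention path as DeepSeek-V2/V3. The second hunk just extends a membership test; a trimmed re-implementation for illustration (the real function in model_config.py checks more architectures than listed here):

from typing import List

def is_multimodal_model(model_architectures: List[str]) -> bool:
    # Subset of the architectures recognized after this diff.
    multimodal_archs = {
        "LlavaVidForCausalLM",
        "MllamaForConditionalGeneration",
        "Qwen2VLForConditionalGeneration",
        "Qwen2_5_VLForConditionalGeneration",  # newly added in 0.4.3.post1
        "MiniCPMV",
    }
    return any(arch in multimodal_archs for arch in model_architectures)

assert is_multimodal_model(["Qwen2_5_VLForConditionalGeneration"])
assert not is_multimodal_model(["Qwen2ForCausalLM"])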