sglang 0.4.3__py3-none-any.whl → 0.4.3.post1__py3-none-any.whl

@@ -161,6 +161,10 @@ class OpenAI(BaseBackend):
                 prompt = s.text_
 
             kwargs = sampling_params.to_openai_kwargs()
+            if self.model_name.startswith("o1") or self.model_name.startswith("o3"):
+                kwargs.pop("max_tokens", None)
+            else:
+                kwargs.pop("max_completion_tokens", None)
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
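Together with the `to_openai_kwargs` change shown further down (sglang/lang/ir.py), which now emits both `max_tokens` and `max_completion_tokens`, this branch keeps only the key the target model accepts: OpenAI's o-series reasoning models expect `max_completion_tokens`, other models expect `max_tokens`. A minimal standalone sketch of the same selection (hypothetical helper name, not part of the sglang API):

```python
# Illustrative sketch only. Assumes kwargs already carries both limit keys,
# as produced by SglSamplingParams.to_openai_kwargs in this release.
def strip_unsupported_limit(model_name: str, kwargs: dict) -> dict:
    if model_name.startswith("o1") or model_name.startswith("o3"):
        # o-series models take max_completion_tokens instead of max_tokens
        kwargs.pop("max_tokens", None)
    else:
        kwargs.pop("max_completion_tokens", None)
    return kwargs

print(strip_unsupported_limit("o3-mini", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_completion_tokens': 64}
print(strip_unsupported_limit("gpt-4o", {"max_tokens": 64, "max_completion_tokens": 64}))
# -> {'max_tokens': 64}
```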
@@ -175,6 +179,7 @@ class OpenAI(BaseBackend):
             ), "constrained type not supported on chat model"
             kwargs = sampling_params.to_openai_kwargs()
             kwargs.pop("stop")
+
             comp = openai_completion(
                 client=self.client,
                 token_usage=self.token_usage,
@@ -353,7 +353,6 @@ register_chat_template(
     )
 )
 
-
 register_chat_template(
     ChatTemplate(
         name="deepseek-v3",
@@ -428,12 +427,15 @@ def match_chat_ml(model_path: str):
     if "tinyllama" in model_path:
         return get_chat_template("chatml")
     # Now the suffix for qwen2 chat model is "instruct"
-    if (
-        "qwen" in model_path
-        and ("chat" in model_path or "instruct" in model_path)
-        and ("llava" not in model_path)
-    ):
-        return get_chat_template("qwen")
+    if "qwen" in model_path and "vl" in model_path:
+        return get_chat_template("qwen2-vl")
+    if "qwen" in model_path:
+        if "vl" in model_path:
+            return get_chat_template("qwen2-vl")
+        if ("chat" in model_path or "instruct" in model_path) and (
+            "llava" not in model_path
+        ):
+            return get_chat_template("qwen")
     if (
         "llava-v1.6-34b" in model_path
         or "llava-v1.6-yi-34b" in model_path
@@ -443,6 +445,12 @@ def match_chat_ml(model_path: str):
         return get_chat_template("chatml-llava")
 
 
+@register_chat_template_matching_function
+def match_chat_minicpm(model_path: str):
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_chat_yi(model_path: str):
     model_path = model_path.lower()
@@ -459,6 +467,13 @@ def match_gemma_it(model_path: str):
         return get_chat_template("gemma-it")
 
 
+@register_chat_template_matching_function
+def match_openbmb_minicpm(model_path: str):
+    model_path = model_path.lower()
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
+
 @register_chat_template_matching_function
 def match_c4ai_command_r(model_path: str):
     model_path = model_path.lower()
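Both new matchers resolve MiniCPM checkpoints to the `minicpmv` template; a matching function returns a registered template name or `None` when it does not apply. A minimal sketch of the first-match registry pattern these decorators suggest (illustrative only, not the actual sglang registry):

```python
# Minimal illustrative registry: decorated matchers are collected and probed
# in order; the first non-None result wins.
_matchers = []

def register_matcher(fn):
    _matchers.append(fn)
    return fn

@register_matcher
def match_minicpm(model_path: str):
    if "minicpm" in model_path.lower():
        return "minicpmv"

def resolve_template(model_path: str, default="chatml"):
    for matcher in _matchers:
        name = matcher(model_path)
        if name is not None:
            return name
    return default

print(resolve_template("openbmb/MiniCPM-V-2_6"))  # -> minicpmv
print(resolve_template("meta-llama/Llama-3-8B"))  # -> chatml
```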
sglang/lang/ir.py CHANGED
@@ -63,6 +63,7 @@ class SglSamplingParams:
             warnings.warn("Regular expression is not supported in the OpenAI backend.")
         return {
             "max_tokens": self.max_new_tokens,
+            "max_completion_tokens": self.max_new_tokens,
             "stop": self.stop or None,
             "temperature": self.temperature,
             "top_p": self.top_p,
@@ -1,12 +1,15 @@
 from sglang.srt.configs.chatglm import ChatGLMConfig
 from sglang.srt.configs.dbrx import DbrxConfig
 from sglang.srt.configs.exaone import ExaoneConfig
-from sglang.srt.configs.qwen2vl import Qwen2VLConfig, Qwen2VLVisionConfig
+from sglang.srt.configs.qwen2_5_vl_config import (
+    Qwen2_5_VLConfig,
+    Qwen2_5_VLVisionConfig,
+)
 
 __all__ = [
     "ExaoneConfig",
-    "Qwen2VLConfig",
-    "Qwen2VLVisionConfig",
     "ChatGLMConfig",
     "DbrxConfig",
+    "Qwen2_5_VLConfig",
+    "Qwen2_5_VLVisionConfig",
 ]
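After the rename, the Qwen2.5-VL config classes are re-exported under their new names and the old `Qwen2VLConfig` names are no longer re-exported here. An illustrative import, assuming the layout shown above:

```python
# Assumes the re-exports shown in the hunk above.
from sglang.srt.configs import Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig
```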
@@ -98,6 +98,7 @@ class ModelConfig:
         if (
             "DeepseekV2ForCausalLM" in self.hf_config.architectures
             or "DeepseekV3ForCausalLM" in self.hf_config.architectures
+            or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
         ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
@@ -402,6 +403,7 @@ def is_multimodal_model(model_architectures: List[str]):
         or "LlavaVidForCausalLM" in model_architectures
         or "MllamaForConditionalGeneration" in model_architectures
         or "Qwen2VLForConditionalGeneration" in model_architectures
+        or "Qwen2_5_VLForConditionalGeneration" in model_architectures
         or "MiniCPMV" in model_architectures
     ):
         return True
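With the extra architecture string, Qwen2.5-VL checkpoints are now classified as multimodal. A standalone restatement of the check for a couple of example `config.json` architecture lists (the real function covers more architectures than shown here):

```python
from typing import List

# Hedged restatement: only a subset of the architectures handled above.
_MULTIMODAL_ARCHS = {
    "Qwen2VLForConditionalGeneration",
    "Qwen2_5_VLForConditionalGeneration",
    "MiniCPMV",
}

def is_multimodal(model_architectures: List[str]) -> bool:
    return any(arch in _MULTIMODAL_ARCHS for arch in model_architectures)

print(is_multimodal(["Qwen2_5_VLForConditionalGeneration"]))  # True
print(is_multimodal(["LlamaForCausalLM"]))                    # False
```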