sglang 0.4.9.post1__py3-none-any.whl → 0.4.9.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. sglang/srt/configs/model_config.py +24 -1
  2. sglang/srt/conversation.py +21 -2
  3. sglang/srt/disaggregation/ascend/__init__.py +6 -0
  4. sglang/srt/disaggregation/ascend/conn.py +44 -0
  5. sglang/srt/disaggregation/ascend/transfer_engine.py +58 -0
  6. sglang/srt/disaggregation/mooncake/conn.py +15 -14
  7. sglang/srt/disaggregation/mooncake/transfer_engine.py +17 -8
  8. sglang/srt/disaggregation/utils.py +25 -3
  9. sglang/srt/entrypoints/engine.py +1 -1
  10. sglang/srt/entrypoints/http_server.py +1 -0
  11. sglang/srt/entrypoints/openai/protocol.py +11 -0
  12. sglang/srt/entrypoints/openai/serving_chat.py +7 -0
  13. sglang/srt/function_call/function_call_parser.py +2 -0
  14. sglang/srt/function_call/kimik2_detector.py +220 -0
  15. sglang/srt/hf_transformers_utils.py +18 -0
  16. sglang/srt/jinja_template_utils.py +8 -0
  17. sglang/srt/layers/communicator.py +17 -4
  18. sglang/srt/layers/linear.py +12 -2
  19. sglang/srt/layers/moe/ep_moe/kernels.py +2 -1
  20. sglang/srt/layers/moe/ep_moe/layer.py +2 -1
  21. sglang/srt/layers/moe/fused_moe_triton/layer.py +7 -2
  22. sglang/srt/layers/moe/topk.py +8 -2
  23. sglang/srt/layers/parameter.py +19 -3
  24. sglang/srt/layers/quantization/fp8_kernel.py +2 -2
  25. sglang/srt/layers/quantization/moe_wna16.py +1 -2
  26. sglang/srt/layers/quantization/w8a8_int8.py +738 -14
  27. sglang/srt/managers/io_struct.py +27 -2
  28. sglang/srt/managers/mm_utils.py +55 -94
  29. sglang/srt/managers/schedule_batch.py +16 -5
  30. sglang/srt/managers/scheduler.py +21 -1
  31. sglang/srt/managers/tokenizer_manager.py +16 -0
  32. sglang/srt/mem_cache/memory_pool.py +65 -40
  33. sglang/srt/model_executor/forward_batch_info.py +13 -1
  34. sglang/srt/model_loader/loader.py +23 -12
  35. sglang/srt/models/deepseek_janus_pro.py +1 -1
  36. sglang/srt/models/deepseek_v2.py +62 -17
  37. sglang/srt/models/deepseek_vl2.py +1 -1
  38. sglang/srt/models/gemma3_mm.py +1 -1
  39. sglang/srt/models/gemma3n_mm.py +6 -3
  40. sglang/srt/models/internvl.py +8 -2
  41. sglang/srt/models/kimi_vl.py +8 -2
  42. sglang/srt/models/llama.py +2 -0
  43. sglang/srt/models/llava.py +3 -1
  44. sglang/srt/models/llavavid.py +1 -1
  45. sglang/srt/models/minicpmo.py +1 -2
  46. sglang/srt/models/minicpmv.py +1 -1
  47. sglang/srt/models/mixtral_quant.py +4 -0
  48. sglang/srt/models/mllama4.py +13 -4
  49. sglang/srt/models/phi4mm.py +8 -2
  50. sglang/srt/models/phimoe.py +553 -0
  51. sglang/srt/models/qwen2.py +2 -0
  52. sglang/srt/models/qwen2_5_vl.py +10 -7
  53. sglang/srt/models/qwen2_vl.py +12 -1
  54. sglang/srt/models/vila.py +8 -2
  55. sglang/srt/multimodal/processors/base_processor.py +197 -137
  56. sglang/srt/multimodal/processors/deepseek_vl_v2.py +1 -1
  57. sglang/srt/multimodal/processors/gemma3.py +4 -2
  58. sglang/srt/multimodal/processors/gemma3n.py +1 -1
  59. sglang/srt/multimodal/processors/internvl.py +1 -1
  60. sglang/srt/multimodal/processors/janus_pro.py +1 -1
  61. sglang/srt/multimodal/processors/kimi_vl.py +1 -1
  62. sglang/srt/multimodal/processors/minicpm.py +4 -3
  63. sglang/srt/multimodal/processors/mllama4.py +1 -1
  64. sglang/srt/multimodal/processors/phi4mm.py +1 -1
  65. sglang/srt/multimodal/processors/pixtral.py +1 -1
  66. sglang/srt/multimodal/processors/qwen_vl.py +203 -80
  67. sglang/srt/multimodal/processors/vila.py +1 -1
  68. sglang/srt/server_args.py +11 -4
  69. sglang/srt/utils.py +154 -31
  70. sglang/version.py +1 -1
  71. {sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/METADATA +4 -3
  72. {sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/RECORD +75 -70
  73. {sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/WHEEL +0 -0
  74. {sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/licenses/LICENSE +0 -0
  75. {sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/top_level.txt +0 -0
sglang/srt/multimodal/processors/pixtral.py CHANGED
@@ -106,7 +106,7 @@ class PixtralProcessor(BaseMultimodalProcessor):
                 pixel_values=processor_output["pixel_values"],
                 image_sizes=processor_output["image_sizes"],
                 modality=Modality.IMAGE,
-                image_offsets=image_offsets,
+                offsets=image_offsets,
             )
         ]

sglang/srt/multimodal/processors/qwen_vl.py CHANGED
@@ -1,9 +1,13 @@
 import asyncio
 import math
+import os
 import re
-from typing import Dict, List, Union
+from typing import List, Union

+import torch
+import torchvision
 from PIL import Image
+from torchvision.transforms import InterpolationMode

 from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
 from sglang.srt.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
@@ -12,6 +16,185 @@ from sglang.srt.multimodal.processors.base_processor import (
     BaseMultimodalProcessor as SGLangBaseProcessor,
 )
 from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
+from sglang.utils import logger
+
+IMAGE_FACTOR = 28
+MIN_PIXELS = 4 * 28 * 28
+MAX_PIXELS = 16384 * 28 * 28
+MAX_RATIO = 200
+VIDEO_TOTAL_PIXELS = int(
+    float(os.environ.get("VIDEO_MAX_PIXELS", 128000 * 28 * 28 * 0.9))
+)
+
+VIDEO_MIN_PIXELS = 128 * 28 * 28
+VIDEO_MAX_PIXELS = 768 * 28 * 28
+FRAME_FACTOR = 2
+FPS = 2.0
+FPS_MIN_FRAMES = 4
+FPS_MAX_FRAMES = 768
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int = IMAGE_FACTOR,
+    min_pixels: int = MIN_PIXELS,
+    max_pixels: int = MAX_PIXELS,
+) -> tuple[int, int]:
+    """
+    Rescales the image so that the following conditions are met:
+
+    1. Both dimensions (height and width) are divisible by 'factor'.
+
+    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
+
+    3. The aspect ratio of the image is maintained as closely as possible.
+    """
+    if max(height, width) / min(height, width) > MAX_RATIO:
+        raise ValueError(
+            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
+        )
+    h_bar = max(factor, round_by_factor(height, factor))
+    w_bar = max(factor, round_by_factor(width, factor))
+    if h_bar * w_bar > max_pixels:
+        beta = math.sqrt((height * width) / max_pixels)
+        h_bar = floor_by_factor(height / beta, factor)
+        w_bar = floor_by_factor(width / beta, factor)
+    elif h_bar * w_bar < min_pixels:
+        beta = math.sqrt(min_pixels / (height * width))
+        h_bar = ceil_by_factor(height * beta, factor)
+        w_bar = ceil_by_factor(width * beta, factor)
+    return h_bar, w_bar
+
+
+def resize_image(image, size_factor: int = IMAGE_FACTOR) -> Image.Image:
+    width, height = image.size
+    min_pixels = MIN_PIXELS
+    max_pixels = MAX_PIXELS
+    resized_height, resized_width = smart_resize(
+        height,
+        width,
+        factor=size_factor,
+        min_pixels=min_pixels,
+        max_pixels=max_pixels,
+    )
+    image = image.resize((resized_width, resized_height))
+    return image
+
+
+def round_by_factor(number: int, factor: int) -> int:
+    """Returns the closest integer to 'number' that is divisible by 'factor'."""
+    return round(number / factor) * factor
+
+
+def ceil_by_factor(number: int, factor: int) -> int:
+    """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
+    return math.ceil(number / factor) * factor
+
+
+def floor_by_factor(number: int, factor: int) -> int:
+    """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
+    return math.floor(number / factor) * factor
+
+
+async def resize_image_async(image):
+    return resize_image(image)
+
+
+def smart_nframes(
+    ele: dict,
+    total_frames: int,
+    video_fps: int | float,
+) -> int:
+    """calculate the number of frames for video used for model inputs.
+
+    Args:
+        ele (dict): a dict contains the configuration of video.
+            support either `fps` or `nframes`:
+                - nframes: the number of frames to extract for model inputs.
+                - fps: the fps to extract frames for model inputs.
+                    - min_frames: the minimum number of frames of the video, only used when fps is provided.
+                    - max_frames: the maximum number of frames of the video, only used when fps is provided.
+        total_frames (int): the original total number of frames of the video.
+        video_fps (int | float): the original fps of the video.
+
+    Raises:
+        ValueError: nframes should in interval [FRAME_FACTOR, total_frames].
+
+    Returns:
+        int: the number of frames for video used for model inputs.
+    """
+    assert not (
+        "fps" in ele and "nframes" in ele
+    ), "Only accept either `fps` or `nframes`"
+    if "nframes" in ele:
+        nframes = round_by_factor(ele["nframes"], FRAME_FACTOR)
+    else:
+        fps = ele.get("fps", FPS)
+        min_frames = ceil_by_factor(ele.get("min_frames", FPS_MIN_FRAMES), FRAME_FACTOR)
+        max_frames = floor_by_factor(
+            ele.get("max_frames", min(FPS_MAX_FRAMES, total_frames)), FRAME_FACTOR
+        )
+        nframes = total_frames / video_fps * fps
+        if nframes > total_frames:
+            logger.warning(
+                f"smart_nframes: nframes[{nframes}] > total_frames[{total_frames}]"
+            )
+        nframes = min(min(max(nframes, min_frames), max_frames), total_frames)
+        nframes = floor_by_factor(nframes, FRAME_FACTOR)
+    if not (FRAME_FACTOR <= nframes and nframes <= total_frames):
+        raise ValueError(
+            f"nframes should in interval [{FRAME_FACTOR}, {total_frames}], but got {nframes}."
+        )
+    return nframes
+
+
+# process video, qwen-specific
+async def preprocess_video(
+    vr,
+    image_factor: int = IMAGE_FACTOR,
+    # vr: VideoReader, image_factor: int = IMAGE_FACTOR
+) -> torch.Tensor:
+    ele = {}
+    total_frames, video_fps = len(vr), vr.get_avg_fps()
+    nframes = smart_nframes({}, total_frames=total_frames, video_fps=video_fps)
+    idx = torch.linspace(0, total_frames - 1, nframes).round().long().tolist()
+    video = vr.get_batch(idx).asnumpy()
+    video = torch.tensor(video).permute(0, 3, 1, 2)  # Convert to TCHW format
+    nframes, _, height, width = video.shape
+    min_pixels = ele.get("min_pixels", VIDEO_MIN_PIXELS)
+    total_pixels = ele.get("total_pixels", VIDEO_TOTAL_PIXELS)
+    max_pixels = max(
+        min(VIDEO_MAX_PIXELS, total_pixels / nframes * FRAME_FACTOR),
+        int(min_pixels * 1.05),
+    )
+    max_pixels_supposed = ele.get("max_pixels", max_pixels)
+    if max_pixels_supposed > max_pixels:
+        logger.warning(
+            f"The given max_pixels[{max_pixels_supposed}] exceeds limit[{max_pixels}]."
+        )
+    max_pixels = min(max_pixels_supposed, max_pixels)
+    if "resized_height" in ele and "resized_width" in ele:
+        resized_height, resized_width = smart_resize(
+            ele["resized_height"],
+            ele["resized_width"],
+            factor=image_factor,
+        )
+    else:
+        resized_height, resized_width = smart_resize(
+            height,
+            width,
+            factor=image_factor,
+            min_pixels=min_pixels,
+            max_pixels=max_pixels,
+        )
+    video = torchvision.transforms.functional.resize(
+        video,
+        [resized_height, resized_width],
+        interpolation=InterpolationMode.BICUBIC,
+        antialias=True,
+    ).float()
+    return video


 # Compatible with Qwen2VL and Qwen2_5VL
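For reference, the defaults above work out as follows (illustrative sketch, not part of the diff; values computed from the constants and helpers shown):

from sglang.srt.multimodal.processors.qwen_vl import smart_nframes, smart_resize

# A 1920x1080 image is already within [MIN_PIXELS, MAX_PIXELS], so each side
# is only rounded to the nearest multiple of IMAGE_FACTOR (28).
assert smart_resize(1080, 1920) == (1092, 1932)

# A 10 s, 30 fps clip (300 frames) sampled at the default FPS = 2.0 gives
# 300 / 30 * 2 = 20 frames, already divisible by FRAME_FACTOR (2).
assert smart_nframes({}, total_frames=300, video_fps=30) == 20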
@@ -37,104 +220,44 @@ class Qwen2_5VLImageProcessor(SGLangBaseProcessor):
         self.MIN_PIXELS = 4 * 28 * 28
         self.MAX_PIXELS = 16384 * 28 * 28
         self.MAX_RATIO = 200
+        # TODO(mick): move all MultimodalSpecialTokens initializations into processor init
+        self.mm_special_tokens = MultimodalSpecialTokens(
+            image_token=self.IMAGE_TOKEN,
+            image_token_regex=self.IMAGE_TOKEN_REGEX,
+            video_token=self.VIDEO_TOKEN_ID,
+        )

     async def process_mm_data_async(
         self,
-        image_data: List[Union[str, bytes, Dict]],
+        image_data: List[Union[str, bytes]],
         input_text,
         request_obj,
         max_req_input_len,
         *args,
         **kwargs,
     ):
+
         base_output = self.load_mm_data(
             prompt=input_text,
             image_data=image_data,
-            multimodal_tokens=MultimodalSpecialTokens(
-                image_token=self.IMAGE_TOKEN,
-                image_token_regex=self.IMAGE_TOKEN_REGEX,
-            ),
+            video_data=request_obj.video_data,
+            multimodal_tokens=self.mm_special_tokens,
             max_req_input_len=max_req_input_len,
         )

-        def smart_resize(
-            height: int,
-            width: int,
-            factor: int = self.IMAGE_FACTOR,
-            min_pixels: int = self.MIN_PIXELS,
-            max_pixels: int = self.MAX_PIXELS,
-        ) -> tuple[int, int]:
-            """
-            Rescales the image so that the following conditions are met:
-
-            1. Both dimensions (height and width) are divisible by 'factor'.
-
-            2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
-
-            3. The aspect ratio of the image is maintained as closely as possible.
-            """
-            if max(height, width) / min(height, width) > self.MAX_RATIO:
-                raise ValueError(
-                    f"absolute aspect ratio must be smaller than {self.MAX_RATIO}, got {max(height, width) / min(height, width)}"
-                )
-            h_bar = max(factor, round_by_factor(height, factor))
-            w_bar = max(factor, round_by_factor(width, factor))
-            if h_bar * w_bar > max_pixels:
-                beta = math.sqrt((height * width) / max_pixels)
-                h_bar = floor_by_factor(height / beta, factor)
-                w_bar = floor_by_factor(width / beta, factor)
-            elif h_bar * w_bar < min_pixels:
-                beta = math.sqrt(min_pixels / (height * width))
-                h_bar = ceil_by_factor(height * beta, factor)
-                w_bar = ceil_by_factor(width * beta, factor)
-            return h_bar, w_bar
-
-        def resize_image(image, size_factor: int = self.IMAGE_FACTOR) -> Image.Image:
-            width, height = image.size
-            min_pixels = self.MIN_PIXELS
-            max_pixels = self.MAX_PIXELS
-            resized_height, resized_width = smart_resize(
-                height,
-                width,
-                factor=size_factor,
-                min_pixels=min_pixels,
-                max_pixels=max_pixels,
-            )
-            image = image.resize((resized_width, resized_height))
-            return image
-
-        def round_by_factor(number: int, factor: int) -> int:
-            """Returns the closest integer to 'number' that is divisible by 'factor'."""
-            return round(number / factor) * factor
-
-        def ceil_by_factor(number: int, factor: int) -> int:
-            """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
-            return math.ceil(number / factor) * factor
-
-        def floor_by_factor(number: int, factor: int) -> int:
-            """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
-            return math.floor(number / factor) * factor
-
-        async def resize_image_async(image):
-            return resize_image(image)
-
         # Qwen-specific: resize images if they are raw Image objects
         if base_output.images and isinstance(base_output.images[0], Image.Image):
             resize_tasks = [resize_image_async(image) for image in base_output.images]
             base_output.images = await asyncio.gather(*resize_tasks)

-        video_grid_thw = None  # TODO
-
-        mm_items, input_ids = self.process_and_combine_mm_data(base_output)
-
-        if not mm_items:
-            # Note(Xinyuan): This is the case where image loading fails.
-            return None
+        if base_output.videos:
+            base_output.videos = [
+                await preprocess_video(video) for video in base_output.videos
+            ]

-        combined_mm_item = mm_items[0]  # only image is supported for now
-        video_grid_thw = None  # TODO
-        second_per_grid_ts = getattr(combined_mm_item, "second_per_grid_ts", None)
+        mm_items, input_ids, ret = self.process_and_combine_mm_data(base_output)

+        input_ids = input_ids.flatten()
         mrope_positions, mrope_position_delta = MRotaryEmbedding.get_rope_index(
             spatial_merge_size=self.hf_config.vision_config.spatial_merge_size,
             image_token_id=self.IM_TOKEN_ID,
@@ -145,9 +268,9 @@ class Qwen2_5VLImageProcessor(SGLangBaseProcessor):
                 self.hf_config.vision_config, "tokens_per_second", None
             ),
             input_ids=input_ids.unsqueeze(0),
-            image_grid_thw=combined_mm_item.image_grid_thw,
-            video_grid_thw=video_grid_thw,
-            second_per_grid_ts=second_per_grid_ts,
+            image_grid_thw=getattr(ret, "image_grid_thw", None),
+            video_grid_thw=getattr(ret, "video_grid_thw", None),
+            second_per_grid_ts=getattr(ret, "second_per_grid_ts", None),
         )
         mrope_positions = mrope_positions.squeeze(1)

sglang/srt/multimodal/processors/vila.py CHANGED
@@ -57,7 +57,7 @@ class VILAMultimodalProcessor(BaseMultimodalProcessor):
             image_data=image_data,
         )

-        mm_items, input_ids = self.process_and_combine_mm_data(base_output)
+        mm_items, input_ids, _ = self.process_and_combine_mm_data(base_output)

         return {
             "input_ids": input_ids.tolist(),
sglang/srt/server_args.py CHANGED
@@ -416,7 +416,7 @@ class ServerArgs:
         if self.enable_dp_lm_head:
             assert (
                 self.enable_dp_attention
-            ), "Please enable dp attention when setting enable_dp_attention. "
+            ), "Please enable dp attention when setting enable_dp_lm_head. "

         # DeepEP MoE
         if self.enable_deepep_moe:
@@ -1048,9 +1048,16 @@ class ServerArgs:
         parser.add_argument(
             "--tool-call-parser",
             type=str,
-            choices=["qwen25", "mistral", "llama3", "deepseekv3", "pythonic"],
+            choices=[
+                "qwen25",
+                "mistral",
+                "llama3",
+                "deepseekv3",
+                "pythonic",
+                "kimi_k2",
+            ],
             default=ServerArgs.tool_call_parser,
-            help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', 'llama3', 'deepseekv3', and 'pythonic'.",
+            help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', 'llama3', 'deepseekv3', 'pythonic', and 'kimi_k2'.",
         )

         # Data parallelism
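A minimal launch sketch for the new parser choice (the model path is a placeholder; other flags elided):

python -m sglang.launch_server --model-path <kimi-k2-model> --tool-call-parser kimi_k2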
@@ -1614,7 +1621,7 @@ class ServerArgs:
             "--disaggregation-transfer-backend",
             type=str,
             default=ServerArgs.disaggregation_transfer_backend,
-            choices=["mooncake", "nixl"],
+            choices=["mooncake", "nixl", "ascend"],
             help="The backend for disaggregation transfer. Default is mooncake.",
         )
         parser.add_argument(
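A sketch of selecting the new Ascend backend on a disaggregated prefill worker (assumes the existing prefill/decode disaggregation flags; other required options elided):

python -m sglang.launch_server --model-path <model> --disaggregation-mode prefill --disaggregation-transfer-backend ascend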
sglang/srt/utils.py CHANGED
@@ -197,7 +197,7 @@ def get_int_env_var(name: str, default: int = 0) -> int:


 def support_triton(backend: str) -> bool:
-    return backend not in ["torch_native", "intel_amx"]
+    return backend not in ["torch_native", "intel_amx", "ascend"]


 try:
@@ -728,33 +728,6 @@ def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarra
     return audio


-def encode_video(video_path, frame_count_limit=None):
-    # Lazy import because decord is not available on some arm platforms.
-    from decord import VideoReader, cpu
-
-    if not os.path.exists(video_path):
-        logger.error(f"Video {video_path} does not exist")
-        return []
-
-    if frame_count_limit == 0:
-        return []
-
-    def uniform_sample(l, n):
-        gap = len(l) / n
-        idxs = [int(i * gap + gap / 2) for i in range(n)]
-        return [l[i] for i in idxs]
-
-    vr = VideoReader(video_path, ctx=cpu(0))
-    sample_fps = round(vr.get_avg_fps() / 1)  # FPS
-    frame_indices = [i for i in range(0, len(vr), sample_fps)]
-    if frame_count_limit is not None and len(frame_indices) > frame_count_limit:
-        frame_indices = uniform_sample(frame_indices, frame_count_limit)
-
-    frames = vr.get_batch(frame_indices).asnumpy()
-    frames = [Image.fromarray(v.astype("uint8")) for v in frames]
-    return frames
-
-
 def load_image(
     image_file: Union[Image.Image, str, bytes],
 ) -> tuple[Image.Image, tuple[int, int]]:
@@ -774,9 +747,6 @@ def load_image(
     elif image_file.startswith("data:"):
         image_file = image_file.split(",")[1]
         image = Image.open(BytesIO(pybase64.b64decode(image_file, validate=True)))
-    elif image_file.startswith("video:"):
-        image_file = image_file.replace("video:", "")
-        image, image_size = decode_video_base64(image_file)
     elif isinstance(image_file, str):
         image = Image.open(BytesIO(pybase64.b64decode(image_file, validate=True)))
     else:
@@ -785,6 +755,61 @@ def load_image(
     return image, image_size


+def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
+    # We import decord here to avoid a strange Segmentation fault (core dumped) issue.
+    from decord import VideoReader, cpu, gpu
+
+    try:
+        from decord.bridge import decord_bridge
+
+        ctx = gpu(0)
+        _ = decord_bridge.get_ctx_device(ctx)
+    except Exception:
+        ctx = cpu(0)
+
+    tmp_file = None
+    vr = None
+    try:
+        if isinstance(video_file, bytes):
+            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+            tmp_file.write(video_file)
+            tmp_file.close()
+            vr = VideoReader(tmp_file.name, ctx=ctx)
+        elif isinstance(video_file, str):
+            if video_file.startswith(("http://", "https://")):
+                timeout = int(os.getenv("REQUEST_TIMEOUT", "10"))
+                response = requests.get(video_file, stream=True, timeout=timeout)
+                response.raise_for_status()
+                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+                for chunk in response.iter_content(chunk_size=8192):
+                    tmp_file.write(chunk)
+                tmp_file.close()
+                vr = VideoReader(tmp_file.name, ctx=ctx)
+            elif video_file.startswith("data:"):
+                _, encoded = video_file.split(",", 1)
+                video_bytes = base64.b64decode(encoded)
+                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+                tmp_file.write(video_bytes)
+                tmp_file.close()
+                vr = VideoReader(tmp_file.name, ctx=ctx)
+            elif os.path.isfile(video_file):
+                vr = VideoReader(video_file, ctx=ctx)
+            else:
+                video_bytes = base64.b64decode(video_file)
+                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+                tmp_file.write(video_bytes)
+                tmp_file.close()
+                vr = VideoReader(tmp_file.name, ctx=ctx)
+        else:
+            raise ValueError(f"Unsupported video input type: {type(video_file)}")
+
+        return vr
+
+    finally:
+        if tmp_file and os.path.exists(tmp_file.name):
+            os.unlink(tmp_file.name)
+
+
 def suppress_other_loggers():
     warnings.filterwarnings(
         "ignore", category=UserWarning, message="The given NumPy array is not writable"
@@ -2757,3 +2782,101 @@ def lru_cache_frozenset(maxsize=128):
         return wrapper

     return decorator
+
+
+def apply_module_patch(target_module, target_function, wrappers):
+    original_module, original_function = parse_module_path(
+        target_module, target_function, False
+    )
+
+    original_function_id = id(original_function)
+
+    candidate = original_function
+    for wrapper in wrappers:
+        candidate = wrapper(candidate)
+    if target_function is not None:
+        setattr(original_module, target_function, candidate)
+
+    for key, value in sys.modules.copy().items():
+        if (
+            target_function is not None
+            and hasattr(value, target_function)
+            and id(getattr(value, target_function)) == original_function_id
+        ):
+            setattr(value, target_function, candidate)
+
+
+def parse_module_path(module_path, function_name, create_dummy):
+    from importlib.machinery import ModuleSpec
+
+    def create_dummy_module(full_path, parent=None):
+        """Create and register a placeholder module"""
+        dummy = types.ModuleType(full_path)
+        dummy.__file__ = "vllm_ascend.dummy_module.py"
+        dummy.__spec__ = ModuleSpec(full_path, None)
+        sys.modules[full_path] = dummy
+        if parent:
+            setattr(parent, full_path.split(".")[-1], dummy)
+        return dummy
+
+    def create_placeholder_function(func_name):
+        """Create dummy function that raises when called"""
+
+        def placeholder(*args, **kwargs):
+            raise NotImplementedError(f"Function {func_name} is a placeholder")
+
+        placeholder.__name__ = func_name
+        return placeholder
+
+    modules = module_path.split(".")
+    current_module = None
+    processed_path = []
+
+    for idx, part in enumerate(modules):
+        current_path = ".".join(modules[: idx + 1])
+        parent_path = ".".join(modules[:idx]) if idx > 0 else None
+
+        try:
+            current_module = importlib.import_module(current_path)
+        except ModuleNotFoundError:
+            # Handle missing module
+            parent = importlib.import_module(parent_path) if parent_path else None
+            if parent and hasattr(parent, part):
+                # Use existing attribute from parent
+                current_module = getattr(parent, part)
+                # Check for early function resolution
+                if function_name and hasattr(current_module, function_name):
+                    return current_module, getattr(current_module, function_name)
+                if function_name and create_dummy:
+                    ph_func = create_placeholder_function(function_name)
+                    setattr(current_module, function_name, ph_func)
+                    return current_module, ph_func
+                if function_name:
+                    raise AttributeError(
+                        f"Function {function_name} missing in {current_path}"
+                    )
+            else:
+                if not create_dummy:
+                    raise
+                # Create and register dummy module
+                current_module = create_dummy_module(
+                    current_path,
+                    parent=(
+                        importlib.import_module(parent_path) if parent_path else None
+                    ),
+                )
+
+        processed_path.append(part)
+
+    # Final function handling
+    final_module = sys.modules[module_path]
+    if function_name is not None:
+        if not hasattr(final_module, function_name):
+            if create_dummy:
+                ph_func = create_placeholder_function(function_name)
+                setattr(final_module, function_name, ph_func)
+            else:
+                setattr(final_module, function_name, None)
+        return final_module, getattr(final_module, function_name)
+
+    return final_module, None
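The intended call pattern, sketched with a hypothetical timing wrapper (the wrapper and patch target are illustrative, not from the diff):

import time

from sglang.srt.utils import apply_module_patch

def timed(fn):
    def inner(*args, **kwargs):
        start = time.perf_counter()
        try:
            return fn(*args, **kwargs)
        finally:
            print(f"{fn.__name__} took {time.perf_counter() - start:.3f}s")
    return inner

# Rebinds sglang.srt.utils.load_video in its home module and in every other
# module that has already imported the same function object.
apply_module_patch("sglang.srt.utils", "load_video", [timed])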
sglang/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.4.9.post1"
+__version__ = "0.4.9.post2"
{sglang-0.4.9.post1.dist-info → sglang-0.4.9.post2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sglang
-Version: 0.4.9.post1
+Version: 0.4.9.post2
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                         Version 2.0, January 2004
@@ -242,6 +242,7 @@ Requires-Dist: pynvml; extra == "runtime-common"
 Requires-Dist: pybase64; extra == "runtime-common"
 Requires-Dist: python-multipart; extra == "runtime-common"
 Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
+Requires-Dist: sentencepiece; extra == "runtime-common"
 Requires-Dist: soundfile==0.13.1; extra == "runtime-common"
 Requires-Dist: scipy; extra == "runtime-common"
 Requires-Dist: torchao==0.9.0; extra == "runtime-common"
@@ -249,10 +250,10 @@ Requires-Dist: transformers==4.53.0; extra == "runtime-common"
 Requires-Dist: timm==1.0.16; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: xgrammar==0.1.20; extra == "runtime-common"
+Requires-Dist: xgrammar==0.1.21; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
-Requires-Dist: sgl-kernel==0.2.4; extra == "srt"
+Requires-Dist: sgl-kernel==0.2.5; extra == "srt"
 Requires-Dist: torch==2.7.1; extra == "srt"
 Requires-Dist: torchaudio==2.7.1; extra == "srt"
 Requires-Dist: torchvision==0.22.1; extra == "srt"