sglang 0.5.2rc0__py3-none-any.whl → 0.5.2rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. sglang/srt/configs/model_config.py +2 -1
  2. sglang/srt/distributed/parallel_state.py +3 -1
  3. sglang/srt/entrypoints/engine.py +1 -1
  4. sglang/srt/layers/moe/cutlass_w4a8_moe.py +1 -9
  5. sglang/srt/layers/moe/ep_moe/layer.py +2 -7
  6. sglang/srt/layers/moe/fused_moe_triton/__init__.py +5 -3
  7. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +5 -1048
  8. sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +212 -0
  9. sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +796 -0
  10. sglang/srt/layers/moe/fused_moe_triton/layer.py +5 -2
  11. sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py +87 -0
  12. sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +8 -0
  13. sglang/srt/layers/quantization/w4afp8.py +30 -25
  14. sglang/srt/managers/detokenizer_manager.py +0 -34
  15. sglang/srt/managers/multi_tokenizer_mixin.py +44 -6
  16. sglang/srt/managers/scheduler.py +3 -0
  17. sglang/srt/mem_cache/hiradix_cache.py +19 -3
  18. sglang/srt/mem_cache/memory_pool_host.py +2 -0
  19. sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py +61 -34
  20. sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +27 -6
  21. sglang/srt/models/deepseek_v2.py +5 -0
  22. sglang/srt/models/gpt_oss.py +5 -4
  23. sglang/srt/models/longcat_flash.py +26 -15
  24. sglang/srt/models/longcat_flash_nextn.py +23 -15
  25. sglang/srt/utils.py +0 -10
  26. sglang/test/test_cutlass_w4a8_moe.py +24 -9
  27. sglang/version.py +1 -1
  28. {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/METADATA +2 -2
  29. {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/RECORD +32 -29
  30. {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/WHEEL +0 -0
  31. {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/licenses/LICENSE +0 -0
  32. {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/top_level.txt +0 -0
sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py CHANGED
@@ -113,6 +113,8 @@ def synchronized():
 
 
 class HiCacheHF3FS(HiCacheStorage):
+    """HiCache backend that stores KV cache pages in HF3FS files."""
+
     default_env_var: str = "SGLANG_HICACHE_HF3FS_CONFIG_PATH"
 
     def __init__(
@@ -176,15 +178,32 @@ class HiCacheHF3FS(HiCacheStorage):
         dtype: torch.dtype,
         storage_config: HiCacheStorageConfig = None,
     ) -> "HiCacheHF3FS":
+        """Create a HiCacheHF3FS instance from environment configuration.
+
+        Environment:
+        - Uses env var stored in `HiCacheHF3FS.default_env_var` to locate a JSON config.
+        - Falls back to a local single-machine config when the env var is not set.
+
+        Raises:
+            ValueError: If an MLA model is requested without global metadata server or required keys are missing.
+        """
         from sglang.srt.mem_cache.storage.hf3fs.mini_3fs_metadata_server import (
             Hf3fsGlobalMetadataClient,
             Hf3fsLocalMetadataClient,
         )
 
-        rank = storage_config.tp_rank if storage_config is not None else 0
+        if storage_config is not None:
+            rank, is_mla_model = storage_config.tp_rank, storage_config.is_mla_model
+        else:
+            rank, is_mla_model = 0, False
+
+        mla_unsupported_msg = f"MLA model is not supported without global metadata server, please refer to https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/mem_cache/storage/hf3fs/docs/deploy_sglang_3fs_multinode.md"
 
         config_path = os.getenv(HiCacheHF3FS.default_env_var)
         if not config_path:
+            if is_mla_model:
+                raise ValueError(mla_unsupported_msg)
+
             return HiCacheHF3FS(
                 rank=rank,
                 file_path=f"/data/hicache.{rank}.bin",
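
For orientation, here is a minimal sketch of the JSON config that SGLANG_HICACHE_HF3FS_CONFIG_PATH can point at. Only the key names (file_path_prefix, file_size, numjobs, metadata_server_url) are taken from the code in this diff; the values, and whether further keys are required, are assumptions rather than the authoritative schema:

import json, os, tempfile

# Hypothetical values; only the key names appear in the diff.
config = {
    "file_path_prefix": "/data/hicache",
    "file_size": 1 << 40,  # bytes reserved per rank-local HF3FS file
    "numjobs": 16,  # I/O parallelism
    "metadata_server_url": "http://meta:8000",  # omit for single-machine, non-MLA use
}

path = os.path.join(tempfile.gettempdir(), "hf3fs_config.json")
with open(path, "w") as f:
    json.dump(config, f)
os.environ["SGLANG_HICACHE_HF3FS_CONFIG_PATH"] = path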
@@ -214,25 +233,27 @@ class HiCacheHF3FS(HiCacheStorage):
             raise ValueError(f"Missing required keys in config: {missing_keys}")
 
         # Choose metadata client based on configuration
-        is_mla_model = False
-        if "metadata_server_url" in config and config["metadata_server_url"]:
+        if config.get("metadata_server_url"):
             # Use global metadata client to connect to metadata server
             metadata_server_url = config["metadata_server_url"]
             metadata_client = Hf3fsGlobalMetadataClient(metadata_server_url)
 
-            # Enable MLA optimization only when using the global metadata client
-            is_mla_model = storage_config.is_mla_model if storage_config else False
             logger.info(
                 f"Using global metadata client with server url: {metadata_server_url}"
             )
         else:
+            # Enable MLA optimization only when using the global metadata client
+            if is_mla_model:
+                raise ValueError(mla_unsupported_msg)
+
             # Use local metadata client for single-machine deployment
             metadata_client = Hf3fsLocalMetadataClient()
 
+        rank_for_path = 0 if is_mla_model else rank
         return HiCacheHF3FS(
             rank=rank,
             # Let all ranks use the same file path for MLA model
-            file_path=f"{config['file_path_prefix']}.{rank if not is_mla_model else 0}.bin",
+            file_path=f"{config['file_path_prefix']}.{rank_for_path}.bin",
             file_size=int(config["file_size"]),
             numjobs=int(config["numjobs"]),
             bytes_per_page=bytes_per_page,
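
Taken together, the gating reduces to a small decision rule: MLA models are only usable with the global metadata server, and when they are used, every rank maps to the same backing file. A standalone sketch of that logic (plain Python, not the sglang API; the path format mirrors the config branch above):

def resolve_hf3fs_file(file_path_prefix, rank, is_mla_model, has_metadata_server):
    if is_mla_model and not has_metadata_server:
        raise ValueError("MLA model requires the global metadata server")
    # Per the comment above, all ranks share one file path for MLA models.
    rank_for_path = 0 if is_mla_model else rank
    return f"{file_path_prefix}.{rank_for_path}.bin"

assert resolve_hf3fs_file("/data/hicache", 3, False, False) == "/data/hicache.3.bin"
assert resolve_hf3fs_file("/data/hicache", 3, True, True) == "/data/hicache.0.bin"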
sglang/srt/models/deepseek_v2.py CHANGED
@@ -2185,6 +2185,8 @@ class DeepseekV2ForCausalLM(nn.Module):
             disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
         elif get_moe_expert_parallel_world_size() > 1:
             disable_reason = "Deepseek V3/R1 can not use shared experts fusion optimization under expert parallelism."
+        elif self.quant_config.get_name() == "w4afp8":
+            disable_reason = "Deepseek V3/R1 W4AFP8 model uses different quant method for routed experts and shared experts."
 
         if disable_reason is not None:
             global_server_args_dict["disable_shared_experts_fusion"] = True
@@ -2496,6 +2498,9 @@ class DeepseekV2ForCausalLM(nn.Module):
             ckpt_up_proj_name="up_proj",
             num_experts=self.config.n_routed_experts + self.num_fused_shared_experts,
         )
+        # Params for special naming rules in mixed-precision models, for example:
+        # model.layers.xx.mlp.experts.xx.w1.input_scale. For details,
+        # see https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/blob/main.
         if self.quant_config and self.quant_config.get_name() == "w4afp8":
             expert_params_mapping += FusedMoE.make_expert_input_scale_params_mapping(
                 num_experts=self.config.n_routed_experts
sglang/srt/models/gpt_oss.py CHANGED
@@ -193,8 +193,9 @@ class GptOssSparseMoeBlock(nn.Module):
         return ans
 
 
-def _enable_fused_set_kv_buffer():
-    return _is_cuda
+def _enable_fused_set_kv_buffer(forward_batch: ForwardBatch):
+    """Enable fused set_kv_buffer only on CUDA with bfloat16 KV cache."""
+    return _is_cuda and forward_batch.token_to_kv_pool.dtype == torch.bfloat16
 
 
 # TODO maybe move to a model-common utils
@@ -341,7 +342,7 @@ class GptOssAttention(nn.Module):
                     layer=self.attn,
                     forward_batch=forward_batch,
                 )
-                if _enable_fused_set_kv_buffer()
+                if _enable_fused_set_kv_buffer(forward_batch)
                 else None
             ),
         )
@@ -355,7 +356,7 @@ class GptOssAttention(nn.Module):
         attn_output = self.attn(
             *inner_state,
             sinks=self.sinks,
-            save_kv_cache=not _enable_fused_set_kv_buffer(),
+            save_kv_cache=not _enable_fused_set_kv_buffer(forward_batch),
         )
         output, _ = self.o_proj(attn_output)
         return output
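
The two call sites above are intentionally complementary: when the fused path is active, the fused op writes K/V into the pool and the attention backend must not save it again; otherwise the backend saves it as usual. A toy restatement of that invariant (simplified, not the sglang API):

def kv_writers(fused_enabled):
    fused_writes = fused_enabled        # fused set_kv_buffer argument is passed
    backend_writes = not fused_enabled  # save_kv_cache flag on self.attn
    assert fused_writes != backend_writes, "KV must be written exactly once"
    return fused_writes, backend_writes

kv_writers(True)   # CUDA + bfloat16 KV cache
kv_writers(False)  # every other configuration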
sglang/srt/models/longcat_flash.py CHANGED
@@ -651,9 +651,6 @@ class LongcatFlashForCausalLM(nn.Module):
             ).T
         else:
             w = self_attn.kv_b_proj.weight
-        # NOTE(HandH1998): Since `bmm_fp8` only supports per-tensor scale, we have to requantize `self_attn.kv_b_proj`.
-        # This may affect the accuracy of fp8 model.
-        # Fix deepseek v3 blockwise bmm by using deep_gemm
         use_deep_gemm_bmm = False
 
         if w.dtype in (
@@ -790,6 +787,9 @@ class LongcatFlashForCausalLM(nn.Module):
                 self.config.hidden_size / self.config.kv_lora_rank
             ) ** 0.5
 
+        # TODO(linguoyuan) EPMoE not support DEEPGEMM_BLACKWELL, DeepEP needs to be supported in the future
+        deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0 = False
+
         if (
             deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
             and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
@@ -804,24 +804,35 @@ class LongcatFlashForCausalLM(nn.Module):
         for layer_id in range(self.config.num_hidden_layers):
             layer = self.model.layers[layer_id]
             for i in range(2):
-                for module in [
-                    layer.self_attn[i].fused_qkv_a_proj_with_mqa,
-                    layer.self_attn[i].q_b_proj,
-                    layer.self_attn[i].kv_b_proj,
-                    layer.self_attn[i].o_proj,
-                ]:
-                    requant_weight_ue8m0_inplace(
-                        module.weight, module.weight_scale_inv, weight_block_size
-                    )
+                self_attn = layer.self_attn[i]
+                module_list = [
+                    self_attn.kv_b_proj,
+                    self_attn.o_proj,
+                ]
+
+                if self.config.q_lora_rank is not None:
+                    module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+                    module_list.append(self_attn.q_b_proj)
+                else:
+                    module_list.append(self_attn.kv_a_proj_with_mqa)
+                    module_list.append(self_attn.q_proj)
+
+                for module in module_list:
+                    if hasattr(module, "weight_scale_inv"):
+                        requant_weight_ue8m0_inplace(
+                            module.weight, module.weight_scale_inv, weight_block_size
+                        )
+
                 mlp = layer.mlps[i]
                 assert isinstance(mlp, LongcatFlashMLP)
                 for module in [
                     mlp.gate_up_proj,
                     mlp.down_proj,
                 ]:
-                    requant_weight_ue8m0_inplace(
-                        module.weight, module.weight_scale_inv, weight_block_size
-                    )
+                    if hasattr(module, "weight_scale_inv"):
+                        requant_weight_ue8m0_inplace(
+                            module.weight, module.weight_scale_inv, weight_block_size
+                        )
 
         for layer_id in range(self.config.num_hidden_layers):
             experts = layer.mlp.experts
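
The hasattr guard is what lets this pass run on mixed-precision checkpoints: a layer that was not blockwise-FP8-quantized carries no weight_scale_inv buffer and is now skipped rather than raising AttributeError. A self-contained sketch of the pattern with toy stand-ins (not the sglang module classes):

class QuantLinear:  # blockwise-FP8 layer: carries scales
    weight, weight_scale_inv = "fp8 blocks", "per-block scales"

class PlainLinear:  # e.g. bf16 layer: no scales
    weight = "bf16"

def requant_all(modules, requant_fn):
    for m in modules:
        if hasattr(m, "weight_scale_inv"):  # skip unquantized layers
            requant_fn(m.weight, m.weight_scale_inv)

requant_all([QuantLinear(), PlainLinear()], lambda w, s: None)  # no AttributeError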
sglang/srt/models/longcat_flash_nextn.py CHANGED
@@ -344,9 +344,6 @@ class LongcatFlashForCausalLMNextN(LongcatFlashForCausalLM):
             ).T
         else:
             w = self_attn.kv_b_proj.weight
-        # NOTE(HandH1998): Since `bmm_fp8` only supports per-tensor scale, we have to requantize `self_attn.kv_b_proj`.
-        # This may affect the accuracy of fp8 model.
-        # Fix deepseek v3 blockwise bmm by using deep_gemm
         use_deep_gemm_bmm = False
         if w.dtype in (
             torch.float8_e4m3fn,
@@ -480,24 +477,35 @@ class LongcatFlashForCausalLMNextN(LongcatFlashForCausalLM):
     def _weight_requant_ue8m0(self):
         weight_block_size = self.quant_config.weight_block_size
         layer = self.model.decoder
-        for module in [
-            layer.self_attn.fused_qkv_a_proj_with_mqa,
-            layer.self_attn.q_b_proj,
-            layer.self_attn.kv_b_proj,
-            layer.self_attn.o_proj,
-        ]:
-            requant_weight_ue8m0_inplace(
-                module.weight, module.weight_scale_inv, weight_block_size
-            )
+        self_attn = layer.self_attn
+        module_list = [
+            self_attn.kv_b_proj,
+            self_attn.o_proj,
+        ]
+
+        if self.config.q_lora_rank is not None:
+            module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+            module_list.append(self_attn.q_b_proj)
+        else:
+            module_list.append(self_attn.kv_a_proj_with_mqa)
+            module_list.append(self_attn.q_proj)
+
+        for module in module_list:
+            if hasattr(module, "weight_scale_inv"):
+                requant_weight_ue8m0_inplace(
+                    module.weight, module.weight_scale_inv, weight_block_size
+                )
+
         mlp = layer.mlps
         assert isinstance(mlp, LongcatFlashMLP)
         for module in [
             mlp.gate_up_proj,
             mlp.down_proj,
         ]:
-            requant_weight_ue8m0_inplace(
-                module.weight, module.weight_scale_inv, weight_block_size
-            )
+            if hasattr(module, "weight_scale_inv"):
+                requant_weight_ue8m0_inplace(
+                    module.weight, module.weight_scale_inv, weight_block_size
+                )
 
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         stacked_params_mapping = [
sglang/srt/utils.py CHANGED
@@ -2787,16 +2787,6 @@ def lru_cache_frozenset(maxsize=128):
     return decorator
 
 
-def get_worker_ids_from_req_rids(rids):
-    if isinstance(rids, list):
-        worker_ids = [int(rid.split("_")[0]) for rid in rids]
-    elif isinstance(rids, str):
-        worker_ids = [int(rids.split("_")[0])]
-    else:
-        worker_ids = []
-    return worker_ids
-
-
 def get_origin_rid(rid):
     return rid.split("_", 1)[1] if "_" in rid else rid
 
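Context for the removal: in multi-tokenizer mode, request IDs are prefixed with "{worker_id}_", and the surviving get_origin_rid strips that prefix at the first underscore. The deleted helper extracted the worker id from the same format; its role was presumably absorbed by the reworked multi_tokenizer_mixin.py. A quick illustration (example rid values assumed):

def get_origin_rid(rid):
    return rid.split("_", 1)[1] if "_" in rid else rid

rid = "7_3f2a9c"                    # "{worker_id}_{origin_rid}"
assert get_origin_rid(rid) == "3f2a9c"
assert int(rid.split("_")[0]) == 7  # what the removed helper computed per rid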
sglang/test/test_cutlass_w4a8_moe.py CHANGED
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Optional
+from typing import Literal, Optional
 
 import pytest
 import torch
@@ -25,7 +25,7 @@ def pack_int4_values_to_int8(int4_values_interleaved: torch.Tensor) -> torch.Tensor:
     return packed_tensor.to(torch.int8)
 
 
-def pack_interleave(num_experts, ref_weight, ref_scale):
+def pack_interleave(num_experts, ref_weight, ref_scale, alignment=4):
     n, k = ref_weight.shape[1], ref_weight.shape[2]
 
     weight = pack_int4_values_to_int8(ref_weight.cpu()).cuda()
@@ -33,11 +33,16 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
     w_q = w_q.contiguous()
 
     scale_interleaved = ref_scale.reshape(
-        ref_scale.shape[0], ref_scale.shape[1], (ref_scale.shape[2] // 4), 4
+        ref_scale.shape[0],
+        ref_scale.shape[1],
+        (ref_scale.shape[2] // alignment),
+        alignment,
     )  # [E, N, K/4, 4]
     scale_interleaved = scale_interleaved.permute(0, 2, 1, 3)  # [E, K/4, N, 4]
     scale_interleaved = scale_interleaved.reshape(
-        ref_scale.shape[0], ref_scale.shape[2] // 4, ref_scale.shape[1] * 4
+        ref_scale.shape[0],
+        ref_scale.shape[2] // alignment,
+        ref_scale.shape[1] * alignment,
     )  # [E, K/4, N*4]
     w_scale = scale_interleaved.contiguous()
@@ -48,12 +53,17 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
 @pytest.mark.parametrize("N", [2048])
 @pytest.mark.parametrize("K", [7168])
 @pytest.mark.parametrize("E", [256])
-@pytest.mark.parametrize("ep_size", [8])
+@pytest.mark.parametrize("tp_size", [8])
+@pytest.mark.parametrize("use_ep_moe", [True, False])
 @pytest.mark.parametrize("topk", [8])
 @pytest.mark.parametrize("group_size", [128])
 @pytest.mark.parametrize("dtype", [torch.bfloat16])
-def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
-    local_e = E // ep_size
+def test_cutlass_w4a8_moe(M, N, K, E, tp_size, use_ep_moe, topk, group_size, dtype):
+    if use_ep_moe:
+        local_e = E // tp_size
+    else:  # tp mode
+        local_e = E
+        N = N // tp_size
 
     debug = False
     if debug:
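
The new parametrization covers both sharding layouts of the MoE weights across the 8 GPUs: EP splits the expert dimension, TP splits the FFN width. With the test's numbers:

E, N, tp_size = 256, 2048, 8

# EP mode: each rank holds a slice of the experts at full width.
ep_local_experts, ep_local_n = E // tp_size, N  # 32 experts, N = 2048

# TP mode: each rank holds every expert, each sharded along N.
tp_local_experts, tp_local_n = E, N // tp_size  # 256 experts, N = 256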
@@ -87,7 +97,10 @@ def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
     )
 
     w1_q, w1_scale = pack_interleave(local_e, ref_weight_1, scale_1)
-    w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+    if use_ep_moe:
+        w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+    else:
+        w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2, 1)
 
     device = "cuda"
     a_strides1 = torch.full((local_e, 3), K, device=device, dtype=torch.int64)
@@ -265,7 +278,9 @@ def ref(
 
     gate, fc1 = fc1.chunk(2, dim=-1)
     fc1 = fc1 * torch.nn.functional.silu(gate)
-    act = (fc1 / pre_quant_scale_2.float()).to(torch.float8_e4m3fn)
+    act = torch.clamp((fc1 / pre_quant_scale_2.float()), -448.0, 448.0).to(
+        torch.float8_e4m3fn
+    )
     act = act.to(dtype)
 
     w2 = ref_weight_2[e_idx]
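
The clamp bounds come from the float8_e4m3fn format itself: its largest finite magnitude is 448, so clamping before the cast keeps the reference activation inside the representable range, presumably matching the saturating behavior of the FP8 kernel under test. A small check:

import torch

print(torch.finfo(torch.float8_e4m3fn).max)  # 448.0

x = torch.tensor([1000.0, -1000.0, 3.14])
safe = torch.clamp(x, -448.0, 448.0).to(torch.float8_e4m3fn)
print(safe.float())  # tensor([ 448., -448., 3.25]) -- rounded onto the e4m3 grid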
sglang/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.5.2rc0"
+__version__ = "0.5.2rc1"
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sglang
-Version: 0.5.2rc0
+Version: 0.5.2rc1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                                  Version 2.0, January 2004
@@ -257,7 +257,7 @@ Requires-Dist: uvloop; extra == "runtime-common"
 Requires-Dist: xgrammar==0.1.23; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
-Requires-Dist: sgl-kernel==0.3.7.post1; extra == "srt"
+Requires-Dist: sgl-kernel==0.3.8; extra == "srt"
 Requires-Dist: torch==2.8.0; extra == "srt"
 Requires-Dist: torchaudio==2.8.0; extra == "srt"
 Requires-Dist: torchvision; extra == "srt"
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/RECORD CHANGED
@@ -9,7 +9,7 @@ sglang/global_config.py,sha256=ZMTux_PsGnvkyJ0kTFwhTdbnFwIjjpGDogut_9Lu4Vo,1732
 sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
 sglang/profiler.py,sha256=JCpZzlDhahoiPlPi5IG3n7GFrQHxfHEB6ELie3Ck55w,4397
 sglang/utils.py,sha256=dC2PNkKYTgDHsNrWdZJ74GvaXGSHCeIk_aZ-TA89OhY,16380
-sglang/version.py,sha256=wVQ3e58PHT2hhCLFcOLsyR4-dgdre7yd49m4mJK4CO8,25
+sglang/version.py,sha256=Yk9OPhzYNwVtFzj5deHSdkGtcxF0FXLnKJ9OFFikW9M,25
 sglang/eval/llama3_eval.py,sha256=mLNRZJIqV4CfqrY8UGnJEcHw2Xsyr1eyYZgFSUFYr1g,9997
 sglang/eval/loogle_eval.py,sha256=-CC2s2kh5qUoDrHRkQVkC_jNvBgNojXbf456ny5s78s,4557
 sglang/lang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
@@ -46,7 +46,7 @@ sglang/srt/reasoning_parser.py,sha256=HEWAeFzPA_Jn3a44BYCz61QNV6kAvX46Y0tR8csAUg
 sglang/srt/server_args.py,sha256=qEh8ykOglDMHh3GvyUhG0oOSJq_tH8vUYtDzSukoOtk,104043
 sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
 sglang/srt/two_batch_overlap.py,sha256=UykF5nC2rja3Hvmu0D9glqKdVRIEhQGPV84Jm7veopQ,34150
-sglang/srt/utils.py,sha256=K0GxDnxVKs5LeqbCiphtGpGd5w4kPzzQsqgHkT1e0Ic,95522
+sglang/srt/utils.py,sha256=3qKij1k6uj0Ch-gErdsfXJFUiCU941L0ePoqnIqxZvk,95252
 sglang/srt/warmup.py,sha256=zldxhMlXpclRAJXmfBjJNUJd1eDizVdysibBvQyTVuA,1782
 sglang/srt/configs/__init__.py,sha256=3GdmJ2DUiNq1zNs3yOILwZzL0J8fK-h8k2P5YVgxEI0,833
 sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -60,7 +60,7 @@ sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew
 sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
 sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
 sglang/srt/configs/longcat_flash.py,sha256=Qp25xJVLq2K72Z80cXhcJxtqhagAdiPySDoevuT0Sno,3589
-sglang/srt/configs/model_config.py,sha256=n28KH8pqOqQNyyOwvYEXpkDZ1z8tzPe1sfV4dGmPrHc,31607
+sglang/srt/configs/model_config.py,sha256=0oEbC4bxtfPckBuY_p6uXHB1vDXxhkvJCpr9cPbExwI,31712
 sglang/srt/configs/step3_vl.py,sha256=_Otgnym57DVgB_kZ__8c1_Ys5gSalA_K0ZuVjcG51T0,4845
 sglang/srt/configs/update_config.py,sha256=GEf-XhL8JPrbX9-Hz8V7S3M6YTg76DVdIhc_4YdMDtc,6291
 sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
@@ -108,7 +108,7 @@ sglang/srt/disaggregation/nixl/conn.py,sha256=eSof87fG21Dd4COszfnbeXIxne3TWvw0mS
 sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
 sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
 sglang/srt/distributed/naive_distributed.py,sha256=5Kcfapzz61G3TtScTZrHoWa4bf6Vr27GlMcBAGMz7tQ,3260
-sglang/srt/distributed/parallel_state.py,sha256=RnondGDsfsbZxi2o8H1UivaoOMqDHiKVtDVb9HuOW44,65704
+sglang/srt/distributed/parallel_state.py,sha256=NsWEw341ew7dElC9BQ3vBLzaLVTDKCmCkKIl37b72dg,65717
 sglang/srt/distributed/utils.py,sha256=aaCxATncLGnVgB0WlGpBdee0behKW8Dy_dakqcuKSaQ,8497
 sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
 sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=Q1kkKPKFPV0QMmKLyjOBlOnX8-Pr4UeGBZYkG6j0gc0,16570
@@ -124,7 +124,7 @@ sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=IrSrnpZnii0E
 sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
 sglang/srt/entrypoints/EngineBase.py,sha256=yKN76witT2jz1zhmLHmPNLGMpK2UiOTaKQ2KPD8l99U,2594
 sglang/srt/entrypoints/context.py,sha256=aD-94xkD0komuGO5gtYUoJKCHdc4hAipMxQt04yVRGA,8030
-sglang/srt/entrypoints/engine.py,sha256=7CBZly2Z1ekrBNbGHS187Yb0pj-Uuqw813s7oLrci8E,33496
+sglang/srt/entrypoints/engine.py,sha256=wNEYxQTVFHt9EvMzQr5zutX9Cb7RDDn64c8Xckuwhsg,33490
 sglang/srt/entrypoints/harmony_utils.py,sha256=01T-A5GBUm2b306PcxNEg2rfx4cykBcqNYrzcXTWBlc,13590
 sglang/srt/entrypoints/http_server.py,sha256=_GEk6RgxlMWYUNXOx9he2OIFOs1-Qan1NrSm0EAGJ3M,49649
 sglang/srt/entrypoints/http_server_engine.py,sha256=_--j4U04OeJLlnnv1f0XmCd_Ry0z1FlhkrbePX8rYV0,4938
@@ -220,7 +220,7 @@ sglang/srt/layers/attention/wave_ops/prefill_attention.py,sha256=viTUit0rxjVV5Ua
 sglang/srt/layers/moe/__init__.py,sha256=63TxUpSiUpVg1SDY1zdlTg3WFJzAc7WSndOViOmUv4E,835
 sglang/srt/layers/moe/cutlass_moe.py,sha256=JKJED-4709ndP5AwhQ7Vi04GJjw5d9Xl_mWOsZPZ3U4,14298
 sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
-sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=rWyHMg0kYZhIdLWTJeC7pqRG9ywfeKqi47-OekNeF4Y,7306
+sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=pUvYkbm3kD5IBjVuJeehU_hvEvdNcYcO2eRZXDc6iLI,7005
 sglang/srt/layers/moe/fused_moe_native.py,sha256=8SAToE4B-22H5JsENZgJ1Io6QfE5-D9ItWLtbGksFQQ,3372
 sglang/srt/layers/moe/rocm_moe_utils.py,sha256=07Z99bTV3B-b2Cbm-odhGpx2twxtnVpYvaDMBE1K3LM,4555
 sglang/srt/layers/moe/router.py,sha256=eUNu_Uz5VB2FOZzZyYuZo5pokCVBS17_fcjHQbmvDSE,12181
@@ -228,10 +228,13 @@ sglang/srt/layers/moe/topk.py,sha256=r8pE6eJ8dprfZxaw8VcfrRvSp6_xDNPkr0tajmG_CZc
 sglang/srt/layers/moe/utils.py,sha256=omh9E6sF-KtrELEsE9y01Ash7FfQlstyE-97UtpH8qk,5998
 sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/srt/layers/moe/ep_moe/kernels.py,sha256=Acg6dW-zVDQ95vuBVuzxb5SUFFxhLCewk_tVSZeuma8,46158
-sglang/srt/layers/moe/ep_moe/layer.py,sha256=izQx9iy51r8nTla8_yfEnfQUzaW07RdIskXh2rJWAf8,27969
-sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=4NXZHbCw-G-uSnNUj4up0yh3xBDPnT-x0pdoIr0lku8,831
-sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=JKp89Cdl8a85tKzmJa4ah5VtxZzRNBSbY4cnsAtnvio,60803
-sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=jub7moptP9UjyRYJ2gPvY8HEnE41d_oFbZgjVvNsJxQ,39409
+sglang/srt/layers/moe/ep_moe/layer.py,sha256=uAHuiAILb3XQi9t1JGMElfzvsG9cKIDMXs6ezEGhGvY,27776
+sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=tis0ZJmih7gKHCurbLtY_o-bY3K4MOzQLYLC3ftIOf0,977
+sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=q_L-eZh9pBP3KZLipv9cRe2WpezNrHMz8NzMy97TV0Q,23178
+sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py,sha256=fFnRcXA6r0gnD_7EQmb8NacQJRJ8YHsmMZw2khlClTQ,7687
+sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py,sha256=z10ZJLrFFhgB5_cEFrvMt6bnwe46T5Vzz4nvMl4ErGU,27776
+sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=TI3p8FOJJjpoyE7VMngs9BPeiiN9M9XHZJRWVTWtiH0,39637
+sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py,sha256=U93mxPPU2RP7d3QRSvoG3OCtUQIK8YhQfHf1ZGeREmY,3284
 sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=Ai06BZ7uxMnk0nPWQelgvi1rV9Z72FetRo6p7E3rsYs,10986
 "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
 "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
@@ -455,7 +458,7 @@ sglang/srt/layers/quantization/petit_utils.py,sha256=-gy4zMhqNoIA1R0n7-5C0efV54j
 sglang/srt/layers/quantization/qoq.py,sha256=jM96uJfKz3vo1B6SSIrQXNT3vnD3UJrepK2eY-tSQU4,8139
 sglang/srt/layers/quantization/unquant.py,sha256=CxsOEYqYIfanKHo0ooFArbVz2ueX3vBWnYHE8gtnzdQ,12996
 sglang/srt/layers/quantization/utils.py,sha256=d4eaS4-Z4q3GRgb4HDMr2EoWEdQCefrVdJufK6n_NQY,18509
-sglang/srt/layers/quantization/w4afp8.py,sha256=345w-gCDes3dGqCUAmjyzXZemsIEWc_swJ7FyJyzFTI,11392
+sglang/srt/layers/quantization/w4afp8.py,sha256=Cm_KjJQu-XKqSKh6usJ5tNXARm6F5n_C2P1c1NuYdC0,11720
 sglang/srt/layers/quantization/w8a8_fp8.py,sha256=wzJi5jeTnbf-01iehOVTTCu_262rlMh9AQ2rogKWBmo,9981
 sglang/srt/layers/quantization/w8a8_int8.py,sha256=cOKbhWxFlv6hw5dSs8ExPBXFcTwudwR26m3CAoOhCSs,35342
 sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -619,7 +622,7 @@ sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a
 "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=FImA-TJ_tQDjqwoNWxS--sRDoKDXf9gamlME3tkxH58,3252
 "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
 sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py,sha256=esJMd0Yuj68t6QYOpmIFuiWP2J2dxTMC4bRBNH0Xk6I,26
-sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=8BxWqX1WT7SvRRccBmx8CBVGiL_FIa923OlDYwzDcDM,7907
+sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=PI9r3PHCMK9EgpoFuBgR4jvokA5sBz8zyC47ps3wet4,8164
 sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=8C4xyO58N3Zl8h-fYddUpXDs3mnSqjYbaLt12cPB2XA,778
 sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py,sha256=vCj5vdAshEB9mAgSUYXhgJ0bd1Ithmu_n4-m_IWUbd4,2531
 sglang/srt/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -646,14 +649,14 @@ sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=VqCAFvUtq_l-0RGIkx3W_fzD55QcW2
 sglang/srt/managers/cache_controller.py,sha256=WEaM01U0al13oSU7AHvwEBcmOy-1SmtPWbikgrPAx6g,33130
 sglang/srt/managers/configure_logging.py,sha256=8sNXZ2z9pBWOwn-X3wyz013Ob8Nbm1zDxRkxoZjH-l4,1633
 sglang/srt/managers/data_parallel_controller.py,sha256=VvEkidmYCGAFTv6upEHf_tyTvkM8xLhCyOnLJGWL6BQ,14592
-sglang/srt/managers/detokenizer_manager.py,sha256=YTl8ytxNf3an5sOAUjKTGcEKBJMKF7kQiViAw6eOoMw,13213
+sglang/srt/managers/detokenizer_manager.py,sha256=mVEzkaCTu0MUe8QuwQC9uReOJgTWVQYXcItlDS2gYK4,11594
 sglang/srt/managers/io_struct.py,sha256=ebAKRqBYj4slW25-dWs51JAW6caPcvuIMlSoJJZd8wo,40503
 sglang/srt/managers/mm_utils.py,sha256=J5hahCJ7HAKhJtGzV6PRa5HB9B2NjYwWGYMqIgiGC60,29444
-sglang/srt/managers/multi_tokenizer_mixin.py,sha256=smdzSwH-EW4zDNTJtkl6nXq6O_xH5EfW3iU3iSScUoM,22851
+sglang/srt/managers/multi_tokenizer_mixin.py,sha256=5HbUePDkWeEua_500sEJdKYp9CfRITKUAeFEBp8OR7k,24706
 sglang/srt/managers/multimodal_processor.py,sha256=cnWpu2G79v1a6FJB_FriLxESgGUbfC3GptLmeRVVgew,1801
 sglang/srt/managers/schedule_batch.py,sha256=9lhBnf-siQjsThD8FRyhiF50N-LOE7dfoPr1uzklnjQ,77170
 sglang/srt/managers/schedule_policy.py,sha256=vh9BQW9tBv80LW4JApLE6smU2m4gy6cAEI25HVXiS60,22383
-sglang/srt/managers/scheduler.py,sha256=7euNU5d089V4uCN7gDXHEz0eUzewQ5V7LrwAlvP5tKc,111061
+sglang/srt/managers/scheduler.py,sha256=Ipdj1_nDJDUNWb9Hx2W4NNndvhwRkn6B9rBGmJhDklc,111243
 sglang/srt/managers/scheduler_input_blocker.py,sha256=zP8xU_UmU2H0AB6sEqvivDcDB1QDgTauNDYDIJ7Nez4,3683
 sglang/srt/managers/scheduler_metrics_mixin.py,sha256=31BbukSyUUPMNHLqRlS5sEiAv7Gi5VHFQ1TgxvcAdcw,10054
 sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=CLwF58GTV7PkyYUoC-R7ROA-cZt8di9_9n2pajacxUY,31511
@@ -672,10 +675,10 @@ sglang/srt/mem_cache/base_prefix_cache.py,sha256=hLS2ncTMAz7Kpdk5pNwn5c6g8b61_K9
 sglang/srt/mem_cache/chunk_cache.py,sha256=jbJeEEZ5_WYEF_AnDZIAu2sMD4hAGAd_24F980fjVwU,3199
 sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
 sglang/srt/mem_cache/hicache_storage.py,sha256=eidtVRRsT8efqaEECXRLcxwJ7OomstzOctbqTucOLNg,7307
-sglang/srt/mem_cache/hiradix_cache.py,sha256=aL05axnc7qDy6FWMLbj76GaYIYjqooMEFYbSKiypJMA,28527
+sglang/srt/mem_cache/hiradix_cache.py,sha256=k7P_MHQnFfvFj41T3WseTQQR0Hpun4VXrqtqnjKZHtI,29155
 sglang/srt/mem_cache/lora_radix_cache.py,sha256=4NbK0Rc15kTiCOSsthyM6WsZarMK2MNhtsNqYMakJ5k,14369
 sglang/srt/mem_cache/memory_pool.py,sha256=EGZtQsgXDOuySeEBdV6jBIt9ZDvqMsErNPe6P5ifHh8,39540
-sglang/srt/mem_cache/memory_pool_host.py,sha256=d8TJqmOILaeub4maP09y20MwYf7KKobzv7pVrZ18CiA,26119
+sglang/srt/mem_cache/memory_pool_host.py,sha256=Uy2sGFEVupj7ZBWRFBgnkn0s5Yb8NzmKre9IbBQ-ut8,26189
 sglang/srt/mem_cache/multimodal_cache.py,sha256=zPnQLQhBZ6zsUpCQPSoNkrB9EEvpoDQS4mU7c3sRWjE,2171
 sglang/srt/mem_cache/radix_cache.py,sha256=U0KEjQNcwhdFWnXYTMhDkxpiazAD1ttbADEkGPHiiTU,19116
 sglang/srt/mem_cache/radix_cache_cpp.py,sha256=YuYt4xNzfmNNSkwaAk3VLWcZI74_XQobIyEgt96f7Qs,9284
@@ -683,8 +686,8 @@ sglang/srt/mem_cache/swa_radix_cache.py,sha256=hV5OOmAqu4UT80BVsu7lhGewcbq-u__JK
 sglang/srt/mem_cache/cpp_radix_tree/radix_tree.py,sha256=tQZpz-H6HxNAQe9mYqt6aX8mfDZyu_fbJTcCNK-Ns1M,7179
 sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py,sha256=WV5M9UPPLUMMjU3Mpp6HBqyCQpGBuMyj-N5PwQcNo5k,5178
 sglang/srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp,sha256=dDYPtxmL6OFhEyv_GuTgzkTDpBjcD0EK8kphnn7eaGc,1133
-sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py,sha256=OfQMFwbbQg0pbC7rJktg5wz3YFhNsNOjsXCsAjrh6iU,16738
-sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py,sha256=RlVAloClfk_Cw4TjfokHl75YdgmtJ1FT7KdGA2QLXDk,13990
+sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py,sha256=lJ9Jp2kOgQQaUihxlO9l-W8fYfOMa1h1y0WAtzzFGLM,17850
+sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py,sha256=2sa0fgXRB0rkOqGbewRtO11A8ES0DhlKZi7nZmD330I,14873
 sglang/srt/mem_cache/storage/hf3fs/test_hf3fs_utils.py,sha256=g2h0Woa2CK39pRTigHCVR_hq095dTaC70lCIAIs9THM,1089
 sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py,sha256=S0UAG86sVMDc4-LvzSZrLKeW0fec5D7py_sqe3JtXEk,10000
 sglang/srt/mem_cache/storage/mooncake_store/unit_test.py,sha256=jMdJgU41VscKS-mn_AVZE8LQbb_3MWQ57i9V3OCWLI4,1007
@@ -712,7 +715,7 @@ sglang/srt/models/dbrx.py,sha256=117gwrB_o8VlACwDH5YlXkPfuS7t2Mh-nyJvpNpIYxs,159
 sglang/srt/models/deepseek.py,sha256=M7nyHcAbTIs8vc1g6u00oy22-pp704Xp4T3clBzu4xM,17460
 sglang/srt/models/deepseek_janus_pro.py,sha256=td8xGs6ARfJ8AQCYwUhMOZoWigrAs7m3trF5-kXCqik,70418
 sglang/srt/models/deepseek_nextn.py,sha256=uoiJxAVl9DX7bQcjyuouAZz6W-cB1HMubcirTBghyR4,6115
-sglang/srt/models/deepseek_v2.py,sha256=s5xrkzRtLujCKs-LUbvGl91c_QNZ4jO5Z0_6ER4tHdc,107544
+sglang/srt/models/deepseek_v2.py,sha256=IOSLn1PV3GUsNmP10nd661EOlDI3ThGZBjHS7fSEnIE,107953
 sglang/srt/models/deepseek_vl2.py,sha256=7X3gI4DE7guUtNJvMLf06jO8MUHKa2Aan8evZg5hsXc,13061
 sglang/srt/models/ernie4.py,sha256=7dP7_d0i86tv8Mp21Ew9RdsFr6TskIkm8UtUd8zFo00,16067
 sglang/srt/models/ernie4_eagle.py,sha256=E7f-ygCcY8pIoyTMKFyVC2uRcG59-_RaQTO4gd5_uWI,7223
@@ -732,7 +735,7 @@ sglang/srt/models/glm4v.py,sha256=WtDvH4691H6IKrcGBrkBpweSm_iFK61bMEfvqpbblmc,23
 sglang/srt/models/glm4v_moe.py,sha256=qgJtWyRI0dy9qcTMXHxhBZp-ghfghV3PLhpOXgSAW60,17313
 sglang/srt/models/gpt2.py,sha256=kclhxEs8oJk1KCyhmAqo7rZqecVGGHYkc-a1WZi3aIk,9841
 sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtYhl4,10307
-sglang/srt/models/gpt_oss.py,sha256=NoqSVjOlTzPR-TRpfViAAGyrk3lMIgyjL4RYVQQlhRY,44283
+sglang/srt/models/gpt_oss.py,sha256=6g7PE6EiOLDipmhV-XKmBcuoCGbhH6SU6NwsSoB_FJM,44469
 sglang/srt/models/granite.py,sha256=8q92shxVPAp_cJDohJATffSGd7Z0Oi-vF5jpY6DlK4s,19840
 sglang/srt/models/granitemoe.py,sha256=j1rgZ62CbBioECjUblDCw_NneDQgY_QJODsI0fqXVO8,13779
 sglang/srt/models/grok.py,sha256=8KCR13LtdnhswJrIwgb0sdFW0OxCA8GQ0fbN8gb856E,40518
@@ -753,8 +756,8 @@ sglang/srt/models/llama_embedding.py,sha256=zq-_lNu35VBFc7eemiam0zdkGIE8fzrgk5OW
 sglang/srt/models/llama_reward.py,sha256=LF2nqMV5XOrljGjAwJg43mBv3z6Q040I2EYlgZeCp8k,4681
 sglang/srt/models/llava.py,sha256=xzYip_BAwpzSIdZre43LZiyTpFISa0ZCLdO6LUSbaCg,37702
 sglang/srt/models/llavavid.py,sha256=-CSk0RJ2MQeb81sh-RISeVJFaI-XWY6nR6_I594MkME,12818
-sglang/srt/models/longcat_flash.py,sha256=ZtVmyxmEG_ckDnWEj-rwa2q99sgB59XHMqiSpnmcKdw,40705
-sglang/srt/models/longcat_flash_nextn.py,sha256=HxK_W0kcTrJu2WXMOJNYHajq8s-8qbHCWo7YBT-9e6E,29489
+sglang/srt/models/longcat_flash.py,sha256=gyElibQ9q_qq4tBA7eah0f3bLSzvMEpgD_n6CcRyG9M,41055
+sglang/srt/models/longcat_flash_nextn.py,sha256=oVy776-AE5z43C472oqHlcl9NfzDDEaQgGB1msZNdgc,29644
 sglang/srt/models/mimo.py,sha256=Mp-iFp4YHuiuq-H8enUF5K5QbMnVcvEa6mURH6vM3yM,6140
 sglang/srt/models/mimo_mtp.py,sha256=jSmqJAXu7G3OO7jW1oa2suI4H_Yl7u5ZT7w4lHFbHhg,7292
 sglang/srt/models/minicpm.py,sha256=CzBJyZtfMpp8jvlEl29DHI7HLVq-CxuqP1UHwxIbaUI,14567
@@ -857,7 +860,7 @@ sglang/test/test_block_fp8_deep_gemm_blackwell.py,sha256=Hnhq4kkyINHb4ONedkp5Kf7
 sglang/test/test_block_fp8_ep.py,sha256=n4X6ZKwuUUbV5Ofjg64ptlaFGI1LbRXDfFiJW1ELHgY,10546
 sglang/test/test_custom_ops.py,sha256=PenQ8zM1wj5xwiVEPVzD37pO-x90aOfFMpCRZenaKsY,5709
 sglang/test/test_cutlass_moe.py,sha256=ax-IYPm5tZjkZft0q8swHnzerOI4LB4JOLFaKGJVE-k,9629
-sglang/test/test_cutlass_w4a8_moe.py,sha256=E0ffQOR84bEFz6KFNbczH2LRhQqkeC9HPp76u0vzdwo,8809
+sglang/test/test_cutlass_w4a8_moe.py,sha256=Ku9VCXEMJ3BwXtvb1A3FzY-zek-S-A4thWyX6m-1v-A,9219
 sglang/test/test_deepep_utils.py,sha256=749ysTBGNzh6rYUCJhhZBtZpeD15eWTeNHYCytcvZtc,7448
 sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
 sglang/test/test_fp4_moe.py,sha256=rJLkKW3glBMvI5Ed0LltOHi8zCReMa-WB50p5zGm6J4,10189
@@ -871,8 +874,8 @@ sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqn
 sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
 sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
 sglang/test/attention/test_trtllm_mla_backend.py,sha256=quZ6SYuEH7J1YMcF8YO1_bwSNMz1gecpWRGauYjbUeA,42055
-sglang-0.5.2rc0.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
-sglang-0.5.2rc0.dist-info/METADATA,sha256=6S-9JaZ2NMphKGxZVRyfEQWUwyjRY3I0a3HY4qNLU00,28157
-sglang-0.5.2rc0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sglang-0.5.2rc0.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.5.2rc0.dist-info/RECORD,,
+sglang-0.5.2rc1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
+sglang-0.5.2rc1.dist-info/METADATA,sha256=mMYLEpjZJzf_6puNSLlKkwaPpNRJ9nJ7mAsmRgSvb3k,28151
+sglang-0.5.2rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sglang-0.5.2rc1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.5.2rc1.dist-info/RECORD,,