sglang 0.5.2rc0__py3-none-any.whl → 0.5.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/configs/model_config.py +2 -1
- sglang/srt/distributed/parallel_state.py +3 -1
- sglang/srt/entrypoints/engine.py +1 -1
- sglang/srt/layers/moe/cutlass_w4a8_moe.py +1 -9
- sglang/srt/layers/moe/ep_moe/layer.py +2 -7
- sglang/srt/layers/moe/fused_moe_triton/__init__.py +5 -3
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +5 -1048
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +212 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +796 -0
- sglang/srt/layers/moe/fused_moe_triton/layer.py +5 -2
- sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py +87 -0
- sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +8 -0
- sglang/srt/layers/quantization/w4afp8.py +30 -25
- sglang/srt/managers/detokenizer_manager.py +0 -34
- sglang/srt/managers/multi_tokenizer_mixin.py +44 -6
- sglang/srt/managers/scheduler.py +3 -0
- sglang/srt/mem_cache/hiradix_cache.py +19 -3
- sglang/srt/mem_cache/memory_pool_host.py +2 -0
- sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py +61 -34
- sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +27 -6
- sglang/srt/models/deepseek_v2.py +5 -0
- sglang/srt/models/gpt_oss.py +5 -4
- sglang/srt/models/longcat_flash.py +26 -15
- sglang/srt/models/longcat_flash_nextn.py +23 -15
- sglang/srt/utils.py +0 -10
- sglang/test/test_cutlass_w4a8_moe.py +24 -9
- sglang/version.py +1 -1
- {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/METADATA +2 -2
- {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/RECORD +32 -29
- {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/WHEEL +0 -0
- {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/top_level.txt +0 -0
sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
CHANGED
@@ -113,6 +113,8 @@ def synchronized():


 class HiCacheHF3FS(HiCacheStorage):
+    """HiCache backend that stores KV cache pages in HF3FS files."""
+
     default_env_var: str = "SGLANG_HICACHE_HF3FS_CONFIG_PATH"

     def __init__(
@@ -176,15 +178,32 @@ class HiCacheHF3FS(HiCacheStorage):
         dtype: torch.dtype,
         storage_config: HiCacheStorageConfig = None,
     ) -> "HiCacheHF3FS":
+        """Create a HiCacheHF3FS instance from environment configuration.
+
+        Environment:
+        - Uses env var stored in `HiCacheHF3FS.default_env_var` to locate a JSON config.
+        - Falls back to a local single-machine config when the env var is not set.
+
+        Raises:
+            ValueError: If MLA Model is requested without global metadata server or required keys are missing.
+        """
         from sglang.srt.mem_cache.storage.hf3fs.mini_3fs_metadata_server import (
             Hf3fsGlobalMetadataClient,
             Hf3fsLocalMetadataClient,
         )

-        rank = storage_config.tp_rank if storage_config is not None else 0
+        if storage_config is not None:
+            rank, is_mla_model = storage_config.tp_rank, storage_config.is_mla_model
+        else:
+            rank, is_mla_model = 0, False
+
+        mla_unsupported_msg = f"MLA model is not supported without global metadata server, please refer to https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/mem_cache/storage/hf3fs/docs/deploy_sglang_3fs_multinode.md"

         config_path = os.getenv(HiCacheHF3FS.default_env_var)
         if not config_path:
+            if is_mla_model:
+                raise ValueError(mla_unsupported_msg)
+
             return HiCacheHF3FS(
                 rank=rank,
                 file_path=f"/data/hicache.{rank}.bin",
@@ -214,25 +233,27 @@ class HiCacheHF3FS(HiCacheStorage):
             raise ValueError(f"Missing required keys in config: {missing_keys}")

         # Choose metadata client based on configuration
-
-        if "metadata_server_url" in config and config["metadata_server_url"]:
+        if config.get("metadata_server_url"):
             # Use global metadata client to connect to metadata server
             metadata_server_url = config["metadata_server_url"]
             metadata_client = Hf3fsGlobalMetadataClient(metadata_server_url)

-            # Enable MLA optimization only when using the global metadata client
-            is_mla_model = storage_config.is_mla_model if storage_config else False
             logger.info(
                 f"Using global metadata client with server url: {metadata_server_url}"
             )
         else:
+            # Enable MLA optimization only when using the global metadata client
+            if is_mla_model:
+                raise ValueError(mla_unsupported_msg)
+
             # Use local metadata client for single-machine deployment
             metadata_client = Hf3fsLocalMetadataClient()

+        rank_for_path = 0 if is_mla_model else rank
         return HiCacheHF3FS(
             rank=rank,
             # Let all ranks use the same file path for MLA model
-            file_path=f"{config['file_path_prefix']}.{0 if is_mla_model else rank}.bin",
+            file_path=f"{config['file_path_prefix']}.{rank_for_path}.bin",
             file_size=int(config["file_size"]),
             numjobs=int(config["numjobs"]),
             bytes_per_page=bytes_per_page,
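For orientation (not part of the diff): a minimal sketch of the JSON config that from_env_config loads. The key names file_path_prefix, file_size, numjobs, and metadata_server_url appear in the hunks above; every value, path, and URL below is invented for illustration.

    # Hypothetical HiCacheHF3FS config; key names from the diff, values made up.
    import json
    import os

    config = {
        "file_path_prefix": "/data/hicache",  # backing files become <prefix>.<rank>.bin
        "file_size": 1 << 40,                 # bytes reserved per backing file
        "numjobs": 16,                        # I/O concurrency
        # Optional: selects Hf3fsGlobalMetadataClient (required for MLA models).
        "metadata_server_url": "http://metadata-host:18000",
    }
    with open("/tmp/hf3fs_config.json", "w") as f:
        json.dump(config, f)
    os.environ["SGLANG_HICACHE_HF3FS_CONFIG_PATH"] = "/tmp/hf3fs_config.json"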
sglang/srt/models/deepseek_v2.py
CHANGED
@@ -2185,6 +2185,8 @@ class DeepseekV2ForCausalLM(nn.Module):
             disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
         elif get_moe_expert_parallel_world_size() > 1:
             disable_reason = "Deepseek V3/R1 can not use shared experts fusion optimization under expert parallelism."
+        elif self.quant_config.get_name() == "w4afp8":
+            disable_reason = "Deepseek V3/R1 W4AFP8 model uses different quant method for routed experts and shared experts."

         if disable_reason is not None:
             global_server_args_dict["disable_shared_experts_fusion"] = True
@@ -2496,6 +2498,9 @@ class DeepseekV2ForCausalLM(nn.Module):
             ckpt_up_proj_name="up_proj",
             num_experts=self.config.n_routed_experts + self.num_fused_shared_experts,
         )
+        # Params for special naming rules in mixed-precision models, for example:
+        # model.layers.xx.mlp.experts.xx.w1.input_scale. For details,
+        # see https://huggingface.co/Barrrrry/DeepSeek-R1-W4AFP8/blob/main.
         if self.quant_config and self.quant_config.get_name() == "w4afp8":
             expert_params_mapping += FusedMoE.make_expert_input_scale_params_mapping(
                 num_experts=self.config.n_routed_experts
sglang/srt/models/gpt_oss.py
CHANGED
@@ -193,8 +193,9 @@ class GptOssSparseMoeBlock(nn.Module):
         return ans


-def _enable_fused_set_kv_buffer():
-    return _is_cuda
+def _enable_fused_set_kv_buffer(forward_batch: ForwardBatch):
+    """Enable fused set_kv_buffer only on CUDA with bfloat16 KV cache."""
+    return _is_cuda and forward_batch.token_to_kv_pool.dtype == torch.bfloat16


 # TODO maybe move to a model-common utils
@@ -341,7 +342,7 @@ class GptOssAttention(nn.Module):
                     layer=self.attn,
                     forward_batch=forward_batch,
                 )
-                if _enable_fused_set_kv_buffer()
+                if _enable_fused_set_kv_buffer(forward_batch)
                 else None
             ),
         )
@@ -355,7 +356,7 @@ class GptOssAttention(nn.Module):
         attn_output = self.attn(
             *inner_state,
             sinks=self.sinks,
-            save_kv_cache=not _enable_fused_set_kv_buffer(),
+            save_kv_cache=not _enable_fused_set_kv_buffer(forward_batch),
         )
         output, _ = self.o_proj(attn_output)
         return output
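Note how the two call sites stay complementary: the same predicate that enables the fused set_kv_buffer argument also turns save_kv_cache off, so each token's KV entry is written by exactly one of the two paths.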
sglang/srt/models/longcat_flash.py
CHANGED
@@ -651,9 +651,6 @@ class LongcatFlashForCausalLM(nn.Module):
                 ).T
             else:
                 w = self_attn.kv_b_proj.weight
-                # NOTE(HandH1998): Since `bmm_fp8` only supports per-tensor scale, we have to requantize `self_attn.kv_b_proj`.
-                # This may affect the accuracy of fp8 model.
-                # Fix deepseek v3 blockwise bmm by using deep_gemm
             use_deep_gemm_bmm = False

             if w.dtype in (
@@ -790,6 +787,9 @@ class LongcatFlashForCausalLM(nn.Module):
                 self.config.hidden_size / self.config.kv_lora_rank
             ) ** 0.5

+        # TODO(linguoyuan) EPMoE not support DEEPGEMM_BLACKWELL, DeepEP needs to be supported in the future
+        deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0 = False
+
         if (
             deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
             and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
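Since DEEPGEMM_SCALE_UE8M0 is pinned to False just above, the `ENABLE_JIT_DEEPGEMM and DEEPGEMM_SCALE_UE8M0` condition below can no longer be true when this code runs, disabling the UE8M0 requantization path that the TODO says EPMoE/DeepEP cannot yet support.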
@@ -804,24 +804,35 @@ class LongcatFlashForCausalLM(nn.Module):
         for layer_id in range(self.config.num_hidden_layers):
             layer = self.model.layers[layer_id]
             for i in range(2):
-                for module in [
-                    layer.self_attn[i].fused_qkv_a_proj_with_mqa,
-                    layer.self_attn[i].q_b_proj,
-                    layer.self_attn[i].kv_b_proj,
-                    layer.self_attn[i].o_proj,
-                ]:
-                    requant_weight_ue8m0_inplace(
-                        module.weight, module.weight_scale_inv, weight_block_size
-                    )
+                self_attn = layer.self_attn[i]
+                module_list = [
+                    self_attn.kv_b_proj,
+                    self_attn.o_proj,
+                ]
+
+                if self.config.q_lora_rank is not None:
+                    module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+                    module_list.append(self_attn.q_b_proj)
+                else:
+                    module_list.append(self_attn.kv_a_proj_with_mqa)
+                    module_list.append(self_attn.q_proj)
+
+                for module in module_list:
+                    if hasattr(module, "weight_scale_inv"):
+                        requant_weight_ue8m0_inplace(
+                            module.weight, module.weight_scale_inv, weight_block_size
+                        )
+
                 mlp = layer.mlps[i]
                 assert isinstance(mlp, LongcatFlashMLP)
                 for module in [
                     mlp.gate_up_proj,
                     mlp.down_proj,
                 ]:
-                    requant_weight_ue8m0_inplace(
-                        module.weight, module.weight_scale_inv, weight_block_size
-                    )
+                    if hasattr(module, "weight_scale_inv"):
+                        requant_weight_ue8m0_inplace(
+                            module.weight, module.weight_scale_inv, weight_block_size
+                        )

         for layer_id in range(self.config.num_hidden_layers):
             experts = layer.mlp.experts
sglang/srt/models/longcat_flash_nextn.py
CHANGED
@@ -344,9 +344,6 @@ class LongcatFlashForCausalLMNextN(LongcatFlashForCausalLM):
                 ).T
             else:
                 w = self_attn.kv_b_proj.weight
-                # NOTE(HandH1998): Since `bmm_fp8` only supports per-tensor scale, we have to requantize `self_attn.kv_b_proj`.
-                # This may affect the accuracy of fp8 model.
-                # Fix deepseek v3 blockwise bmm by using deep_gemm
             use_deep_gemm_bmm = False
             if w.dtype in (
                 torch.float8_e4m3fn,
@@ -480,24 +477,35 @@ class LongcatFlashForCausalLMNextN(LongcatFlashForCausalLM):
     def _weight_requant_ue8m0(self):
         weight_block_size = self.quant_config.weight_block_size
         layer = self.model.decoder
-        for module in [
-            layer.self_attn.fused_qkv_a_proj_with_mqa,
-            layer.self_attn.q_b_proj,
-            layer.self_attn.kv_b_proj,
-            layer.self_attn.o_proj,
-        ]:
-            requant_weight_ue8m0_inplace(
-                module.weight, module.weight_scale_inv, weight_block_size
-            )
+        self_attn = layer.self_attn
+        module_list = [
+            self_attn.kv_b_proj,
+            self_attn.o_proj,
+        ]
+
+        if self.config.q_lora_rank is not None:
+            module_list.append(self_attn.fused_qkv_a_proj_with_mqa)
+            module_list.append(self_attn.q_b_proj)
+        else:
+            module_list.append(self_attn.kv_a_proj_with_mqa)
+            module_list.append(self_attn.q_proj)
+
+        for module in module_list:
+            if hasattr(module, "weight_scale_inv"):
+                requant_weight_ue8m0_inplace(
+                    module.weight, module.weight_scale_inv, weight_block_size
+                )
+
         mlp = layer.mlps
         assert isinstance(mlp, LongcatFlashMLP)
         for module in [
             mlp.gate_up_proj,
             mlp.down_proj,
         ]:
-            requant_weight_ue8m0_inplace(
-                module.weight, module.weight_scale_inv, weight_block_size
-            )
+            if hasattr(module, "weight_scale_inv"):
+                requant_weight_ue8m0_inplace(
+                    module.weight, module.weight_scale_inv, weight_block_size
+                )

     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         stacked_params_mapping = [
sglang/srt/utils.py
CHANGED
@@ -2787,16 +2787,6 @@ def lru_cache_frozenset(maxsize=128):
     return decorator


-def get_worker_ids_from_req_rids(rids):
-    if isinstance(rids, list):
-        worker_ids = [int(rid.split("_")[0]) for rid in rids]
-    elif isinstance(rids, str):
-        worker_ids = [int(rids.split("_")[0])]
-    else:
-        worker_ids = []
-    return worker_ids
-
-
 def get_origin_rid(rid):
     return rid.split("_", 1)[1] if "_" in rid else rid

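The removed helper decoded the worker-id prefix that get_origin_rid (kept above) strips; per the summary, multi_tokenizer_mixin.py also changes in this release, where the logic presumably now lives. A sketch of the rid convention implied by the two functions:

    # rids are tagged "<worker_id>_<origin_rid>"; values here are hypothetical.
    rid = "3_req-42"
    worker_id = int(rid.split("_")[0])                        # -> 3
    origin_rid = rid.split("_", 1)[1] if "_" in rid else rid  # -> "req-42"
    assert (worker_id, origin_rid) == (3, "req-42")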
sglang/test/test_cutlass_w4a8_moe.py
CHANGED
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

-from typing import Optional
+from typing import Literal, Optional

 import pytest
 import torch
@@ -25,7 +25,7 @@ def pack_int4_values_to_int8(int4_values_interleaved: torch.Tensor) -> torch.Tensor:
     return packed_tensor.to(torch.int8)


-def pack_interleave(num_experts, ref_weight, ref_scale):
+def pack_interleave(num_experts, ref_weight, ref_scale, alignment=4):
     n, k = ref_weight.shape[1], ref_weight.shape[2]

     weight = pack_int4_values_to_int8(ref_weight.cpu()).cuda()
@@ -33,11 +33,16 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
     w_q = w_q.contiguous()

     scale_interleaved = ref_scale.reshape(
-        ref_scale.shape[0], ref_scale.shape[1], ref_scale.shape[2] // 4, 4
+        ref_scale.shape[0],
+        ref_scale.shape[1],
+        (ref_scale.shape[2] // alignment),
+        alignment,
     )  # [E, N, K/4, 4]
     scale_interleaved = scale_interleaved.permute(0, 2, 1, 3)  # [E, K/4, N, 4]
     scale_interleaved = scale_interleaved.reshape(
-        ref_scale.shape[0], ref_scale.shape[2] // 4, ref_scale.shape[1] * 4
+        ref_scale.shape[0],
+        ref_scale.shape[2] // alignment,
+        ref_scale.shape[1] * alignment,
     )  # [E, K/4, N*4]
     w_scale = scale_interleaved.contiguous()

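As a reading aid (not from the diff), a toy-shape walk-through of the interleaving above; sizes are arbitrary and Ks stands for the number of scale columns (K // group_size):

    import torch

    E, N, Ks, alignment = 2, 8, 12, 4
    ref_scale = torch.randn(E, N, Ks)
    s = ref_scale.reshape(E, N, Ks // alignment, alignment)  # [E, N, K/4, 4]
    s = s.permute(0, 2, 1, 3)                                # [E, K/4, N, 4]
    s = s.reshape(E, Ks // alignment, N * alignment)         # [E, K/4, N*4]
    assert s.shape == (2, 3, 32)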
@@ -48,12 +53,17 @@ def pack_interleave(num_experts, ref_weight, ref_scale):
 @pytest.mark.parametrize("N", [2048])
 @pytest.mark.parametrize("K", [7168])
 @pytest.mark.parametrize("E", [256])
-@pytest.mark.parametrize("ep_size", [8])
+@pytest.mark.parametrize("tp_size", [8])
+@pytest.mark.parametrize("use_ep_moe", [True, False])
 @pytest.mark.parametrize("topk", [8])
 @pytest.mark.parametrize("group_size", [128])
 @pytest.mark.parametrize("dtype", [torch.bfloat16])
-def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
-    local_e = E // ep_size
+def test_cutlass_w4a8_moe(M, N, K, E, tp_size, use_ep_moe, topk, group_size, dtype):
+    if use_ep_moe:
+        local_e = E // tp_size
+    else:  # tp mode
+        local_e = E
+        N = N // tp_size

     debug = False
     if debug:
@@ -87,7 +97,10 @@ def test_cutlass_w4a8_moe(M, N, K, E, ep_size, topk, group_size, dtype):
     )

     w1_q, w1_scale = pack_interleave(local_e, ref_weight_1, scale_1)
-    w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+    if use_ep_moe:
+        w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2)
+    else:
+        w2_q, w2_scale = pack_interleave(local_e, ref_weight_2, scale_2, 1)

     device = "cuda"
     a_strides1 = torch.full((local_e, 3), K, device=device, dtype=torch.int64)
@@ -265,7 +278,9 @@ def ref(

     gate, fc1 = fc1.chunk(2, dim=-1)
     fc1 = fc1 * torch.nn.functional.silu(gate)
-    act = (fc1 / pre_quant_scale_2.float()).to(torch.float8_e4m3fn)
+    act = torch.clamp((fc1 / pre_quant_scale_2.float()), -448.0, 448.0).to(
+        torch.float8_e4m3fn
+    )
     act = act.to(dtype)

     w2 = ref_weight_2[e_idx]
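The ±448.0 bound is the largest finite value of the e4m3fn format, so the reference now saturates activations before the FP8 cast instead of letting them overflow. A quick check (not from the diff):

    import torch

    assert torch.finfo(torch.float8_e4m3fn).max == 448.0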
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.5.2rc0"
+__version__ = "0.5.2rc1"
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sglang
-Version: 0.5.2rc0
+Version: 0.5.2rc1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
          Version 2.0, January 2004
@@ -257,7 +257,7 @@ Requires-Dist: uvloop; extra == "runtime-common"
 Requires-Dist: xgrammar==0.1.23; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
-Requires-Dist: sgl-kernel==0.3.
+Requires-Dist: sgl-kernel==0.3.8; extra == "srt"
 Requires-Dist: torch==2.8.0; extra == "srt"
 Requires-Dist: torchaudio==2.8.0; extra == "srt"
 Requires-Dist: torchvision; extra == "srt"
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/RECORD
CHANGED
@@ -9,7 +9,7 @@ sglang/global_config.py,sha256=ZMTux_PsGnvkyJ0kTFwhTdbnFwIjjpGDogut_9Lu4Vo,1732
 sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
 sglang/profiler.py,sha256=JCpZzlDhahoiPlPi5IG3n7GFrQHxfHEB6ELie3Ck55w,4397
 sglang/utils.py,sha256=dC2PNkKYTgDHsNrWdZJ74GvaXGSHCeIk_aZ-TA89OhY,16380
-sglang/version.py,sha256=
+sglang/version.py,sha256=Yk9OPhzYNwVtFzj5deHSdkGtcxF0FXLnKJ9OFFikW9M,25
 sglang/eval/llama3_eval.py,sha256=mLNRZJIqV4CfqrY8UGnJEcHw2Xsyr1eyYZgFSUFYr1g,9997
 sglang/eval/loogle_eval.py,sha256=-CC2s2kh5qUoDrHRkQVkC_jNvBgNojXbf456ny5s78s,4557
 sglang/lang/api.py,sha256=rcp3GeoyZhmJ0GDLPRkuZNcxd0TBJy_wfUDpcmQoqW8,7210
@@ -46,7 +46,7 @@ sglang/srt/reasoning_parser.py,sha256=HEWAeFzPA_Jn3a44BYCz61QNV6kAvX46Y0tR8csAUg
 sglang/srt/server_args.py,sha256=qEh8ykOglDMHh3GvyUhG0oOSJq_tH8vUYtDzSukoOtk,104043
 sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
 sglang/srt/two_batch_overlap.py,sha256=UykF5nC2rja3Hvmu0D9glqKdVRIEhQGPV84Jm7veopQ,34150
-sglang/srt/utils.py,sha256=
+sglang/srt/utils.py,sha256=3qKij1k6uj0Ch-gErdsfXJFUiCU941L0ePoqnIqxZvk,95252
 sglang/srt/warmup.py,sha256=zldxhMlXpclRAJXmfBjJNUJd1eDizVdysibBvQyTVuA,1782
 sglang/srt/configs/__init__.py,sha256=3GdmJ2DUiNq1zNs3yOILwZzL0J8fK-h8k2P5YVgxEI0,833
 sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -60,7 +60,7 @@ sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew
 sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
 sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
 sglang/srt/configs/longcat_flash.py,sha256=Qp25xJVLq2K72Z80cXhcJxtqhagAdiPySDoevuT0Sno,3589
-sglang/srt/configs/model_config.py,sha256=
+sglang/srt/configs/model_config.py,sha256=0oEbC4bxtfPckBuY_p6uXHB1vDXxhkvJCpr9cPbExwI,31712
 sglang/srt/configs/step3_vl.py,sha256=_Otgnym57DVgB_kZ__8c1_Ys5gSalA_K0ZuVjcG51T0,4845
 sglang/srt/configs/update_config.py,sha256=GEf-XhL8JPrbX9-Hz8V7S3M6YTg76DVdIhc_4YdMDtc,6291
 sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
@@ -108,7 +108,7 @@ sglang/srt/disaggregation/nixl/conn.py,sha256=eSof87fG21Dd4COszfnbeXIxne3TWvw0mS
 sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
 sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
 sglang/srt/distributed/naive_distributed.py,sha256=5Kcfapzz61G3TtScTZrHoWa4bf6Vr27GlMcBAGMz7tQ,3260
-sglang/srt/distributed/parallel_state.py,sha256=
+sglang/srt/distributed/parallel_state.py,sha256=NsWEw341ew7dElC9BQ3vBLzaLVTDKCmCkKIl37b72dg,65717
 sglang/srt/distributed/utils.py,sha256=aaCxATncLGnVgB0WlGpBdee0behKW8Dy_dakqcuKSaQ,8497
 sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
 sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=Q1kkKPKFPV0QMmKLyjOBlOnX8-Pr4UeGBZYkG6j0gc0,16570
@@ -124,7 +124,7 @@ sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=IrSrnpZnii0E
 sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
 sglang/srt/entrypoints/EngineBase.py,sha256=yKN76witT2jz1zhmLHmPNLGMpK2UiOTaKQ2KPD8l99U,2594
 sglang/srt/entrypoints/context.py,sha256=aD-94xkD0komuGO5gtYUoJKCHdc4hAipMxQt04yVRGA,8030
-sglang/srt/entrypoints/engine.py,sha256=
+sglang/srt/entrypoints/engine.py,sha256=wNEYxQTVFHt9EvMzQr5zutX9Cb7RDDn64c8Xckuwhsg,33490
 sglang/srt/entrypoints/harmony_utils.py,sha256=01T-A5GBUm2b306PcxNEg2rfx4cykBcqNYrzcXTWBlc,13590
 sglang/srt/entrypoints/http_server.py,sha256=_GEk6RgxlMWYUNXOx9he2OIFOs1-Qan1NrSm0EAGJ3M,49649
 sglang/srt/entrypoints/http_server_engine.py,sha256=_--j4U04OeJLlnnv1f0XmCd_Ry0z1FlhkrbePX8rYV0,4938
@@ -220,7 +220,7 @@ sglang/srt/layers/attention/wave_ops/prefill_attention.py,sha256=viTUit0rxjVV5Ua
 sglang/srt/layers/moe/__init__.py,sha256=63TxUpSiUpVg1SDY1zdlTg3WFJzAc7WSndOViOmUv4E,835
 sglang/srt/layers/moe/cutlass_moe.py,sha256=JKJED-4709ndP5AwhQ7Vi04GJjw5d9Xl_mWOsZPZ3U4,14298
 sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
-sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=
+sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=pUvYkbm3kD5IBjVuJeehU_hvEvdNcYcO2eRZXDc6iLI,7005
 sglang/srt/layers/moe/fused_moe_native.py,sha256=8SAToE4B-22H5JsENZgJ1Io6QfE5-D9ItWLtbGksFQQ,3372
 sglang/srt/layers/moe/rocm_moe_utils.py,sha256=07Z99bTV3B-b2Cbm-odhGpx2twxtnVpYvaDMBE1K3LM,4555
 sglang/srt/layers/moe/router.py,sha256=eUNu_Uz5VB2FOZzZyYuZo5pokCVBS17_fcjHQbmvDSE,12181
@@ -228,10 +228,13 @@ sglang/srt/layers/moe/topk.py,sha256=r8pE6eJ8dprfZxaw8VcfrRvSp6_xDNPkr0tajmG_CZc
 sglang/srt/layers/moe/utils.py,sha256=omh9E6sF-KtrELEsE9y01Ash7FfQlstyE-97UtpH8qk,5998
 sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/srt/layers/moe/ep_moe/kernels.py,sha256=Acg6dW-zVDQ95vuBVuzxb5SUFFxhLCewk_tVSZeuma8,46158
-sglang/srt/layers/moe/ep_moe/layer.py,sha256=
-sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=
-sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
-sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
+sglang/srt/layers/moe/ep_moe/layer.py,sha256=uAHuiAILb3XQi9t1JGMElfzvsG9cKIDMXs6ezEGhGvY,27776
+sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=tis0ZJmih7gKHCurbLtY_o-bY3K4MOzQLYLC3ftIOf0,977
+sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=q_L-eZh9pBP3KZLipv9cRe2WpezNrHMz8NzMy97TV0Q,23178
+sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py,sha256=fFnRcXA6r0gnD_7EQmb8NacQJRJ8YHsmMZw2khlClTQ,7687
+sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py,sha256=z10ZJLrFFhgB5_cEFrvMt6bnwe46T5Vzz4nvMl4ErGU,27776
+sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=TI3p8FOJJjpoyE7VMngs9BPeiiN9M9XHZJRWVTWtiH0,39637
+sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py,sha256=U93mxPPU2RP7d3QRSvoG3OCtUQIK8YhQfHf1ZGeREmY,3284
 sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=Ai06BZ7uxMnk0nPWQelgvi1rV9Z72FetRo6p7E3rsYs,10986
 "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
 "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
@@ -455,7 +458,7 @@ sglang/srt/layers/quantization/petit_utils.py,sha256=-gy4zMhqNoIA1R0n7-5C0efV54j
 sglang/srt/layers/quantization/qoq.py,sha256=jM96uJfKz3vo1B6SSIrQXNT3vnD3UJrepK2eY-tSQU4,8139
 sglang/srt/layers/quantization/unquant.py,sha256=CxsOEYqYIfanKHo0ooFArbVz2ueX3vBWnYHE8gtnzdQ,12996
 sglang/srt/layers/quantization/utils.py,sha256=d4eaS4-Z4q3GRgb4HDMr2EoWEdQCefrVdJufK6n_NQY,18509
-sglang/srt/layers/quantization/w4afp8.py,sha256=
+sglang/srt/layers/quantization/w4afp8.py,sha256=Cm_KjJQu-XKqSKh6usJ5tNXARm6F5n_C2P1c1NuYdC0,11720
 sglang/srt/layers/quantization/w8a8_fp8.py,sha256=wzJi5jeTnbf-01iehOVTTCu_262rlMh9AQ2rogKWBmo,9981
 sglang/srt/layers/quantization/w8a8_int8.py,sha256=cOKbhWxFlv6hw5dSs8ExPBXFcTwudwR26m3CAoOhCSs,35342
 sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -619,7 +622,7 @@ sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a
 "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=FImA-TJ_tQDjqwoNWxS--sRDoKDXf9gamlME3tkxH58,3252
 "sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
 sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py,sha256=esJMd0Yuj68t6QYOpmIFuiWP2J2dxTMC4bRBNH0Xk6I,26
-sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=
+sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py,sha256=PI9r3PHCMK9EgpoFuBgR4jvokA5sBz8zyC47ps3wet4,8164
 sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py,sha256=8C4xyO58N3Zl8h-fYddUpXDs3mnSqjYbaLt12cPB2XA,778
 sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py,sha256=vCj5vdAshEB9mAgSUYXhgJ0bd1Ithmu_n4-m_IWUbd4,2531
 sglang/srt/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -646,14 +649,14 @@ sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=VqCAFvUtq_l-0RGIkx3W_fzD55QcW2
 sglang/srt/managers/cache_controller.py,sha256=WEaM01U0al13oSU7AHvwEBcmOy-1SmtPWbikgrPAx6g,33130
 sglang/srt/managers/configure_logging.py,sha256=8sNXZ2z9pBWOwn-X3wyz013Ob8Nbm1zDxRkxoZjH-l4,1633
 sglang/srt/managers/data_parallel_controller.py,sha256=VvEkidmYCGAFTv6upEHf_tyTvkM8xLhCyOnLJGWL6BQ,14592
-sglang/srt/managers/detokenizer_manager.py,sha256=
+sglang/srt/managers/detokenizer_manager.py,sha256=mVEzkaCTu0MUe8QuwQC9uReOJgTWVQYXcItlDS2gYK4,11594
 sglang/srt/managers/io_struct.py,sha256=ebAKRqBYj4slW25-dWs51JAW6caPcvuIMlSoJJZd8wo,40503
 sglang/srt/managers/mm_utils.py,sha256=J5hahCJ7HAKhJtGzV6PRa5HB9B2NjYwWGYMqIgiGC60,29444
-sglang/srt/managers/multi_tokenizer_mixin.py,sha256=
+sglang/srt/managers/multi_tokenizer_mixin.py,sha256=5HbUePDkWeEua_500sEJdKYp9CfRITKUAeFEBp8OR7k,24706
 sglang/srt/managers/multimodal_processor.py,sha256=cnWpu2G79v1a6FJB_FriLxESgGUbfC3GptLmeRVVgew,1801
 sglang/srt/managers/schedule_batch.py,sha256=9lhBnf-siQjsThD8FRyhiF50N-LOE7dfoPr1uzklnjQ,77170
 sglang/srt/managers/schedule_policy.py,sha256=vh9BQW9tBv80LW4JApLE6smU2m4gy6cAEI25HVXiS60,22383
-sglang/srt/managers/scheduler.py,sha256=
+sglang/srt/managers/scheduler.py,sha256=Ipdj1_nDJDUNWb9Hx2W4NNndvhwRkn6B9rBGmJhDklc,111243
 sglang/srt/managers/scheduler_input_blocker.py,sha256=zP8xU_UmU2H0AB6sEqvivDcDB1QDgTauNDYDIJ7Nez4,3683
 sglang/srt/managers/scheduler_metrics_mixin.py,sha256=31BbukSyUUPMNHLqRlS5sEiAv7Gi5VHFQ1TgxvcAdcw,10054
 sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=CLwF58GTV7PkyYUoC-R7ROA-cZt8di9_9n2pajacxUY,31511
@@ -672,10 +675,10 @@ sglang/srt/mem_cache/base_prefix_cache.py,sha256=hLS2ncTMAz7Kpdk5pNwn5c6g8b61_K9
 sglang/srt/mem_cache/chunk_cache.py,sha256=jbJeEEZ5_WYEF_AnDZIAu2sMD4hAGAd_24F980fjVwU,3199
 sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
 sglang/srt/mem_cache/hicache_storage.py,sha256=eidtVRRsT8efqaEECXRLcxwJ7OomstzOctbqTucOLNg,7307
-sglang/srt/mem_cache/hiradix_cache.py,sha256=
+sglang/srt/mem_cache/hiradix_cache.py,sha256=k7P_MHQnFfvFj41T3WseTQQR0Hpun4VXrqtqnjKZHtI,29155
 sglang/srt/mem_cache/lora_radix_cache.py,sha256=4NbK0Rc15kTiCOSsthyM6WsZarMK2MNhtsNqYMakJ5k,14369
 sglang/srt/mem_cache/memory_pool.py,sha256=EGZtQsgXDOuySeEBdV6jBIt9ZDvqMsErNPe6P5ifHh8,39540
-sglang/srt/mem_cache/memory_pool_host.py,sha256=
+sglang/srt/mem_cache/memory_pool_host.py,sha256=Uy2sGFEVupj7ZBWRFBgnkn0s5Yb8NzmKre9IbBQ-ut8,26189
 sglang/srt/mem_cache/multimodal_cache.py,sha256=zPnQLQhBZ6zsUpCQPSoNkrB9EEvpoDQS4mU7c3sRWjE,2171
 sglang/srt/mem_cache/radix_cache.py,sha256=U0KEjQNcwhdFWnXYTMhDkxpiazAD1ttbADEkGPHiiTU,19116
 sglang/srt/mem_cache/radix_cache_cpp.py,sha256=YuYt4xNzfmNNSkwaAk3VLWcZI74_XQobIyEgt96f7Qs,9284
@@ -683,8 +686,8 @@ sglang/srt/mem_cache/swa_radix_cache.py,sha256=hV5OOmAqu4UT80BVsu7lhGewcbq-u__JK
 sglang/srt/mem_cache/cpp_radix_tree/radix_tree.py,sha256=tQZpz-H6HxNAQe9mYqt6aX8mfDZyu_fbJTcCNK-Ns1M,7179
 sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py,sha256=WV5M9UPPLUMMjU3Mpp6HBqyCQpGBuMyj-N5PwQcNo5k,5178
 sglang/srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp,sha256=dDYPtxmL6OFhEyv_GuTgzkTDpBjcD0EK8kphnn7eaGc,1133
-sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py,sha256=
-sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py,sha256=
+sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py,sha256=lJ9Jp2kOgQQaUihxlO9l-W8fYfOMa1h1y0WAtzzFGLM,17850
+sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py,sha256=2sa0fgXRB0rkOqGbewRtO11A8ES0DhlKZi7nZmD330I,14873
 sglang/srt/mem_cache/storage/hf3fs/test_hf3fs_utils.py,sha256=g2h0Woa2CK39pRTigHCVR_hq095dTaC70lCIAIs9THM,1089
 sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py,sha256=S0UAG86sVMDc4-LvzSZrLKeW0fec5D7py_sqe3JtXEk,10000
 sglang/srt/mem_cache/storage/mooncake_store/unit_test.py,sha256=jMdJgU41VscKS-mn_AVZE8LQbb_3MWQ57i9V3OCWLI4,1007
@@ -712,7 +715,7 @@ sglang/srt/models/dbrx.py,sha256=117gwrB_o8VlACwDH5YlXkPfuS7t2Mh-nyJvpNpIYxs,159
 sglang/srt/models/deepseek.py,sha256=M7nyHcAbTIs8vc1g6u00oy22-pp704Xp4T3clBzu4xM,17460
 sglang/srt/models/deepseek_janus_pro.py,sha256=td8xGs6ARfJ8AQCYwUhMOZoWigrAs7m3trF5-kXCqik,70418
 sglang/srt/models/deepseek_nextn.py,sha256=uoiJxAVl9DX7bQcjyuouAZz6W-cB1HMubcirTBghyR4,6115
-sglang/srt/models/deepseek_v2.py,sha256=
+sglang/srt/models/deepseek_v2.py,sha256=IOSLn1PV3GUsNmP10nd661EOlDI3ThGZBjHS7fSEnIE,107953
 sglang/srt/models/deepseek_vl2.py,sha256=7X3gI4DE7guUtNJvMLf06jO8MUHKa2Aan8evZg5hsXc,13061
 sglang/srt/models/ernie4.py,sha256=7dP7_d0i86tv8Mp21Ew9RdsFr6TskIkm8UtUd8zFo00,16067
 sglang/srt/models/ernie4_eagle.py,sha256=E7f-ygCcY8pIoyTMKFyVC2uRcG59-_RaQTO4gd5_uWI,7223
@@ -732,7 +735,7 @@ sglang/srt/models/glm4v.py,sha256=WtDvH4691H6IKrcGBrkBpweSm_iFK61bMEfvqpbblmc,23
 sglang/srt/models/glm4v_moe.py,sha256=qgJtWyRI0dy9qcTMXHxhBZp-ghfghV3PLhpOXgSAW60,17313
 sglang/srt/models/gpt2.py,sha256=kclhxEs8oJk1KCyhmAqo7rZqecVGGHYkc-a1WZi3aIk,9841
 sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtYhl4,10307
-sglang/srt/models/gpt_oss.py,sha256=
+sglang/srt/models/gpt_oss.py,sha256=6g7PE6EiOLDipmhV-XKmBcuoCGbhH6SU6NwsSoB_FJM,44469
 sglang/srt/models/granite.py,sha256=8q92shxVPAp_cJDohJATffSGd7Z0Oi-vF5jpY6DlK4s,19840
 sglang/srt/models/granitemoe.py,sha256=j1rgZ62CbBioECjUblDCw_NneDQgY_QJODsI0fqXVO8,13779
 sglang/srt/models/grok.py,sha256=8KCR13LtdnhswJrIwgb0sdFW0OxCA8GQ0fbN8gb856E,40518
@@ -753,8 +756,8 @@ sglang/srt/models/llama_embedding.py,sha256=zq-_lNu35VBFc7eemiam0zdkGIE8fzrgk5OW
 sglang/srt/models/llama_reward.py,sha256=LF2nqMV5XOrljGjAwJg43mBv3z6Q040I2EYlgZeCp8k,4681
 sglang/srt/models/llava.py,sha256=xzYip_BAwpzSIdZre43LZiyTpFISa0ZCLdO6LUSbaCg,37702
 sglang/srt/models/llavavid.py,sha256=-CSk0RJ2MQeb81sh-RISeVJFaI-XWY6nR6_I594MkME,12818
-sglang/srt/models/longcat_flash.py,sha256=
-sglang/srt/models/longcat_flash_nextn.py,sha256=
+sglang/srt/models/longcat_flash.py,sha256=gyElibQ9q_qq4tBA7eah0f3bLSzvMEpgD_n6CcRyG9M,41055
+sglang/srt/models/longcat_flash_nextn.py,sha256=oVy776-AE5z43C472oqHlcl9NfzDDEaQgGB1msZNdgc,29644
 sglang/srt/models/mimo.py,sha256=Mp-iFp4YHuiuq-H8enUF5K5QbMnVcvEa6mURH6vM3yM,6140
 sglang/srt/models/mimo_mtp.py,sha256=jSmqJAXu7G3OO7jW1oa2suI4H_Yl7u5ZT7w4lHFbHhg,7292
 sglang/srt/models/minicpm.py,sha256=CzBJyZtfMpp8jvlEl29DHI7HLVq-CxuqP1UHwxIbaUI,14567
@@ -857,7 +860,7 @@ sglang/test/test_block_fp8_deep_gemm_blackwell.py,sha256=Hnhq4kkyINHb4ONedkp5Kf7
 sglang/test/test_block_fp8_ep.py,sha256=n4X6ZKwuUUbV5Ofjg64ptlaFGI1LbRXDfFiJW1ELHgY,10546
 sglang/test/test_custom_ops.py,sha256=PenQ8zM1wj5xwiVEPVzD37pO-x90aOfFMpCRZenaKsY,5709
 sglang/test/test_cutlass_moe.py,sha256=ax-IYPm5tZjkZft0q8swHnzerOI4LB4JOLFaKGJVE-k,9629
-sglang/test/test_cutlass_w4a8_moe.py,sha256=
+sglang/test/test_cutlass_w4a8_moe.py,sha256=Ku9VCXEMJ3BwXtvb1A3FzY-zek-S-A4thWyX6m-1v-A,9219
 sglang/test/test_deepep_utils.py,sha256=749ysTBGNzh6rYUCJhhZBtZpeD15eWTeNHYCytcvZtc,7448
 sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
 sglang/test/test_fp4_moe.py,sha256=rJLkKW3glBMvI5Ed0LltOHi8zCReMa-WB50p5zGm6J4,10189
@@ -871,8 +874,8 @@ sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqn
 sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
 sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
 sglang/test/attention/test_trtllm_mla_backend.py,sha256=quZ6SYuEH7J1YMcF8YO1_bwSNMz1gecpWRGauYjbUeA,42055
-sglang-0.5.2rc0.dist-info/licenses/LICENSE,sha256=
-sglang-0.5.2rc0.dist-info/METADATA,sha256=
-sglang-0.5.2rc0.dist-info/WHEEL,sha256=
-sglang-0.5.2rc0.dist-info/top_level.txt,sha256=
-sglang-0.5.2rc0.dist-info/RECORD,,
+sglang-0.5.2rc1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
+sglang-0.5.2rc1.dist-info/METADATA,sha256=mMYLEpjZJzf_6puNSLlKkwaPpNRJ9nJ7mAsmRgSvb3k,28151
+sglang-0.5.2rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sglang-0.5.2rc1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.5.2rc1.dist-info/RECORD,,
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/WHEEL
File without changes
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/licenses/LICENSE
File without changes
{sglang-0.5.2rc0.dist-info → sglang-0.5.2rc1.dist-info}/top_level.txt
File without changes