sglang 0.4.4.post4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. sglang/lang/chat_template.py +24 -0
  2. sglang/srt/configs/model_config.py +4 -0
  3. sglang/srt/conversation.py +29 -4
  4. sglang/srt/layers/attention/flashattention_backend.py +286 -9
  5. sglang/srt/layers/moe/fused_moe_native.py +5 -0
  6. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  7. sglang/srt/layers/moe/fused_moe_triton/configs/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  8. sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  9. sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H200.json +146 -0
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  11. sglang/srt/layers/moe/fused_moe_triton/configs/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  12. sglang/srt/layers/moe/fused_moe_triton/configs/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  13. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +13 -3
  14. sglang/srt/layers/moe/fused_moe_triton/layer.py +7 -0
  15. sglang/srt/layers/quantization/__init__.py +1 -0
  16. sglang/srt/layers/quantization/blockwise_int8.py +2 -0
  17. sglang/srt/layers/quantization/fp8.py +3 -1
  18. sglang/srt/layers/quantization/moe_wna16.py +2 -0
  19. sglang/srt/layers/quantization/w8a8_int8.py +2 -0
  20. sglang/srt/layers/radix_attention.py +2 -0
  21. sglang/srt/layers/rotary_embedding.py +63 -0
  22. sglang/srt/managers/multimodal_processors/mllama4.py +161 -0
  23. sglang/srt/model_executor/model_runner.py +1 -0
  24. sglang/srt/models/llama.py +12 -4
  25. sglang/srt/models/llama4.py +420 -0
  26. sglang/srt/models/mllama4.py +154 -0
  27. sglang/version.py +1 -1
  28. {sglang-0.4.4.post4.dist-info → sglang-0.4.5.dist-info}/METADATA +1 -1
  29. {sglang-0.4.4.post4.dist-info → sglang-0.4.5.dist-info}/RECORD +32 -22
  30. {sglang-0.4.4.post4.dist-info → sglang-0.4.5.dist-info}/WHEEL +0 -0
  31. {sglang-0.4.4.post4.dist-info → sglang-0.4.5.dist-info}/licenses/LICENSE +0 -0
  32. {sglang-0.4.4.post4.dist-info → sglang-0.4.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,154 @@
1
+ # TODO: Adapted from vllm/mllama4.py
2
+ from collections.abc import Iterable
3
+ from typing import Optional, Set, Tuple
4
+
5
+ import torch
6
+ from torch import nn
7
+ from transformers import Llama4Config
8
+
9
+ from sglang.srt.layers.logits_processor import LogitsProcessor
10
+ from sglang.srt.layers.quantization import QuantizationConfig
11
+ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
12
+ from sglang.srt.model_loader.weight_utils import default_weight_loader
13
+ from sglang.srt.utils import add_prefix
14
+
15
+
16
class Llama4ForConditionalGeneration(nn.Module):
    """Llama-4 conditional-generation model serving only the text tower.

    All computation is delegated to an inner ``Llama4ForCausalLM``.
    Vision-tower and projector weights present in multimodal checkpoints
    are skipped at load time.
    """

    # Checkpoints store q/k/v projections separately; the runtime packs
    # them into a single qkv_proj parameter.
    packed_modules_mapping = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    }

    def __init__(
        self,
        config: Llama4Config,
        quant_config: Optional[QuantizationConfig] = None,
        prefix: str = "",
    ):
        super().__init__()
        self.config = config
        self.quant_config = quant_config

        # Imported lazily to avoid a circular import between model modules.
        from sglang.srt.models.llama4 import Llama4ForCausalLM

        self.language_model = Llama4ForCausalLM(
            config.text_config,
            quant_config=quant_config,
            prefix=add_prefix("language_model", prefix),
        )

        self.logits_processor = LogitsProcessor(config.text_config)

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        forward_batch: ForwardBatch,
        **kwargs: object,
    ) -> torch.Tensor:
        """Run the text decoder; extra kwargs (e.g. image inputs) are ignored."""
        return self.language_model(input_ids, positions, forward_batch)

    def permute_qk_weight_for_rotary(
        self,
        name: str,
        loaded_weight: torch.Tensor,
    ) -> Tuple[str, torch.Tensor]:
        """Reorder q/k projection weight rows for the runtime's rotary layout.

        The checkpoint interleaves each head's (even, odd) rotary pairs; the
        runtime expects the two halves blocked. Only tensors whose dotted name
        contains a q/k projection module and ends in ``weight`` are touched.

        Returns:
            The unchanged name and the (possibly permuted) weight tensor.
        """

        def permute(w: torch.Tensor, n_heads: int) -> torch.Tensor:
            # (n_heads * head_dim, hidden) -> regroup each head's rows from
            # interleaved pairs to [first halves, second halves].
            attn_in = self.language_model.config.head_dim * n_heads
            attn_out = self.language_model.config.hidden_size

            return (
                w.view(n_heads, attn_in // n_heads // 2, 2, attn_out)
                .transpose(1, 2)
                .reshape(attn_in, attn_out)
            )

        modules = name.split(".")

        # rotary embeds should be sliced
        if ("wk" in modules or "k_proj" in modules) and modules[-1] == "weight":
            loaded_weight = permute(
                loaded_weight, self.language_model.config.num_key_value_heads
            )
        elif ("wq" in modules or "q_proj" in modules) and modules[-1] == "weight":
            loaded_weight = permute(
                loaded_weight, self.language_model.config.num_attention_heads
            )

        return name, loaded_weight

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) -> Set[str]:
        """Load checkpoint tensors into this model's parameters.

        Vision-tower / projector tensors are skipped, q/k weights are
        permuted for rotary, stacked projections are routed through their
        fused parameters, and MoE expert tensors are split per shard and
        fed to each expert.

        Returns:
            The set of parameter names that were actually loaded.  (The
            original body returned ``None`` despite the ``Set[str]``
            annotation.)
        """

        stacked_params_mapping = [
            # (param_name, shard_name, shard_id)
            (".self_attn.qkv_proj", ".self_attn.q_proj", "q"),
            (".self_attn.qkv_proj", ".self_attn.k_proj", "k"),
            (".self_attn.qkv_proj", ".self_attn.v_proj", "v"),
            (".shared_expert.gate_up_proj", ".shared_expert.gate_proj", 0),
            (".shared_expert.gate_up_proj", ".shared_expert.up_proj", 1),
            (".feed_forward.gate_up_proj", ".feed_forward.gate_proj", 0),
            (".feed_forward.gate_up_proj", ".feed_forward.up_proj", 1),
        ]

        params_dict = dict(self.named_parameters())
        loaded_params: Set[str] = set()

        num_experts = self.config.text_config.num_local_experts

        for name, loaded_weight in weights:

            # This wrapper serves only the text tower.
            if name.startswith("vision_model") or name.startswith(
                "multi_modal_projector"
            ):
                continue

            name, loaded_weight = self.permute_qk_weight_for_rotary(name, loaded_weight)

            for param_name, weight_name, shard_id in stacked_params_mapping:
                if weight_name not in name:
                    continue
                name = name.replace(weight_name, param_name)
                param = params_dict[name]
                weight_loader = param.weight_loader
                weight_loader(param, loaded_weight, shard_id)
                loaded_params.add(name)
                break
            else:
                if ".experts" in name:
                    # Checkpoint packs all experts into one tensor; split the
                    # fused gate_up into its w1/w3 shards where needed.
                    # (Renamed loop variables: the original reused
                    # ``name``/``loaded_weight`` and shadowed the outer loop.)
                    if ".gate_up_proj" in name:
                        shard_names = [
                            name.replace(".experts.gate_up_proj", ".experts.w13_weight")
                        ] * 2
                        shard_weights = loaded_weight.chunk(2, dim=-1)
                        shard_ids = ["w1", "w3"]
                    else:
                        shard_names = [
                            name.replace(".experts.down_proj", ".experts.w2_weight")
                        ]
                        shard_ids = ["w2"]
                        shard_weights = [loaded_weight]
                    for shard_name, shard_weight, shard_id in zip(
                        shard_names, shard_weights, shard_ids
                    ):
                        param = params_dict[shard_name]
                        weight_loader = param.weight_loader
                        for expert_id in range(num_experts):
                            # First dim of the packed tensor indexes experts;
                            # transpose to the layout the loader expects.
                            weight_loader(
                                param,
                                shard_weight[expert_id].T,
                                shard_name,
                                shard_id=shard_id,
                                expert_id=expert_id,
                            )
                        loaded_params.add(shard_name)
                else:
                    # Skip loading extra bias for GPTQ models.
                    if name.endswith(".bias") and name not in params_dict:
                        continue
                    param = params_dict[name]
                    weight_loader = getattr(
                        param, "weight_loader", default_weight_loader
                    )
                    weight_loader(param, loaded_weight)
                    loaded_params.add(name)

        return loaded_params
153
+
154
+ EntryClass = Llama4ForConditionalGeneration
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.4.post4"
1
+ __version__ = "0.4.5"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.4.4.post4
3
+ Version: 0.4.5
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -9,9 +9,9 @@ sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
9
9
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
10
10
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
11
11
  sglang/utils.py,sha256=GIcgiRHkZ-gyPxXOdn1qFF41jkg4-YdDxbPc4mzO-qk,16159
12
- sglang/version.py,sha256=63MgDRjbuREW1iqTD8A1g5qw4uGX6I-V8Zgt5c1nnys,28
12
+ sglang/version.py,sha256=ErkLkI2TDBX1OIqi2GGa20CPeu4ZculEi-XffRbLU6M,22
13
13
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- sglang/lang/chat_template.py,sha256=xZ-kQpgb4-NY6QOqgRjOODZW5G4EvJnplaqYF6Ng2Ow,18952
14
+ sglang/lang/chat_template.py,sha256=MwNL5dNTe8g_l2ljZubnrazEgT2xEv-9O2D0Ezwxy4I,19658
15
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
16
16
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
17
17
  sglang/lang/interpreter.py,sha256=OH1SFCm4rUCPO32MTo8j5V2Z13Jic7_r1GQOP1-aHaw,33234
@@ -27,7 +27,7 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
27
27
  sglang/srt/_custom_ops.py,sha256=lUBwC5R2UfjFMA1EtC5Kh2IngsqBJM9IuMW46kJWcjE,3647
28
28
  sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
29
29
  sglang/srt/code_completion_parser.py,sha256=HhEUzdL-FVBsOot9tKDKA1l8Gdx8qsF1RRg-zHNpmLQ,5400
30
- sglang/srt/conversation.py,sha256=mzNPZX7ndgmm1E8azuK8eytN6bNCEu3WbcPReRFfhd0,27819
30
+ sglang/srt/conversation.py,sha256=WP72AZrZpiqc5RowucT2tW3jVCb1pb4veW_kpwYS4yY,28785
31
31
  sglang/srt/custom_op.py,sha256=bIZ__3FiZvkbsN9O_jeLy_49X7ZbYbw0VxoL80uWwaI,3715
32
32
  sglang/srt/function_call_parser.py,sha256=buYENeNEP5bhsvD424yGCa9wOqSfVOZSRn6zLiSJp5I,23733
33
33
  sglang/srt/hf_transformers_utils.py,sha256=_QYTl9LpU0jmKPlYooHi1etwMvb5v40JIrG_t_Fx06w,9215
@@ -48,7 +48,7 @@ sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51D
48
48
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
49
49
  sglang/srt/configs/janus_pro.py,sha256=-QtJ4ZGZiAJb0AkOEcuCHzIKLw23nF8nRk3rdCcoUO0,19016
50
50
  sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
51
- sglang/srt/configs/model_config.py,sha256=-6SEpQw-U_KWONdNYqEe8nXopqDcDtp4B4g-RZQPhHo,20581
51
+ sglang/srt/configs/model_config.py,sha256=ZioUnc5UzsBVEYHE_GgCofYL97MByZm2NfHikS9HwLo,20771
52
52
  sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
53
53
  sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
54
54
  sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
@@ -91,14 +91,14 @@ sglang/srt/layers/linear.py,sha256=HYIGxpRYL6x-jNOkyNtGAw5Ak9Nq8jkntddgTBER_1w,5
91
91
  sglang/srt/layers/logits_processor.py,sha256=Vp8ibljVEezTr54xzeOcjiJR7JdYO8ItkO5nLIIMVu0,24206
92
92
  sglang/srt/layers/parameter.py,sha256=0OTMtmsNds42e3z3wHTRJiUfxCWFwSL6DHrqgeTgGt8,15151
93
93
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
94
- sglang/srt/layers/radix_attention.py,sha256=UDL0y4Zasay_Rk-_XmIU4kaGbaF26ONvEHX5EQzLrqI,2260
95
- sglang/srt/layers/rotary_embedding.py,sha256=srAyHlM18hgEQKW7MotYKt1xGkqA_j1PU8g0PO5t3lA,42770
94
+ sglang/srt/layers/radix_attention.py,sha256=4xRq0w9yDfAVdNlBToQpmc7irq-pomJm-GlIfMtpYtk,2328
95
+ sglang/srt/layers/rotary_embedding.py,sha256=1nznPQ6EfVMDSRabKqifEE2xtMFwSri-kEepMaWdZeI,45340
96
96
  sglang/srt/layers/sampler.py,sha256=yipSyN5UWGwGS-BC-WzWMmelys4CCDtK_8b1OpaK6sM,11622
97
97
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
98
98
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
99
99
  sglang/srt/layers/attention/base_attn_backend.py,sha256=X_GIbQuU9njtUEGdUP7E_KRhmGxj3UyPHNESlL3QaQ8,3264
100
100
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
101
- sglang/srt/layers/attention/flashattention_backend.py,sha256=IwrJRmBFWLUdYqe7n4Dd2rX4zu9V55qBLjCdNQEmkd0,30306
101
+ sglang/srt/layers/attention/flashattention_backend.py,sha256=ORtcSJUDbV2qfKGkq9ohiy8JJ1SU9R2I5fSMizF4EhI,42572
102
102
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=3fxS2NQzCBw7h_gLxBjHcyDkf2quWqBxr_N01lYmfJo,45865
103
103
  sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=pnVhvVEK87iFW8gUb1G7X7c1tqro8R2DSEOFCnlV8Bo,30301
104
104
  sglang/srt/layers/attention/flashmla_backend.py,sha256=1RPFNtQOBw6BWxIjrzfJgA9Nx92udLbR-S5KXmqjxS8,10536
@@ -111,7 +111,7 @@ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=BXUY8
111
111
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=hbBvnhc2zqu-E3HNROVXyNOZbtDkVRuFus-yTjmE0Sg,13668
112
112
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
113
113
  sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
114
- sglang/srt/layers/moe/fused_moe_native.py,sha256=KF0J5W5biWYWbERUHAduQFnUV5om9VaYv0I9avsVfgU,4330
114
+ sglang/srt/layers/moe/fused_moe_native.py,sha256=bf0po921lY9xnlZivdJly0bGIYFlLqp5v8Mz7tG5bdg,4451
115
115
  sglang/srt/layers/moe/router.py,sha256=gvyK7hXlujfCZCmAIFc3oxfgjuAjzlpPe3mp1Blc6Y0,10419
116
116
  sglang/srt/layers/moe/topk.py,sha256=iUb-64CaNAUfvBZ1pkgsedcLRQs2sVSIzQ5300WmdXI,10242
117
117
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -119,8 +119,8 @@ sglang/srt/layers/moe/ep_moe/kernels.py,sha256=ijqRzS-tb0LGnDU5hW-g0JH104ppADrWa
119
119
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=1TmWnxv-bW1Qbgru-V-vGnt3ruuTIwHQy0Y5ZA_xzvE,36824
120
120
  sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=jnr6KSM8YooftTjZ3gYe0eWpOd1dmkXqk4hKRvLTwCo,19708
121
121
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
122
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=_ufAA7m6alppU4vVxh7QrgSgFOcBSekLQ-UQdHq7TmQ,53988
123
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJADUOBJoRHMIWmf-DU4,24100
122
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=sjAXnjUmLXPpvFFL4VShBce_9xygWY2twAQJ74OJ_ZQ,54500
123
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=43-UL9KEMoaiC0cRSzWFbg2PADtcoxfZqjZ6TOvQ7Vk,24551
124
124
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
125
125
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
126
126
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -132,6 +132,10 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
132
132
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=yf33YmWlVSjjyg0Q4OMAWvc9gjRxvttMrQBUEOfPl4I,4153
133
133
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ZWMClYN1moVRUP2f0hYac38di_pUgZggyl9d2D5rnoc,4136
134
134
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=C65Q2Mv1LxFQ_qDnv11IZ9nwl7sGZo72nWDflMttu4g,4147
135
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=FsWbV4Q6AzAtgegVuENBDz2ZcSJsqNiwUIVfQbpP7hQ,3244
136
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=pk6VZChh2Y0CsJSzjtUhOnlta1QLTUEWy33aKQU47XY,3244
137
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Gmk24hc5lVIfQtqSa5wLOcWKedMN8aZUe93DBh6J1AY,3249
138
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H200.json",sha256=uY_XMPomaXMXxIkTR4ctU_Ybri_jMv2VvCcV-f6O_bw,3255
135
139
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254
136
140
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250
137
141
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246
@@ -139,6 +143,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
139
143
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=0ejgnIQ_mzJhKjQpnT-I1Vj9-rPfGlTcQ8u0cXgekUw,2746
140
144
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=lU4fjngJmpzOafi-3_q0vj2pLfZQVVagFnZNoI97etk,4128
141
145
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=ilUf76TmbtaL3W0bara6JIBEiV_iPIs29UjLbH_GYtc,4145
146
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=4ye1VakIIzrGUTxGNb_DM5v59djLgok8SeBeU_YI6Go,3252
142
147
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254
143
148
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257
144
149
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=OeJUCPVBD1z9CSooZpy3hRyAasjKqFAQaTLcWK6PWno,2741
@@ -152,6 +157,8 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
152
157
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
153
158
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
154
159
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=fRzKfVFIcnxqu6DvGJQNltuFRRGz8F-eaL73bIzBzo8,3255
160
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=zq1x3KwZ3YuzE42OPPsElXT_VS9sK135CdB1RsotLU4,3252
161
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=lJbEa7VsBVKL_iRoHniB-2nceMrPctkqtjKszPnX4pk,3248
155
162
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=RuUDK9XfgXs1eZESWQR9ba4tu-rCRG_UCYwjaJ568sI,3264
156
163
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json",sha256=wlCi9aoYp7Zc1GThEutvWDbse0kKnNaQgFJsd_L8be0,3259
157
164
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=tPYxeo_xUOkjQrZMdf9v4IaFrw0RGaZNLGLJPOhjE_g,3260
@@ -243,11 +250,11 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=CYT3ujh5ifonhqQc1uYSa6maJ
243
250
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
244
251
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
245
252
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
246
- sglang/srt/layers/quantization/__init__.py,sha256=TxMVIyn2PiP_xrZQugOmzrfZ7DEKEjdNOuFhDNBWBys,12107
253
+ sglang/srt/layers/quantization/__init__.py,sha256=yokDLpqQZ6eIIeaBZggJG-oS4h3TmroXZHLL40YykeM,12159
247
254
  sglang/srt/layers/quantization/awq.py,sha256=VImnVCU_QBLFba6S88T0dJ-vLy6SMm3OLIMEdllDfVI,6663
248
255
  sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
249
- sglang/srt/layers/quantization/blockwise_int8.py,sha256=BS4nmo18QaC3vyCe9Wwe0Te-7FmFUe-udl7xbhRaU0s,14887
250
- sglang/srt/layers/quantization/fp8.py,sha256=J9P6SwZ1PIb3dEJoP7X2Uw5VOtU35vU5jugkoYyaZC4,41081
256
+ sglang/srt/layers/quantization/blockwise_int8.py,sha256=yE8ARplbha1sW1Szl-mgsRDzGTRpEZY_zAKkCJIu680,15010
257
+ sglang/srt/layers/quantization/fp8.py,sha256=J5D_KdRYiOQ4NCbjoKfYDHdIgCGMy-tQwHlTiG44pJc,41189
251
258
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=JRalHJ-btDpzl3oXu2R_ZoJBu5TzBBmW_wKZDFs-usQ,24384
252
259
  sglang/srt/layers/quantization/fp8_utils.py,sha256=CDR2fLrZa_mZ86n5S2dDjYMpVCGa2n7gCXd2BYZjXcM,21391
253
260
  sglang/srt/layers/quantization/gptq.py,sha256=e4rMz374-yQQqeAI77WPxfcAaRk38GeN2akEpvnC_Do,15141
@@ -255,10 +262,10 @@ sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7Bettjg
255
262
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
256
263
  sglang/srt/layers/quantization/kv_cache.py,sha256=rJi6amyLZsquUMo_V5iLlPMqdsGTLgxh4popN1xUHCQ,4236
257
264
  sglang/srt/layers/quantization/modelopt_quant.py,sha256=mne4uKF0R-K0OvWN7X5ZxD4LdXKBc6GvmpZzIW6gkmM,6969
258
- sglang/srt/layers/quantization/moe_wna16.py,sha256=JlYCiy825MEMlBeQqqWwPuhoH_FbEcgTYlJUzrWj5IE,19248
265
+ sglang/srt/layers/quantization/moe_wna16.py,sha256=3Z8Eq4_ehTN5EEotlYC09FpUNmF8VO8uv7QzUqJa0QI,19371
259
266
  sglang/srt/layers/quantization/utils.py,sha256=QqGFwRnFenOm5HfyLoS4D06_LyvNWgOggAiFtZXTpQ4,5637
260
267
  sglang/srt/layers/quantization/w8a8_fp8.py,sha256=XcQdgqXA3eKbAf-4_0I81Y5Nvjns3bQTocovnN8141w,6234
261
- sglang/srt/layers/quantization/w8a8_int8.py,sha256=V5vxn0wmUL1szj38lsJOKeNNEvFHisU7hZZLO4FfoNc,8733
268
+ sglang/srt/layers/quantization/w8a8_int8.py,sha256=oLURfgMpsES8qLf0CIJ-4rfQgBGf452Lo0U6tvq6jH0,8856
262
269
  sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
263
270
  sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=ngKVSHfQUNSZzrLMu4Iv_4Fzt2eOoOIZKcO2RNDiwAM,25353
264
271
  sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=roqRrIJybA9YuN3kqSeoLTJhXfTHOOtJd5MkenpOL8E,25835
@@ -458,6 +465,7 @@ sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=wZs4HZhPov7yvV2VU2
458
465
  sglang/srt/managers/multimodal_processors/llava.py,sha256=8mac3vUUpVd12o43k1TyMaLEySZB915ks8Q5epeZmbg,6209
459
466
  sglang/srt/managers/multimodal_processors/minicpm.py,sha256=Mq-iH2j90VrGAbSaF3ayYWhTEm9RvWNI6ZhBb6G23dQ,5684
460
467
  sglang/srt/managers/multimodal_processors/mlama.py,sha256=MLiGS606LzVtdoXvjWGANx-K_7nE9J_fMVmkXN7Gz8k,1661
468
+ sglang/srt/managers/multimodal_processors/mllama4.py,sha256=K6OKhSZOoaHwrRt0ZVi3gi2vnzMVHWJb5n3fUoStwIs,6188
461
469
  sglang/srt/managers/multimodal_processors/qwen_vl.py,sha256=67EmFiAkvZncU-eqiiS0Q4dr3pWcfI-RofYiQnNWvu0,5722
462
470
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=NY62Zo0A0tLJ7ObRLOQqQcXCxoJUDZsK8f5U4dNQjKc,973
463
471
  sglang/srt/mem_cache/chunk_cache.py,sha256=it5SfL1FwMbrdeOH-I-Eu_i-I9hFB1xL-z_brIUoCkk,1835
@@ -470,7 +478,7 @@ sglang/srt/metrics/collector.py,sha256=aCxHqgsQ6P8ZxsAvq_MoEVsr3KUvIUSOBpGYMgBxm
470
478
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
471
479
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=bDLOqlxdwRUyKitG8JyZygnm05N00q-TdNiAayG_T8o,23223
472
480
  sglang/srt/model_executor/forward_batch_info.py,sha256=8VI1VxSmyH26lIHnCNeGqYw2XxslbqN_cuSUIEPUtRU,19468
473
- sglang/srt/model_executor/model_runner.py,sha256=f2e0Ts7eTy5_xCIp7q2If4Mi6Oi3nw45zyBPvNgR_-0,45006
481
+ sglang/srt/model_executor/model_runner.py,sha256=4Xi-1u1tTC34uK_DtYEaj7VtvPjDDgMzRaXeJ5kpsQE,45076
474
482
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
475
483
  sglang/srt/model_loader/loader.py,sha256=AUS4SqSFghbQjs29C65lg7_zxR9h1t7N5G0gERjc0Rc,54238
476
484
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -497,7 +505,8 @@ sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,
497
505
  sglang/srt/models/grok.py,sha256=pQOXtpHOYVntwt5QQRLffYsnMHmMfPMmGyKMfR0k0Ic,27994
498
506
  sglang/srt/models/internlm2.py,sha256=4eh9WVgK4yg13IsnH5qB2xUCWnixj_aLLz7qa_4m2_Q,13017
499
507
  sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
500
- sglang/srt/models/llama.py,sha256=FRdD-oCQl1M4hkYwIrWuECQ1tgxBq9rEIYOr-rlgswE,24616
508
+ sglang/srt/models/llama.py,sha256=gcl2YtnM54J_fZQx2Z26LMm7vPbWN7N1CjzlaBEA3zk,24893
509
+ sglang/srt/models/llama4.py,sha256=4WqHX6YPBrlJVA7HoQTMUfdoU_mEhpWSgoFaeKdhdCE,15018
501
510
  sglang/srt/models/llama_classification.py,sha256=4QWTFaUZIFKYZvEzs8bx8VkOZNIwdYCLrnwrdAw4QK0,3108
502
511
  sglang/srt/models/llama_eagle.py,sha256=OB2lKsjn7BcfCZljklnhk83me8j0PuQmYLou7baNcq4,4866
503
512
  sglang/srt/models/llama_eagle3.py,sha256=v3bftBVDIGjnzngQYnu19cy0J_3w7yruHqLP5nsAQDM,6642
@@ -513,6 +522,7 @@ sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,
513
522
  sglang/srt/models/mixtral.py,sha256=6Fse2J-20IMylP-yzpEihIinaH37TmmslATbLcWBRYY,14926
514
523
  sglang/srt/models/mixtral_quant.py,sha256=MSa6UKPbgv8Rn8Iv8o1dQhcstAHLNQzE0eepFx_hYSw,15221
515
524
  sglang/srt/models/mllama.py,sha256=SsK_cEolaeoXh_HkyXsSF2ueYR3sPv1NvnGH2k6Aqx0,38461
525
+ sglang/srt/models/mllama4.py,sha256=E2mCxJ1zCt6Io4LL4Rtt5uqMj7Jy971234ZcuyJZxSo,5800
516
526
  sglang/srt/models/olmo.py,sha256=FJk8A3T3TF5QcTV6rMP8np94QtvxpMWlgCsv_5VwpVE,12632
517
527
  sglang/srt/models/olmo2.py,sha256=U0ScFzWazOrb_Q90sfXkpVNAsXT-pgZbNgGh80R40VE,14288
518
528
  sglang/srt/models/olmoe.py,sha256=tx5OKWLOr6_pohe2eBcIodCmcuSjtpteHq_tG_QVYCY,15910
@@ -569,8 +579,8 @@ sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,
569
579
  sglang/test/test_utils.py,sha256=jUkIDxJ7I8hCPk0XF7F_IWJkOtn6O7eXJG5pI0cduwo,30463
570
580
  sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
571
581
  sglang/test/attention/test_flashattn_backend.py,sha256=OxS1KsPs19nwZcDtdURj7_liT1cIfEXb6W4FH9KMaaE,10808
572
- sglang-0.4.4.post4.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
573
- sglang-0.4.4.post4.dist-info/METADATA,sha256=gHc8ZNyeSnw_PrtGOQ7SAVJwCqonaOP4k0vADEpMqv4,25067
574
- sglang-0.4.4.post4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
575
- sglang-0.4.4.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
576
- sglang-0.4.4.post4.dist-info/RECORD,,
582
+ sglang-0.4.5.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
583
+ sglang-0.4.5.dist-info/METADATA,sha256=dFvXPJ-aE-juLKgxD5l8wflGgO1cHg2jHjScLX_Ftjw,25061
584
+ sglang-0.4.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
585
+ sglang-0.4.5.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
586
+ sglang-0.4.5.dist-info/RECORD,,