sglang 0.4.6__py3-none-any.whl → 0.4.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. sglang/srt/disaggregation/decode.py +8 -2
  2. sglang/srt/disaggregation/fake/__init__.py +1 -0
  3. sglang/srt/disaggregation/fake/conn.py +88 -0
  4. sglang/srt/disaggregation/prefill.py +12 -3
  5. sglang/srt/disaggregation/utils.py +16 -2
  6. sglang/srt/entrypoints/engine.py +9 -0
  7. sglang/srt/entrypoints/http_server.py +27 -2
  8. sglang/srt/layers/attention/cutlass_mla_backend.py +278 -0
  9. sglang/srt/layers/attention/utils.py +1 -1
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  11. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H20.json +146 -0
  12. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H200.json +146 -0
  13. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  14. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20.json +146 -0
  15. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  16. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200.json +146 -0
  17. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  18. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20.json +146 -0
  19. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  20. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200.json +146 -0
  21. sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=96,device_name=NVIDIA_H20.json +146 -0
  22. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +2 -2
  23. sglang/srt/layers/moe/fused_moe_triton/layer.py +15 -17
  24. sglang/srt/layers/quantization/fp8.py +20 -22
  25. sglang/srt/layers/quantization/fp8_utils.py +2 -2
  26. sglang/srt/managers/schedule_batch.py +9 -0
  27. sglang/srt/managers/scheduler.py +10 -8
  28. sglang/srt/managers/scheduler_output_processor_mixin.py +25 -9
  29. sglang/srt/managers/tp_worker.py +3 -3
  30. sglang/srt/managers/tp_worker_overlap_thread.py +9 -4
  31. sglang/srt/model_executor/model_runner.py +8 -1
  32. sglang/srt/openai_api/adapter.py +32 -3
  33. sglang/srt/openai_api/protocol.py +2 -0
  34. sglang/srt/reasoning_parser.py +25 -1
  35. sglang/srt/server_args.py +16 -2
  36. sglang/srt/utils.py +3 -0
  37. sglang/test/send_one.py +84 -28
  38. sglang/test/test_utils.py +38 -0
  39. sglang/version.py +1 -1
  40. {sglang-0.4.6.dist-info → sglang-0.4.6.post1.dist-info}/METADATA +2 -2
  41. {sglang-0.4.6.dist-info → sglang-0.4.6.post1.dist-info}/RECORD +44 -29
  42. {sglang-0.4.6.dist-info → sglang-0.4.6.post1.dist-info}/WHEEL +0 -0
  43. {sglang-0.4.6.dist-info → sglang-0.4.6.post1.dist-info}/licenses/LICENSE +0 -0
  44. {sglang-0.4.6.dist-info → sglang-0.4.6.post1.dist-info}/top_level.txt +0 -0
sglang/test/send_one.py CHANGED
@@ -6,11 +6,56 @@ python3 -m sglang.test.send_one
6
6
  """
7
7
 
8
8
  import argparse
9
+ import dataclasses
9
10
  import json
10
11
 
11
12
  import requests
12
13
 
13
14
 
15
+ @dataclasses.dataclass
16
+ class BenchArgs:
17
+ host: str = "localhost"
18
+ port: int = 30000
19
+ batch_size: int = 1
20
+ temperature: float = 0.0
21
+ max_new_tokens: int = 512
22
+ frequency_penalty: float = 0.0
23
+ presence_penalty: float = 0.0
24
+ json: bool = False
25
+ return_logprob: bool = False
26
+ prompt: str = (
27
+ "Human: Give me a fully functional FastAPI server. Show the python code.\n\nAssistant:"
28
+ )
29
+ image: bool = False
30
+ stream: bool = False
31
+
32
+ @staticmethod
33
+ def add_cli_args(parser: argparse.ArgumentParser):
34
+ parser.add_argument("--host", type=str, default=BenchArgs.host)
35
+ parser.add_argument("--port", type=int, default=BenchArgs.port)
36
+ parser.add_argument("--batch-size", type=int, default=BenchArgs.batch_size)
37
+ parser.add_argument("--temperature", type=float, default=BenchArgs.temperature)
38
+ parser.add_argument(
39
+ "--max-new-tokens", type=int, default=BenchArgs.max_new_tokens
40
+ )
41
+ parser.add_argument(
42
+ "--frequency-penalty", type=float, default=BenchArgs.frequency_penalty
43
+ )
44
+ parser.add_argument(
45
+ "--presence-penalty", type=float, default=BenchArgs.presence_penalty
46
+ )
47
+ parser.add_argument("--json", action="store_true")
48
+ parser.add_argument("--return-logprob", action="store_true")
49
+ parser.add_argument("--prompt", type=str, default=BenchArgs.prompt)
50
+ parser.add_argument("--image", action="store_true")
51
+ parser.add_argument("--stream", action="store_true")
52
+
53
+ @classmethod
54
+ def from_cli_args(cls, args: argparse.Namespace):
55
+ attrs = [attr.name for attr in dataclasses.fields(cls)]
56
+ return cls(**{attr: getattr(args, attr) for attr in attrs})
57
+
58
+
14
59
  def send_one_prompt(args):
15
60
  if args.image:
16
61
  args.prompt = (
@@ -20,20 +65,42 @@ def send_one_prompt(args):
20
65
  else:
21
66
  image_data = None
22
67
 
23
- response = requests.post(
24
- "http://localhost:30000/generate",
25
- json={
26
- "text": args.prompt,
27
- "image_data": image_data,
28
- "sampling_params": {
29
- "temperature": args.temperature,
30
- "max_new_tokens": args.max_new_tokens,
31
- "frequency_penalty": args.frequency_penalty,
32
- "presence_penalty": args.presence_penalty,
33
- },
34
- "return_logprob": args.return_logprob,
35
- "stream": args.stream,
68
+ prompt = args.prompt
69
+
70
+ if args.json:
71
+ prompt = (
72
+ "Human: What is the capital of France and how is that city like. "
73
+ "Give me 3 trivial information about that city. "
74
+ "Write in a format of json.\nAssistant:"
75
+ )
76
+ json_schema = "$$ANY$$"
77
+ json_schema = (
78
+ '{"type": "object", "properties": {"population": {"type": "integer"}}}'
79
+ )
80
+ else:
81
+ json_schema = None
82
+
83
+ if args.batch_size > 1:
84
+ prompt = [prompt] * args.batch_size
85
+
86
+ json_data = {
87
+ "text": prompt,
88
+ "image_data": image_data,
89
+ "sampling_params": {
90
+ "temperature": args.temperature,
91
+ "max_new_tokens": args.max_new_tokens,
92
+ "frequency_penalty": args.frequency_penalty,
93
+ "presence_penalty": args.presence_penalty,
94
+ "json_schema": json_schema,
95
+ "stop": ["Question", "Assistant:", "<|separator|>", "<|eos|>"],
36
96
  },
97
+ "return_logprob": args.return_logprob,
98
+ "stream": args.stream,
99
+ }
100
+
101
+ response = requests.post(
102
+ f"http://{args.host}:{args.port}/generate",
103
+ json=json_data,
37
104
  stream=args.stream,
38
105
  )
39
106
 
@@ -47,6 +114,9 @@ def send_one_prompt(args):
47
114
  else:
48
115
  ret = response.json()
49
116
 
117
+ if args.batch_size > 1:
118
+ ret = ret[0]
119
+
50
120
  latency = ret["meta_info"]["e2e_latency"]
51
121
 
52
122
  if "spec_verify_ct" in ret["meta_info"]:
@@ -68,21 +138,7 @@ def send_one_prompt(args):
68
138
 
69
139
  if __name__ == "__main__":
70
140
  parser = argparse.ArgumentParser()
71
- parser.add_argument("--temperature", type=float, default=0.0)
72
- parser.add_argument("--max-new-tokens", type=int, default=512)
73
- parser.add_argument("--frequency-penalty", type=float, default=0.0)
74
- parser.add_argument("--presence-penalty", type=float, default=0.0)
75
- parser.add_argument("--return-logprob", action="store_true")
76
- parser.add_argument(
77
- "--prompt",
78
- type=str,
79
- default="Human: Give me a fully functional FastAPI server. Show the python code.\n\nAssistant:",
80
- )
81
- parser.add_argument(
82
- "--image",
83
- action="store_true",
84
- )
85
- parser.add_argument("--stream", action="store_true")
141
+ BenchArgs.add_cli_args(parser)
86
142
  args = parser.parse_args()
87
143
 
88
144
  send_one_prompt(args)
sglang/test/test_utils.py CHANGED
@@ -732,6 +732,44 @@ def run_bench_one_batch(model, other_args):
732
732
  return output_throughput
733
733
 
734
734
 
735
+ def run_bench_offline_throughput(model, other_args):
736
+ command = [
737
+ "python3",
738
+ "-m",
739
+ "sglang.bench_offline_throughput",
740
+ "--num-prompts",
741
+ "1",
742
+ "--dataset-name",
743
+ "random",
744
+ "--random-input-len",
745
+ "256",
746
+ "--random-output-len",
747
+ "256",
748
+ "--model-path",
749
+ model,
750
+ *[str(x) for x in other_args],
751
+ ]
752
+
753
+ print(f"{command=}")
754
+ process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
755
+
756
+ try:
757
+ stdout, stderr = process.communicate()
758
+ output = stdout.decode()
759
+ error = stderr.decode()
760
+ print(f"Output: {output}", flush=True)
761
+ print(f"Error: {error}", flush=True)
762
+
763
+ output_throughput = -1
764
+ for line in output.split("\n"):
765
+ if "Last generation throughput (tok/s):" in line:
766
+ output_throughput = float(line.split(":")[-1])
767
+ finally:
768
+ kill_process_tree(process.pid)
769
+
770
+ return output_throughput
771
+
772
+
735
773
  def lcs(X, Y):
736
774
  m = len(X)
737
775
  n = len(Y)
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.6"
1
+ __version__ = "0.4.6.post1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.4.6
3
+ Version: 0.4.6.post1
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -245,7 +245,7 @@ Requires-Dist: uvloop; extra == "runtime-common"
245
245
  Requires-Dist: xgrammar==0.1.17; extra == "runtime-common"
246
246
  Provides-Extra: srt
247
247
  Requires-Dist: sglang[runtime_common]; extra == "srt"
248
- Requires-Dist: sgl-kernel==0.0.9.post2; extra == "srt"
248
+ Requires-Dist: sgl-kernel==0.1.0; extra == "srt"
249
249
  Requires-Dist: flashinfer_python==0.2.3; extra == "srt"
250
250
  Requires-Dist: torch==2.6.0; extra == "srt"
251
251
  Requires-Dist: torchvision==0.21.0; extra == "srt"
@@ -10,7 +10,7 @@ sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
10
10
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
12
12
  sglang/utils.py,sha256=GIcgiRHkZ-gyPxXOdn1qFF41jkg4-YdDxbPc4mzO-qk,16159
13
- sglang/version.py,sha256=bbBpXE_PBbo_SaI807mDML0QJywD0_ufCDPgAMlDHaE,22
13
+ sglang/version.py,sha256=mb7cZWFtBTYPgotnX_1oAZadFITLHrAXwTSs2Eb1dvU,28
14
14
  sglang/lang/chat_template.py,sha256=MwNL5dNTe8g_l2ljZubnrazEgT2xEv-9O2D0Ezwxy4I,19658
15
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
16
16
  sglang/lang/compiler.py,sha256=MAuzoOOpb98njJ7Io2SDmFkhTroDYiq0te0ZpfHkMY4,7597
@@ -34,10 +34,10 @@ sglang/srt/hf_transformers_utils.py,sha256=N2f-gA8yUq-UP_TJT276gNbDNzmddWsmWnq3p
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
35
35
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
36
36
  sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
37
- sglang/srt/reasoning_parser.py,sha256=JnaEVW0KG1yJpn9uxmrjwErb9imzni05QDFjExryoqM,5584
38
- sglang/srt/server_args.py,sha256=ojKNFeiZ1W7nQScImdlG4xuHVR1yQdugwrlHj30WVK8,54804
37
+ sglang/srt/reasoning_parser.py,sha256=8AMk3XI8mfvz4AUuRHf_pNYpM_Mr64uT9EZ3o90cqQ8,6341
38
+ sglang/srt/server_args.py,sha256=jRHEskSyfmHbCnyqRzp3deI5HizDenDiyLjF65ZUqvg,55149
39
39
  sglang/srt/torch_memory_saver_adapter.py,sha256=HYlgYJ2sgmjs2RSjU2KbCaXijRg3mTDZ0ZcCB5Bt6Ps,2211
40
- sglang/srt/utils.py,sha256=FsLjbxNf1K1hmQTTpQ7PyZxze0m1nfyKfNhngfC8atU,62944
40
+ sglang/srt/utils.py,sha256=u_YB-FXi3AY6mhRmk8wdPcKKAo1sZY0bMUgnjq8BtJI,63033
41
41
  sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
42
42
  sglang/srt/configs/__init__.py,sha256=vulncVn70WqIT6s0HaB8p_Q6FjOiaLwNZWpoJS9FIuQ,399
43
43
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -64,12 +64,14 @@ sglang/srt/constrained/outlines_jump_forward.py,sha256=Gyubp-FVetxd6wP4FA_kD6cCX
64
64
  sglang/srt/constrained/reasoner_grammar_backend.py,sha256=XFxdZqvPofmtCeIMqR10NOyph06HwbdXfiVI8rIoV5s,3646
65
65
  sglang/srt/constrained/xgrammar_backend.py,sha256=oc3BTTe8mB5Szv_O-5nZzWbKEKFb22oUniqTUZhewLQ,7409
66
66
  sglang/srt/constrained/triton_ops/bitmask_ops.py,sha256=WjTen9iuuFWLzkE1mAHQZB9_7aIy5QH8Wjf-lB-Fams,4614
67
- sglang/srt/disaggregation/decode.py,sha256=G9iYiCGEyI-bFFuLQhgsEN75bzhYtb--QCvlzbbSm1o,24884
67
+ sglang/srt/disaggregation/decode.py,sha256=nSHCBfEtD3a6c2a7XPAcCh4c0jw3BLG9EL-L3LlW_V0,25139
68
68
  sglang/srt/disaggregation/mini_lb.py,sha256=zyJo20GI6G1ZIoBVY3ltcr0dDcH5qOJrtMfiGKGnBLI,10959
69
- sglang/srt/disaggregation/prefill.py,sha256=VE0t6SasG_4QNeEpBkNbwicEpR-SyEsyYSMPROERH2k,15100
70
- sglang/srt/disaggregation/utils.py,sha256=6ackrcitNF94ur9bRvPerF29E7g8uU_0a4-nSdw1EMs,3078
69
+ sglang/srt/disaggregation/prefill.py,sha256=4wLYQtPMbKWMQvF3mGnvr8ygd9xRLO9zTwLKeM5BIf8,15424
70
+ sglang/srt/disaggregation/utils.py,sha256=7gO734GOr4u03qwOf2UvFsfj4n-I37iyzQh7lFKbJRk,3501
71
71
  sglang/srt/disaggregation/base/__init__.py,sha256=KR8xXoRCDAy2U623mfP6ujXu42m1_F9EiudjrKu2I_A,130
72
72
  sglang/srt/disaggregation/base/conn.py,sha256=gpf32bhYXWm_iaYB6WcrDaJ-UoL1ZzPI_xpi5pMhRQo,2443
73
+ sglang/srt/disaggregation/fake/__init__.py,sha256=zmfeKYXjonRhfFOck1c_mP7Q4cW5G0f1RsTwRivKu0s,47
74
+ sglang/srt/disaggregation/fake/conn.py,sha256=DKEVBgmzV3CNzZ0-r7rFV4orue2iP_7apEtgn-fcTEA,2552
73
75
  sglang/srt/disaggregation/mooncake/__init__.py,sha256=1vacEHmWjf7zgbMPzsXKB08FqNKNCquJdUiDlO41BOk,122
74
76
  sglang/srt/disaggregation/mooncake/conn.py,sha256=DQ_PTxq_nZHFZ4LxHDhCIvQFPA1xUbaw1Sleyqhkq6U,28224
75
77
  sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=MxDAB9ZetRF1pFS2LP3FVHPtQ1HjIt_SK3UMaYHZ94o,2604
@@ -88,8 +90,8 @@ sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6
88
90
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
89
91
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
90
92
  sglang/srt/entrypoints/EngineBase.py,sha256=xoyvp6XAeDLY2_Q2Ng33H-fRhrXHv2ldJJKd-HuDhqE,1870
91
- sglang/srt/entrypoints/engine.py,sha256=-qHfYFDremLj8GhwQa7GuWHQKop7Tyq2HvmBXULtCos,22282
92
- sglang/srt/entrypoints/http_server.py,sha256=4fbn2KfUs2fjSvDulnhGfcEGA5a9ahUEre59TazwVA4,28508
93
+ sglang/srt/entrypoints/engine.py,sha256=e5BPBIewPTQpYlk1c2eC4C_xyQgJ3mgNEm2Sg3GfV2s,22518
94
+ sglang/srt/entrypoints/http_server.py,sha256=vvyvCosUp5aTFlD8k4IyZDzj2yXQIsndhPkTl4u1nGc,29573
93
95
  sglang/srt/entrypoints/http_server_engine.py,sha256=ihA6y3GXRs28Y9U3SgdQcJQjnw_SVIby7QrVgiafX04,4846
94
96
  sglang/srt/entrypoints/verl_engine.py,sha256=XLYdwTwhH0jTjw8xczgZXWfBXMRb_ur2bg4TN0dTwfI,6975
95
97
  sglang/srt/layers/activation.py,sha256=oSkdo8B8najXFcVay3Y__CEvgXh87lAIhG0CMp2Ugqs,5954
@@ -106,6 +108,7 @@ sglang/srt/layers/sampler.py,sha256=PNgMXm2vsNsE6Rt89R5GLDC3lDxdIujoWli8F3vldng,
106
108
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
107
109
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=QUxd4sELx6p3dHvEKmccPZ-phdd_9EjNdwjH3SJ9zxI,22238
108
110
  sglang/srt/layers/attention/base_attn_backend.py,sha256=lGujcYJ_CxHJy0Q9L6Phn3ds-nBGMy0OGj3R54R65iQ,3334
111
+ sglang/srt/layers/attention/cutlass_mla_backend.py,sha256=kqtTVCIgDhcW5y9iWP8xcwGPuev-V5ipAUG-Ae3ot7g,9883
109
112
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
110
113
  sglang/srt/layers/attention/flashattention_backend.py,sha256=ysJt9pJ8pg_kVxvVUTvUL22-O7ABHCenLGGcqCotD6A,83206
111
114
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=YtMTgMhxxNrAbSoWTPJczgY4SR3WjnAPXPoJ2d5PUZY,46394
@@ -113,7 +116,7 @@ sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=pnVhvVEK87iFW8gUb1G
113
116
  sglang/srt/layers/attention/flashmla_backend.py,sha256=IyE4w7GcNOxjjy3mQeuAMjPtBNvI-6JkoxvBlFxFvec,10270
114
117
  sglang/srt/layers/attention/torch_native_backend.py,sha256=K5hUqBgakk2COSQqsaxWs0yEVOHS-7BlOygZTOeI8kE,9444
115
118
  sglang/srt/layers/attention/triton_backend.py,sha256=oEEiUwHbm4rNw5ExbQ2c3n0TwAgkk77yuLFenj9bHOo,26902
116
- sglang/srt/layers/attention/utils.py,sha256=J9mA-cbZT3uTlaKXo0HEAaeMei_TS2o4McTna9LVDCE,2750
119
+ sglang/srt/layers/attention/utils.py,sha256=rxB4sbNIHDTges78bDbnpd_hUgtyb3e16wUwgI4WmoU,2751
117
120
  sglang/srt/layers/attention/vision.py,sha256=CtFU1wyz5191LcuyDzGJ01mB-mM-upPj2pXg4DO6wh4,11985
118
121
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=DPu_aCPgwPqKWZPEQmp_xA7MPbpV2ip-MEICCB470Ao,19120
119
122
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=vsDZZ5QGb8-KBzADgKshnVQbsW8zRJF1h5hgdPGW5lU,31124
@@ -128,8 +131,8 @@ sglang/srt/layers/moe/ep_moe/kernels.py,sha256=ijqRzS-tb0LGnDU5hW-g0JH104ppADrWa
128
131
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=SZ0shPwgDp7xj-TCv9bfg5O7f2AXjF6xmBP5xkZ0Ips,36440
129
132
  sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=zQV7Qr-Zrcr3D3efVvZepRQM02bj5djHPsijPssavk8,20430
130
133
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
131
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=bgKelgR3IPgdK4ihasWL4EaFi_z4mVnOKDeMLAio8hc,55951
132
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=XofcceHnkmqhVczEIVqGSbsxps5LiYTGcaCk1Say9YM,24951
134
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=13ygSeBoRkiqsERSHOIbIxLplVsSl-SUT6JxYPB-ViM,55968
135
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=BMOV76fabrZcoyDmRpRbH11Jc0ogWH2k2QAQwvZIpgI,25084
133
136
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
134
137
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
135
138
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -141,7 +144,19 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=XofcceHnkmqhVczEIVqGSbsxp
141
144
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=yf33YmWlVSjjyg0Q4OMAWvc9gjRxvttMrQBUEOfPl4I,4153
142
145
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ZWMClYN1moVRUP2f0hYac38di_pUgZggyl9d2D5rnoc,4136
143
146
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=C65Q2Mv1LxFQ_qDnv11IZ9nwl7sGZo72nWDflMttu4g,4147
147
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I3k416HbXU_rYb8scD8gAI4fuBlElHl06PM347Qa11w,3253
148
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H20.json",sha256=RgV8C4F1LO09h01YsgF_eqX6GNoBtC7ulPfJRUUbg_g,3241
149
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=192,device_name=NVIDIA_H200.json",sha256=nsNEuDNks0tVLfQfIm7xxFwEeptTfQcoa9fJy0NS8xQ,3247
150
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qbqjisJ4oKmcYzumHPRk5UyOzsdi8J6xas82UWHMeAI,3263
151
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H20.json",sha256=vS2DRIDOqWyiBvbG6H746ownfkD1F8Aj2YZ0ET9xll8,3232
152
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=1n5XyZZ5sKAi-Z1duWOhLUfr6gkvnOpvxfbqIT6iU_4,3265
153
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=384,device_name=NVIDIA_H200.json",sha256=xqhl748it8GV2KXX0XixitE_ywnsKksqK8AGL7tAgT8,3254
144
154
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=FsWbV4Q6AzAtgegVuENBDz2ZcSJsqNiwUIVfQbpP7hQ,3244
155
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=IuvyC8TNhCVAmUZfLSoETsyCKsmejKXrs_0zuwFLPAU,3265
156
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H20.json",sha256=10Ntu2aVD5vGLonx-jW0qNw-tgZWdZmzMGx7utDVeng,3237
157
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=pdQ1RvXvdWDn8Y8-8MAX3vn-T-wbtkZvHV9GZZvNjnc,3266
158
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=768,device_name=NVIDIA_H200.json",sha256=JraM-Nvbg5V_TJkSl6UPFYZN1zHHoIbr2pAcksenoTY,3248
159
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=128,N=96,device_name=NVIDIA_H20.json",sha256=JtcHRlPz8xQEAqJ9EWI63oYvdmjQFG6VTHqtt85VOSA,3221
145
160
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=pk6VZChh2Y0CsJSzjtUhOnlta1QLTUEWy33aKQU47XY,3244
146
161
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Gmk24hc5lVIfQtqSa5wLOcWKedMN8aZUe93DBh6J1AY,3249
147
162
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=1024,device_name=NVIDIA_H200.json",sha256=uY_XMPomaXMXxIkTR4ctU_Ybri_jMv2VvCcV-f6O_bw,3255
@@ -270,9 +285,9 @@ sglang/srt/layers/quantization/awq.py,sha256=KemDG55U3B6YZVjMV71awVAIj0islFvtxcU
270
285
  sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScLFjB5_sywFlAfE5J0,4776
271
286
  sglang/srt/layers/quantization/blockwise_int8.py,sha256=cu9-JiCZDfMfvB97Kv_-eEG87VX5bRFIllFkzpO_xIg,15122
272
287
  sglang/srt/layers/quantization/deep_gemm.py,sha256=UFzsd0iiqVTBo0Ow_6ylVVFK8B9EUWTNQQYGvsNfm2s,13129
273
- sglang/srt/layers/quantization/fp8.py,sha256=Niu89OfZd4pIpkwZ1zd2Hrlffx0c5L5zkix6Lzi0Vys,40970
288
+ sglang/srt/layers/quantization/fp8.py,sha256=da-6ji_HBISKwIgMMX-JGlDKMLi-qL9j2XLer5cFAsU,40945
274
289
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=C2_hOLRO27-Yvjy-Nm2niehD2gWSMuP6TnNX07ESqh4,32018
275
- sglang/srt/layers/quantization/fp8_utils.py,sha256=1AmKsAFlpPcwbqpFR6P2CaCOvpp8dYY7lzXLoKjVikE,18631
290
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=vqH-bMb2DD0A7Y7hZjN-TGTg5h6aJ-cLW9H2adyZzqk,18651
276
291
  sglang/srt/layers/quantization/gptq.py,sha256=gyGMOPXHzozK7pPWSjKgLdFX9h7MCEww7n8FqEVEVac,15364
277
292
  sglang/srt/layers/quantization/int8_kernel.py,sha256=CR-VuTTR4GYluOZTpS5mmEz3hYrsY4GOX-G-h3XAYKc,12163
278
293
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
@@ -464,14 +479,14 @@ sglang/srt/managers/expert_distribution.py,sha256=r3o5RGI0gnV7xb60AApqKYa0oiSB37
464
479
  sglang/srt/managers/io_struct.py,sha256=9mdBGOkblguT1x6Ds9wL3j0MWAQiUQVdVRL4a7IUnA4,31631
465
480
  sglang/srt/managers/mm_utils.py,sha256=JTu5B7jZWTtZi8LCpVa6ITvSToxcuf5PDbb3FJC9M6o,18089
466
481
  sglang/srt/managers/multimodal_processor.py,sha256=XlRYvNhF6XOssreRX9DZPhLSpps_VE62gSKw3EGdNPo,2088
467
- sglang/srt/managers/schedule_batch.py,sha256=NXeAXZVvXcB8ApByiDLNaFww3aNQl_rZDxQKujhD5uo,63553
482
+ sglang/srt/managers/schedule_batch.py,sha256=zUQGVjLbi9TK5tfyzHNMSAnPeNeFi9GFI2AC8Fr2pbo,63824
468
483
  sglang/srt/managers/schedule_policy.py,sha256=E1qVq2G3jptKdX9nlqfayeRBUll9xB6bK8nBf3EW32E,19469
469
- sglang/srt/managers/scheduler.py,sha256=3EJ__bAck1xF8GWR6hFlt0HU7VwHMO7uM2eXKHOf3KI,80627
470
- sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=u2sj6MViFTov0lVZSysZ-wph2pEqRCtCjwA1UdttZ7I,26338
484
+ sglang/srt/managers/scheduler.py,sha256=7o03npmnu775d1DRDAkTJjl8OuJlE_xuM3BQji6BYLI,80808
485
+ sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=GxdkTR24_P_2C3ib0dc7Xqklrz8SiHtUTlM0c7AlKlk,26754
471
486
  sglang/srt/managers/session_controller.py,sha256=o-ifit0n4_xHLNmyD0Ams8FxGRgxFybX-Vz1hwgr3UQ,5755
472
487
  sglang/srt/managers/tokenizer_manager.py,sha256=4l4PAvfQrJqlYADQbl7cgpLhBBY52pzI5AzRYIzAjLs,50693
473
- sglang/srt/managers/tp_worker.py,sha256=OV0Zc-oK-HXSWO6x26ItDEf-qB2IqyJq2z0TFuBZz0s,8970
474
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=3_ZJ8Rq7v2ZDaRNTRu5Dy8AbqiAlJQp3IAKnn_WAwd8,9127
488
+ sglang/srt/managers/tp_worker.py,sha256=LhbhCovDvab6Cx4faR88s4q_3D-Di9s5sKCndsDxF9E,8966
489
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=lkG_yN6_UEv5mhmZ7cKP7_A5sIVMQw1GPwkqM91EWSE,9304
475
490
  sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
476
491
  sglang/srt/managers/multimodal_processors/base_processor.py,sha256=ata9H6Ry4QfqBoA_g0auG0sMnKfGrlZn74lM77ihtiA,10172
477
492
  sglang/srt/managers/multimodal_processors/clip.py,sha256=lRc2mcuDbAhZVf-0EfkO81pqDiol9zLvTpDqtPIBQ2k,1525
@@ -494,7 +509,7 @@ sglang/srt/metrics/collector.py,sha256=zHg4twFQJvuK1mSme3-EYQa9PJryfp_u7a4RxQ5Rc
494
509
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
495
510
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=iFryO9dglpnFCoNWxZqKdUhQycT8In29C0kIba3G1Dw,23687
496
511
  sglang/srt/model_executor/forward_batch_info.py,sha256=T9B5vWaJwlKUH0fQTPe3XdbkTYEUI6iKxBxUHs-cAMM,26632
497
- sglang/srt/model_executor/model_runner.py,sha256=Eei9JGCX-SQaTF1T4iBS0zg4UPdyjQ67UkKwgbK_200,46136
512
+ sglang/srt/model_executor/model_runner.py,sha256=O4vKZ4c-u69ZeKPBjAfiunvtnQHskZVmbUUK4fKFb5E,46417
498
513
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
499
514
  sglang/srt/model_loader/loader.py,sha256=YYmtvkQw0B1qgPw0_gN-K4yy7CEYbTSR__0Dl1Fnm6k,55342
500
515
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -561,8 +576,8 @@ sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-K
561
576
  sglang/srt/models/xverse.py,sha256=DsNVI9JpzN4jj0Ry6aTrj7r-xq5YLOoDX2kH4YLJA-I,14035
562
577
  sglang/srt/models/xverse_moe.py,sha256=7KCM2-j12towDMNvXkuuYiBOmNauH6NG4Ip40x0khqA,16782
563
578
  sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
564
- sglang/srt/openai_api/adapter.py,sha256=MMHLsz6Thepwtr4rtOlYwOlaKk212vgpC0p5nmqGt5U,75550
565
- sglang/srt/openai_api/protocol.py,sha256=1IVfYt0LUNLqFz0DNZyqxU02IRYVHcL7jnpMG9bNq34,13532
579
+ sglang/srt/openai_api/adapter.py,sha256=7WMplmT0SWJXo5F8s1s3Q9_6WV_cTscMS1Bodbl9Xes,76746
580
+ sglang/srt/openai_api/protocol.py,sha256=8Iu4t9JlH99QggKl55PYQWTW81u5mpOj0aA-bs44A_c,13621
566
581
  sglang/srt/platforms/interface.py,sha256=hym3iooBB4C8if5hDZezgVN6h4NIOu7sg2ZUBIV6XmM,11246
567
582
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
568
583
  sglang/srt/sampling/sampling_batch_info.py,sha256=4LCowU2bk0TOSfIGpEy90N1SpTsiOKK8Rx1ZYcklUFQ,11988
@@ -582,7 +597,7 @@ sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw
582
597
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
583
598
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
584
599
  sglang/test/runners.py,sha256=vSOl38rVDR3l2ezVCs672vE-LcOA2rJHjlkhLgEjcz8,30260
585
- sglang/test/send_one.py,sha256=6FhbJ3c8RpXxvFTELRXaF97GpT7zXXsCDYZh1DqG22E,2550
600
+ sglang/test/send_one.py,sha256=_l72sRfuXRUldyD3PD63hg_WxNvvhW5unNnbe4XuAwk,4380
586
601
  sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
587
602
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
588
603
  sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
@@ -596,13 +611,13 @@ sglang/test/test_custom_ops.py,sha256=2bSo9P5_rJZYFq8Y8IKRimDfFyZZGJluhL7Ngny0Pf
596
611
  sglang/test/test_dynamic_grad_mode.py,sha256=L76yUCuk_ymNpXD2CmO8r2GiGjIvD_gtTsuFDs2NolI,1638
597
612
  sglang/test/test_layernorm.py,sha256=2GMWqqNDuGvSMSsEBF5eDCzwVSYA9E6hGhRo6s4ecKg,3764
598
613
  sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
599
- sglang/test/test_utils.py,sha256=OPTl_PKVlHitrRJW5DFClpQA6xN6FvxatiOpgngCuJU,31153
614
+ sglang/test/test_utils.py,sha256=1U4Jtx_oz_UtS3SSJdqGuh3ujnj2g8pZjN5MYsbsBwI,32164
600
615
  sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
601
616
  sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
602
617
  sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
603
618
  sglang/test/attention/test_prefix_chunk_info.py,sha256=er0i3KGHMkw-4UZB1GCFd4oYwRcXfU5wpO1ORqpNGGA,7626
604
- sglang-0.4.6.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
605
- sglang-0.4.6.dist-info/METADATA,sha256=ITL_ELbps-k7hM4JnrS_YQCgwSO1glI5-dE9kX0Hft0,25361
606
- sglang-0.4.6.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
607
- sglang-0.4.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
608
- sglang-0.4.6.dist-info/RECORD,,
619
+ sglang-0.4.6.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
620
+ sglang-0.4.6.post1.dist-info/METADATA,sha256=UTh1TF2jiAdQunwLv7_bmww5_18c4uD7FCaeO-Z3gAs,25361
621
+ sglang-0.4.6.post1.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
622
+ sglang-0.4.6.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
623
+ sglang-0.4.6.post1.dist-info/RECORD,,