sglang 0.4.1.post3__py3-none-any.whl → 0.4.1.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. sglang/bench_one_batch.py +2 -0
  2. sglang/srt/layers/attention/__init__.py +14 -5
  3. sglang/srt/layers/attention/double_sparsity_backend.py +0 -52
  4. sglang/srt/layers/attention/flashinfer_backend.py +211 -81
  5. sglang/srt/layers/attention/torch_native_backend.py +1 -38
  6. sglang/srt/layers/attention/triton_backend.py +20 -11
  7. sglang/srt/layers/attention/triton_ops/decode_attention.py +4 -0
  8. sglang/srt/layers/logits_processor.py +167 -212
  9. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200.json +146 -0
  11. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  12. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200.json +146 -0
  13. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  14. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200.json +146 -0
  15. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  16. sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json +146 -0
  17. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  18. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json +146 -0
  19. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  20. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200.json +146 -0
  21. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  22. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json +146 -0
  23. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  24. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json +146 -0
  25. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  26. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json +146 -0
  27. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  28. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json +146 -0
  29. sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  30. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +187 -29
  31. sglang/srt/layers/moe/fused_moe_triton/layer.py +14 -6
  32. sglang/srt/layers/quantization/fp8.py +2 -2
  33. sglang/srt/layers/sampler.py +57 -21
  34. sglang/srt/layers/torchao_utils.py +17 -3
  35. sglang/srt/managers/io_struct.py +1 -2
  36. sglang/srt/managers/schedule_batch.py +26 -2
  37. sglang/srt/managers/schedule_policy.py +159 -90
  38. sglang/srt/managers/scheduler.py +62 -26
  39. sglang/srt/managers/tokenizer_manager.py +22 -20
  40. sglang/srt/managers/tp_worker.py +16 -4
  41. sglang/srt/managers/tp_worker_overlap_thread.py +3 -4
  42. sglang/srt/model_executor/cuda_graph_runner.py +118 -73
  43. sglang/srt/model_executor/forward_batch_info.py +33 -8
  44. sglang/srt/model_executor/model_runner.py +63 -61
  45. sglang/srt/models/deepseek_v2.py +34 -7
  46. sglang/srt/models/grok.py +97 -26
  47. sglang/srt/openai_api/adapter.py +0 -17
  48. sglang/srt/openai_api/protocol.py +3 -3
  49. sglang/srt/sampling/sampling_batch_info.py +21 -0
  50. sglang/srt/sampling/sampling_params.py +9 -1
  51. sglang/srt/server.py +9 -5
  52. sglang/srt/server_args.py +108 -57
  53. sglang/srt/speculative/build_eagle_tree.py +347 -0
  54. sglang/srt/speculative/eagle_utils.py +618 -0
  55. sglang/srt/speculative/eagle_worker.py +170 -0
  56. sglang/srt/speculative/spec_info.py +5 -0
  57. sglang/srt/utils.py +15 -2
  58. sglang/version.py +1 -1
  59. {sglang-0.4.1.post3.dist-info → sglang-0.4.1.post4.dist-info}/METADATA +9 -8
  60. {sglang-0.4.1.post3.dist-info → sglang-0.4.1.post4.dist-info}/RECORD +63 -39
  61. {sglang-0.4.1.post3.dist-info → sglang-0.4.1.post4.dist-info}/WHEEL +1 -1
  62. {sglang-0.4.1.post3.dist-info → sglang-0.4.1.post4.dist-info}/LICENSE +0 -0
  63. {sglang-0.4.1.post3.dist-info → sglang-0.4.1.post4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,170 @@
1
+ from typing import List, Optional, Union
2
+
3
+ import torch
4
+
5
+ from sglang.srt.layers.logits_processor import LogitsProcessorOutput
6
+ from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
7
+ from sglang.srt.managers.tp_worker import TpModelWorker
8
+ from sglang.srt.model_executor.forward_batch_info import (
9
+ CaptureHiddenMode,
10
+ ForwardBatch,
11
+ ForwardMode,
12
+ )
13
+ from sglang.srt.model_executor.model_runner import ModelRunner
14
+ from sglang.srt.server_args import ServerArgs
15
+ from sglang.srt.speculative.eagle_utils import EAGLEDraftInput
16
+
17
+
18
+ class EAGLEWorker(TpModelWorker):
19
+
20
+ def __init__(
21
+ self,
22
+ server_args: ServerArgs,
23
+ gpu_id: int,
24
+ tp_rank: int,
25
+ dp_rank: Optional[int],
26
+ nccl_port: int,
27
+ target_worker: TpModelWorker,
28
+ ):
29
+ # Do not capture cuda graph in `super().__init__()`
30
+ # We will capture it later
31
+ backup_disable_cuda_graph = server_args.disable_cuda_graph
32
+ server_args.disable_cuda_graph = True
33
+ super().__init__(
34
+ gpu_id=gpu_id,
35
+ tp_rank=tp_rank,
36
+ server_args=server_args,
37
+ nccl_port=nccl_port,
38
+ dp_rank=dp_rank,
39
+ is_draft_worker=True,
40
+ )
41
+ self.target_worker = target_worker
42
+ self.server_args = server_args
43
+
44
+ # Share the embedding and lm_head
45
+ embed, head = self.target_worker.model_runner.model.get_embed_and_head()
46
+ self.model_runner.model.set_embed_and_head(embed, head)
47
+ self.model_runner.server_args.disable_cuda_graph = backup_disable_cuda_graph
48
+ self.model_runner.init_cuda_graphs()
49
+
50
+ def forward_draft_decode(self, batch: ScheduleBatch):
51
+ batch.spec_info.prepare_for_decode(batch)
52
+ model_worker_batch = batch.get_model_worker_batch()
53
+ forward_batch = ForwardBatch.init_new(model_worker_batch, self.model_runner)
54
+ forward_batch.spec_info.capture_hidden_mode = CaptureHiddenMode.LAST
55
+ logits_output = self.model_runner.forward(forward_batch)
56
+ self.capture_for_decode(logits_output, forward_batch)
57
+
58
+ def forward_draft_extend(self, batch: ScheduleBatch):
59
+ self._swap_mem_pool(batch, self.model_runner)
60
+ batch.spec_info.prepare_for_extend(batch)
61
+ model_worker_batch = batch.get_model_worker_batch()
62
+ forward_batch = ForwardBatch.init_new(model_worker_batch, self.model_runner)
63
+ forward_batch.spec_info.capture_hidden_mode = CaptureHiddenMode.LAST
64
+ logits_output = self.model_runner.forward(forward_batch)
65
+ self.capture_for_decode(logits_output, forward_batch)
66
+ self._swap_mem_pool(batch, self.target_worker.model_runner)
67
+
68
+ def forward_batch_speculative_generation(self, batch: ScheduleBatch):
69
+ if batch.forward_mode.is_decode():
70
+ prev_spec_info = batch.spec_info
71
+ self._swap_mem_pool(batch, self.model_runner)
72
+ for i in range(self.server_args.speculative_num_steps):
73
+ self.forward_draft_decode(batch)
74
+ batch.spec_info.clear_draft_cache(batch)
75
+ self._swap_mem_pool(batch, self.target_worker.model_runner)
76
+ (
77
+ next_draft_input,
78
+ logits_output,
79
+ verified_id,
80
+ self.finish_extend_len,
81
+ model_worker_batch,
82
+ ) = self.verify(batch)
83
+ next_draft_input.init(self.server_args)
84
+ batch.spec_info = next_draft_input
85
+ # if it is None, means all requsets are finished
86
+ if batch.spec_info.verified_id is not None:
87
+ self.forward_extend_after_decode(batch)
88
+ batch.spec_info = prev_spec_info
89
+ return logits_output, verified_id, model_worker_batch, next_draft_input
90
+
91
+ else:
92
+ spec_info = EAGLEDraftInput()
93
+ spec_info.init(self.server_args)
94
+ model_worker_batch = batch.get_model_worker_batch()
95
+ model_worker_batch.spec_info = spec_info
96
+ spec_info.capture_hidden_mode = CaptureHiddenMode.FULL
97
+ logits_output, next_token_ids = self.target_worker.forward_batch_generation(
98
+ model_worker_batch
99
+ )
100
+ model_worker_batch.spec_info.verified_id = next_token_ids
101
+ model_worker_batch.spec_info.hidden_states = logits_output.hidden_states
102
+ batch.spec_info = spec_info
103
+ self.forward_draft_extend(batch)
104
+ batch.spec_info = None
105
+ return logits_output, next_token_ids, model_worker_batch, spec_info
106
+
107
+ def verify(self, batch: ScheduleBatch):
108
+ verify_input = batch.spec_info.prepare_for_verify(batch)
109
+ batch.forward_mode = ForwardMode.TARGET_VERIFY
110
+ verify_input.prepare_for_verify(batch)
111
+ batch.spec_info = verify_input
112
+ batch.spec_info.capture_hidden_mode = CaptureHiddenMode.FULL
113
+ model_worker_batch = batch.get_model_worker_batch()
114
+ logits_output, _ = self.target_worker.forward_batch_generation(
115
+ model_worker_batch, skip_sample=True
116
+ )
117
+ verify_input.hidden_states = logits_output.hidden_states
118
+ res = verify_input.verify(batch, logits_output)
119
+ batch.forward_mode = ForwardMode.DECODE
120
+ return res + (model_worker_batch,)
121
+
122
+ def _swap_mem_pool(self, batch: ScheduleBatch, runner: ModelRunner):
123
+ batch.token_to_kv_pool = runner.token_to_kv_pool
124
+ batch.req_to_token_pool = runner.req_to_token_pool
125
+
126
+ def forward_extend_after_decode(self, batch: ScheduleBatch):
127
+ self._swap_mem_pool(batch, self.model_runner)
128
+ batch.forward_mode = ForwardMode.DRAFT_EXTEND
129
+ if batch.spec_info.has_finished:
130
+ index = batch.spec_info.unfinished_index
131
+ seq_lens = batch.seq_lens
132
+ batch.seq_lens = batch.seq_lens[index]
133
+ batch.spec_info.prepare_extend_after_decode(batch)
134
+ model_worker_batch = batch.get_model_worker_batch()
135
+ forward_batch = ForwardBatch.init_new(model_worker_batch, self.model_runner)
136
+ forward_batch.spec_info.capture_hidden_mode = CaptureHiddenMode.LAST
137
+ logits_output = self.model_runner.forward(forward_batch)
138
+ batch.spec_info.hidden_states = logits_output.hidden_states
139
+ self.capture_for_decode(logits_output, forward_batch)
140
+ batch.forward_mode = ForwardMode.DECODE
141
+ if batch.spec_info.has_finished:
142
+ batch.seq_lens = seq_lens
143
+ self._swap_mem_pool(batch, self.target_worker.model_runner)
144
+
145
+ def capture_for_decode(self, logits_output, forward_batch):
146
+ if isinstance(logits_output, LogitsProcessorOutput):
147
+ logits = logits_output.next_token_logits
148
+ sample_output = torch.softmax(
149
+ logits, dim=-1
150
+ ) # TODO: Support more sampling method @kavioyu
151
+ forward_batch.spec_info.capture_for_decode(
152
+ sample_output, logits_output.hidden_states, forward_batch.forward_mode
153
+ )
154
+
155
+ # Don't support prefix share now.
156
+ def finish_request(self, reqs: Union[Req, List[Req]]):
157
+ if not isinstance(reqs, List):
158
+ reqs = [reqs]
159
+ for req in reqs:
160
+ req_len = (
161
+ len(req.origin_input_ids)
162
+ + len(req.output_ids)
163
+ - self.finish_extend_len[req.rid]
164
+ - 1
165
+ )
166
+ kv_indices = self.model_runner.req_to_token_pool.req_to_token[
167
+ req.req_pool_idx
168
+ ][:req_len]
169
+ self.model_runner.token_to_kv_pool.free(kv_indices)
170
+ self.model_runner.req_to_token_pool.free(req.req_pool_idx)
@@ -2,8 +2,12 @@ from enum import IntEnum, auto
2
2
 
3
3
 
4
4
  class SpeculativeAlgorithm(IntEnum):
5
+ NONE = auto()
5
6
  EAGLE = auto()
6
7
 
8
+ def is_none(self):
9
+ return self == SpeculativeAlgorithm.NONE
10
+
7
11
  def is_eagle(self):
8
12
  return self == SpeculativeAlgorithm.EAGLE
9
13
 
@@ -11,6 +15,7 @@ class SpeculativeAlgorithm(IntEnum):
11
15
  def from_string(name: str):
12
16
  name_map = {
13
17
  "EAGLE": SpeculativeAlgorithm.EAGLE,
18
+ None: SpeculativeAlgorithm.NONE,
14
19
  }
15
20
  return name_map[name]
16
21
 
sglang/srt/utils.py CHANGED
@@ -15,6 +15,7 @@
15
15
 
16
16
  import base64
17
17
  import dataclasses
18
+ import io
18
19
  import ipaddress
19
20
  import itertools
20
21
  import json
@@ -34,6 +35,7 @@ import warnings
34
35
  from functools import lru_cache
35
36
  from importlib.metadata import PackageNotFoundError, version
36
37
  from io import BytesIO
38
+ from multiprocessing.reduction import ForkingPickler
37
39
  from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, Union
38
40
 
39
41
  import numpy as np
@@ -60,7 +62,6 @@ from triton.runtime.cache import (
60
62
 
61
63
  logger = logging.getLogger(__name__)
62
64
 
63
-
64
65
  show_time_cost = False
65
66
  time_infos = {}
66
67
 
@@ -1206,7 +1207,6 @@ def _cuda_device_count_stateless(cuda_visible_devices: Optional[str] = None) ->
1206
1207
  # https://github.com/pytorch/pytorch/blob/
1207
1208
  # c1cd946818442aca8c7f812b16d187ce1586c3bc/
1208
1209
  # torch/cuda/__init__.py#L831C1-L831C17
1209
- import torch.cuda
1210
1210
  import torch.version
1211
1211
 
1212
1212
  if not torch.cuda._is_compiled():
@@ -1335,3 +1335,16 @@ def parse_tool_response(text, tools, **kwargs):
1335
1335
  for call_info in call_info_list
1336
1336
  ]
1337
1337
  return text, call_info_list
1338
+
1339
+
1340
+ class MultiprocessingSerializer:
1341
+ @staticmethod
1342
+ def serialize(obj):
1343
+ buf = io.BytesIO()
1344
+ ForkingPickler(buf).dump(obj)
1345
+ buf.seek(0)
1346
+ return buf.read()
1347
+
1348
+ @staticmethod
1349
+ def deserialize(data):
1350
+ return ForkingPickler.loads(data)
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.1.post3"
1
+ __version__ = "0.4.1.post4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.4.1.post3
3
+ Version: 0.4.1.post4
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -243,11 +243,11 @@ Requires-Dist: torch; extra == "srt"
243
243
  Requires-Dist: vllm<=0.6.4.post1,>=0.6.3.post1; extra == "srt"
244
244
  Requires-Dist: cuda-python; extra == "srt"
245
245
  Requires-Dist: flashinfer==0.1.6; extra == "srt"
246
- Requires-Dist: sgl-kernel>=0.0.2.post10; extra == "srt"
246
+ Requires-Dist: sgl-kernel>=0.0.2.post11; extra == "srt"
247
247
  Provides-Extra: srt-hip
248
248
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
249
249
  Requires-Dist: torch; extra == "srt-hip"
250
- Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
250
+ Requires-Dist: vllm==0.6.3.post2.dev1; extra == "srt-hip"
251
251
  Provides-Extra: srt-xpu
252
252
  Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
253
253
  Provides-Extra: srt-hpu
@@ -315,7 +315,7 @@ Requires-Dist: sglang[test]; extra == "dev-hpu"
315
315
 
316
316
  | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
317
317
  | [**Documentation**](https://sgl-project.github.io/)
318
- | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2tmmp6flg-89dOlJW2TjnBrTRk1I_~GA)
318
+ | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2um0ad92q-LkU19KQTxCGzlCgRiOiQEw)
319
319
  | [**Join Bi-Weekly Development Meeting**](https://docs.google.com/document/d/1xEow4eIM152xNcRxqZz9VEcOiTQo8-CEuuQ5qTmkt-E/edit?usp=sharing)
320
320
  | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
321
321
 
@@ -347,9 +347,10 @@ The core features include:
347
347
 
348
348
  ## Getting Started
349
349
  - [Install SGLang](https://sgl-project.github.io/start/install.html)
350
- - [Send requests](https://sgl-project.github.io/start/send_request.html)
351
- - [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
352
- - [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
350
+ - [Quick Start](https://sgl-project.github.io/start/send_request.html)
351
+ - [Backend Tutorial](https://sgl-project.github.io/backend/openai_api_completions.html)
352
+ - [Frontend Tutorial](https://sgl-project.github.io/frontend/frontend.html)
353
+ - [Contribution Guide](https://sgl-project.github.io/references/contribution_guide.html)
353
354
 
354
355
  ## Benchmark and Performance
355
356
  Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/), [v0.4 blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)
@@ -361,5 +362,5 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
361
362
  The project is supported by (alphabetically): AMD, Baseten, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
362
363
 
363
364
  ## Acknowledgment and Citation
364
- We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
365
+ We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
365
366
  Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
@@ -2,7 +2,7 @@ sglang/__init__.py,sha256=b2oIdWzp5P8SzieeOs2TzJoN3Do3tfJbV8gZS_imVcs,1619
2
2
  sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
3
3
  sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
4
  sglang/bench_offline_throughput.py,sha256=r-uBvpnx-30mAnVwQB4WlqiXxy2fn5a1NUARwZcaIo4,12533
5
- sglang/bench_one_batch.py,sha256=jkyMhK0lqn5dRCYgAh30qZrNHP4gAbXODymBMNXK86I,15859
5
+ sglang/bench_one_batch.py,sha256=uw__0H3e3lY_6EDz4IAZUoYxq9kQIOPbbcyguYxttSA,15975
6
6
  sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
7
7
  sglang/bench_serving.py,sha256=YQiCZreejCPBTqMmZsCB99RMi1N-Jx-dZtaafcQ8-14,53377
8
8
  sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
@@ -11,7 +11,7 @@ sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
11
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
12
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
13
13
  sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
14
- sglang/version.py,sha256=FT2VkJCvJQmaJgb_t19PhogLhJaJvLV2NK6x3Bt9CeQ,28
14
+ sglang/version.py,sha256=efEbFOIgkOX__fKbqiqjj6UK2e0KofwnPDZo0VFdehs,28
15
15
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
17
17
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -32,9 +32,9 @@ sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21
32
32
  sglang/srt/hf_transformers_utils.py,sha256=38Ms0H2-VMerOS6jnczcFtZMS6lhw9B5rSWKAfxVUfQ,7945
33
33
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
34
34
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
35
- sglang/srt/server.py,sha256=sDERAZlRa6OTaUk-SfW5aKJbPui1COpPG34HDlMHMNc,34916
36
- sglang/srt/server_args.py,sha256=wkafWcLHqm-p52psy75WYvTZ1-fIlkfB0YUr_g-vgjY,34210
37
- sglang/srt/utils.py,sha256=i8MjcaSQjPPfPZ0txufTtqLr4Q7YhHQ86L1i9j-y5yY,44131
35
+ sglang/srt/server.py,sha256=2HPaIwN8-KijB44ADUnpWD4A2mTKwW9CPl4NJBu9yzE,35068
36
+ sglang/srt/server_args.py,sha256=oIwBCTwSoj6qyKJ5zD38dPfgdXcNol64wV7l6UjmJNo,36208
37
+ sglang/srt/utils.py,sha256=Xn5Zf_HzRVSsGF6_lvd85hS-0I2MdQXbe0Yo46WsTOc,44447
38
38
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
39
39
  sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
40
40
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
@@ -63,19 +63,19 @@ sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwx
63
63
  sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
64
64
  sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
65
65
  sglang/srt/layers/linear.py,sha256=KyRFU0VcoNuN-hnQB9QQcBN9NCpeqPtLzzufIHUpV6w,47064
66
- sglang/srt/layers/logits_processor.py,sha256=Imh-qY1D9J80DZVSVV0LfTiHMEw6oQ3JbY9lXxPZAXE,15656
66
+ sglang/srt/layers/logits_processor.py,sha256=Yd7GisSfEgSq3cLMzz5lYiB5Cv-YgE4AMmVcACMBBZ4,12991
67
67
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
68
68
  sglang/srt/layers/radix_attention.py,sha256=E4cmvkcCdCtb6VyLNrCKy1D6VwHQ063oH3JQXPaRy6w,2178
69
69
  sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
70
- sglang/srt/layers/sampler.py,sha256=k4Op_HMkQfT7t9wgQwBVotfTUXEocrzRyQqEFnff1pc,5511
71
- sglang/srt/layers/torchao_utils.py,sha256=dQVuWNXxAvOPjr2G5BBMWqC2oKcS2B52rx-fEc_elmc,3545
70
+ sglang/srt/layers/sampler.py,sha256=HQWi1zb1gmD9pHMQyEP3WPjnL8vy-ncZDVMENbjQW7c,6944
71
+ sglang/srt/layers/torchao_utils.py,sha256=8c2vzt106iP_QKbJtfN1GuABW8nCuP5dElQLUeci6qg,3934
72
72
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=slGwLiWjuFLCUdRe-GTlfumyZpqVX9VF6No_UGOT-hA,21624
73
- sglang/srt/layers/attention/__init__.py,sha256=lNLfWqePc5NMej-AcXl97vxVXsxQOgP7dNNb2ibyUWI,2562
74
- sglang/srt/layers/attention/double_sparsity_backend.py,sha256=RQdEKRykSLf9ilnaHmR6T7RFqh4emH_adfB3aJN2BUU,10920
75
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=8nH4EIEXvNk9yZVl7mSn78w5Dli5UiWL-ZCeYykG9HI,27280
76
- sglang/srt/layers/attention/torch_native_backend.py,sha256=nQdeqWEMMH_wrod5wssDCJG-uPKm0uslvkALKqPRPQ8,10509
77
- sglang/srt/layers/attention/triton_backend.py,sha256=-TobyZHwlbJ5HhbFg-jgCqVOw4Y-opgEuFo-EusASQc,6264
78
- sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=oJ_UK1t229zF3hbTDiQe7t-X-IbM2dOxx4U2ch-vmjA,17847
73
+ sglang/srt/layers/attention/__init__.py,sha256=GUoygIsXzDFt9I1w9p0GO7leaDK6l2J3cBRQfpp4oDI,2869
74
+ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
75
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=I8b3Dq1O9PijLN40lEK0Gjj8GNS46WF4K-QVOtVccTg,33218
76
+ sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
77
+ sglang/srt/layers/attention/triton_backend.py,sha256=44ScKsVs-rFvqsaAZG_mREEpczhGaUBvaflvWqrukVE,6743
78
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
79
79
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
80
80
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
81
81
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
@@ -85,8 +85,8 @@ sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
85
85
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
86
86
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=6iQU5ZjQ8IXGoQ8ZlBuJqyQxYTEem9vXI6rbVIWKlZw,22303
87
87
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
88
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=KvOy544x_4nRqg50o5YHQpHvF8TUD7q9LXDAWPGJlAA,31796
89
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=BclDj5JyCy-8Bfue4broL1-IG6a4dUyggE9WQLa06sg,20575
88
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=JjSn3fNTHgXFDxcAINZUYAttbYOxl9VSIF305NT73Wg,36255
89
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MZF6BHJVjduz-XerTrHvCP3qSZ3NW0pUK2p8zNwDuac,20798
90
90
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
91
91
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
92
92
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -123,43 +123,64 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=BclDj5JyCy-8Bfue4broL1-IG
123
123
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=uv-RwTNZT2n264dLo4eWxUpB3g7QqUyf2MFEGiRvoqQ,3251
124
124
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=XbCRIOmiNqVKh89p-0UxvvspINRDA1iV83f9l5yORwA,3254
125
125
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=4uQnjGPWokscrxiXDIvexOA8OkK5vkoIulmvvMFIEog,3250
126
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=i5AXj26mWI-xEoOnLCZDXUzz8jk2RjDcGuaiT1QYSbY,3263
127
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_H200.json",sha256=fJKk5LEM_LSnq1yc3ekLqAfbUWzPojQA6yX3XgSFo-o,3254
126
128
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=iJVlnCYTNDMb6U1UnV46ZuL_8LcpOv_XFaYWIeRFeNA,3263
129
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=KfvSYCMG48vnRb35d9WOxYyZulI-RBrUGXUHQxXi4hk,3264
130
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=2560,device_name=NVIDIA_H200.json",sha256=60yuOluuk6q88Ze0toPJB8hzMBvF7ZWyMZpriMdQf3g,3252
127
131
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=DA4PrCu_BNLSWWVTwOicNfbyqUNW7BTZC2dyFz9DVbU,3265
128
132
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=prj-QxdfS9Ns5WRPvahY_Tr7CyqlaVgNHPT89SS5zzg,3239
133
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=AUgoHK1PmAFehSNmsbxunlBdzM50Q5nFvdnG9FSOjOw,3265
134
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=320,device_name=NVIDIA_H200.json",sha256=pC5fdtEFc5aVNzpj_REHhz1QPrGvgI9iQCvlodDP7J8,3244
129
135
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249
130
136
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=bfr70r6PmM95w7raabQOaOOSPiwU2OQCOZh-kKXIehY,3248
131
137
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4g9lABUJnB-iVwXfYPYcI05XFPG4jY8o0yJUK7kSPZM,3253
132
138
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=RGLqrAGvPCFZ0jMPBCJ0TqsnrSdW-EbUaSZu61cWGN8,3265
133
139
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=sjEVPVTgUAlp4s8tZLGSyeNzbW6zTtUm2IioH3nZsIg,3254
140
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=eD3Y9sOwHFcgVdOfya8KxPhvLx_b4whfEWm4d8Y2HW8,3268
141
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=KIfpZydSl31FOEqq0EBfxTyWRj1QTDwTjkPHFjNO3_A,3253
134
142
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=OnadAdmDbX17Ni9VPrNXYSsxYhbtBeniCxxhhb0UmUk,4733
135
143
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
144
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=LD4Z5MRR5Ivi4bYB5hMgymtvmFyVJwq6gmehA7fzecc,3271
145
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=GLIH4egg-pE-NWU5XqKuJCoRXciHN6GSc3NaE4PaeYg,3261
136
146
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=bKsYVJm-IvWHWpxUG-lMPkyNz0nQpDb4UEIv895c9JI,4730
137
147
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
138
148
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
139
149
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
150
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=obNtHlqs6l6EBqGm0e0TD2wR9TYoQV_N9Y7om847WJk,3268
151
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H200.json",sha256=juxJYeWYFHeLb-83_IDgrHEpoeSEursjXD43mTHBdLE,3246
140
152
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256
141
153
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266
142
154
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
155
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=4O4VeMpgFNrqWyWqWgYgcYAgBQnOlAXvt26CRSXK-sY,3270
156
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=qfjbXqbl902TuiyzzomUy2sMvs-Dud8ZphDRY5WIPBM,3260
143
157
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=_bw1_oads8tz51i4RVQUAjNi8r3b2Q2jPbi50TLFzlY,4732
144
158
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
145
159
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
146
160
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4Q_-yITMfijOMoguUM2n96clARh-DUFsS-4oW_a3Jpc,3252
147
161
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264
148
162
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252
163
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=BXjSVGdvgP_-7xTvbHOO6ZrXWe0qSXiQChxoHGgWL7o,3263
164
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=Pi2coJlJlpgqXiPRd77B_eCmmi7sCdBuoSGK1RA5YO8,3258
149
165
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=p2qlRhTt7owWB8keEmoCrPZpo39IAxsKnULFQ7R38SI,3873
150
166
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=AKIX43JVc26ERb862pNOMEfGhsgyk1OGa42EptAfG1s,4409
151
167
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
152
168
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
153
169
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
170
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=rR8b-OuQ3watb8b2zuNlxKDSZpzlAagm9nb-FdKkt7s,3270
171
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=8G_QqV_DhvZ6xSavMSpeE6qcXPVpsVjEtJabydybKqY,3263
154
172
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=54KpHTMGt_zDQHqbdopuVHPpiI44ZsN_5LBUBZ_woY4,4733
155
173
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
156
174
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
157
175
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
176
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=3YQakSmUKhpw1KO7Hn-tEc-yyD1fEj01_6JlSYnrrlI,3274
177
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=W2ka_U8pzwjzX62NEGKXR32uuSR_zfHD1XjXYf5bgBs,3262
158
178
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
159
179
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
180
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
160
181
  sglang/srt/layers/quantization/__init__.py,sha256=VPYXShHvbvkOgVBlkIqic4RhdJ1y6EZ3r34T-nZMT1k,4606
161
182
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
162
- sglang/srt/layers/quantization/fp8.py,sha256=k4mw-iKxlaEWRkGgaoxCLzZ_dYydyRj0y1N1B_umMwU,32668
183
+ sglang/srt/layers/quantization/fp8.py,sha256=ypIb8wUN18trzMhot8QKUj9sSdCXgPC1i2Qi-ESToWw,32670
163
184
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
164
185
  sglang/srt/layers/quantization/fp8_utils.py,sha256=qBVJXxbxqmf8-Juq0t-IXWjlaZoePJqFNYcs9-oT5Yo,4150
165
186
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
@@ -206,14 +227,14 @@ sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rds
206
227
  sglang/srt/managers/data_parallel_controller.py,sha256=psI4FAuBGjtdnEuwagnGdtRqvqSSxOROfNKQqVDqlVA,8382
207
228
  sglang/srt/managers/detokenizer_manager.py,sha256=XvyxUhY_SNXlAcVsx9zczrGllpEMzj7p2Vbh6M_yHy8,8555
208
229
  sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
209
- sglang/srt/managers/io_struct.py,sha256=TUCHR9J0BGoN-ybFgINehHqK-x3BGjeu6202AYh-c0k,16166
210
- sglang/srt/managers/schedule_batch.py,sha256=KnoVuWgINnyard-BOXCo0jm3IMdXN9wIwnftMKcag-s,46097
211
- sglang/srt/managers/schedule_policy.py,sha256=QxjQ8-le062AMHHxool6CxkhvB4FIwhOQPzTX_JwL6U,15447
212
- sglang/srt/managers/scheduler.py,sha256=RFQfgP6Wy0DEzksNF7MkevT4ZOLFx_RYxyBlGG37eXE,63873
230
+ sglang/srt/managers/io_struct.py,sha256=02NMBHRCjs9TUSdhKJmvMp3HculMC-50SkCGOEaYEHg,16197
231
+ sglang/srt/managers/schedule_batch.py,sha256=WlJstk0MP4QA434zu6_VZ4FhDByEPjSggFDT8em4GvQ,46851
232
+ sglang/srt/managers/schedule_policy.py,sha256=aHkIL9pZtc4Kdmy8XU9tsjaDzdChVN2dnGKvJkSyqFg,17965
233
+ sglang/srt/managers/scheduler.py,sha256=sTnZDLN8gLLHkiQW4UvMZsdKBMkgKfUk-rAHUZ9zNR0,65272
213
234
  sglang/srt/managers/session_controller.py,sha256=3laMRIXEYWDjfytCjPs0vw_Tw__k-nKBY-bYzycYbfc,5482
214
- sglang/srt/managers/tokenizer_manager.py,sha256=9umteGE5dsE9TE7nIt5Ju_qRCZsXpfv_atkhzFC-8s4,33368
215
- sglang/srt/managers/tp_worker.py,sha256=8RVBLQaS3TnX7Z4J35RVrFN0M6PVnRBhct3sczBL4dY,7644
216
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=JQfrVPeE56ZGJ3nozkhZR-RSb2oePsY7iuedM7XCtdQ,9157
235
+ sglang/srt/managers/tokenizer_manager.py,sha256=Xryex_dgdZzRmPtSe16WUz1p9IlGysjVWmocaHjOJz4,33686
236
+ sglang/srt/managers/tp_worker.py,sha256=-bvUFCo544QQSEHqPPjeOvCWMEFn01Bva6AeO39Qe3o,8043
237
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=rdHz2thdGSmceDedrolHOqjNPhrralyDTuNREL56oNI,9067
217
238
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
218
239
  sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
219
240
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -221,9 +242,9 @@ sglang/srt/mem_cache/memory_pool.py,sha256=oxk3UtiiFA3_1iIP6eFsk8HIcRI_8Z1-FE2KO
221
242
  sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
222
243
  sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
223
244
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
224
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=1n5WxoE9-0B3unwkkcR355K_D290h2LGt_7EvH02DQM,16246
225
- sglang/srt/model_executor/forward_batch_info.py,sha256=vqF8XrHQPk3ZL7HqPvvkfP53oqBx0Fajb5lAIkdifBo,13961
226
- sglang/srt/model_executor/model_runner.py,sha256=TjvAwwr7EqZdmE-5HbuQMeEa0e0FqY6LeqqzEAHXMPU,30012
245
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=PFK4aRu8ffOBQw0zU_yYofUK_poi5C1vCc3ePixj0JY,18243
246
+ sglang/srt/model_executor/forward_batch_info.py,sha256=Z4VrcyQt3f4jPYvvm1vimMx3hSNNeUEONAFvSziy6N0,14788
247
+ sglang/srt/model_executor/model_runner.py,sha256=MZv0CNevcZ85L2JYVc1MnXRKbvUWoSuCFcShES07YN4,29889
227
248
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
228
249
  sglang/srt/model_loader/loader.py,sha256=7OG_8-66vFDFZ9kVKGNK1BFBjZ6ql449dlyvdCbMqvE,43876
229
250
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -233,7 +254,7 @@ sglang/srt/models/chatglm.py,sha256=DOrEhmb0s-yPId88R6nJeLOTUEtogk-vkB69qT2JdWc,
233
254
  sglang/srt/models/commandr.py,sha256=PNXgfOZF84h-rSH0edEECUmEGW8YLb44V75Z_oDhFiA,14223
234
255
  sglang/srt/models/dbrx.py,sha256=okIpIwdr8Cfrz_thzc1F75XqCUfHhFLvZ1B6BaswKoA,14585
235
256
  sglang/srt/models/deepseek.py,sha256=_cVOvR6eSEgRf6TUBpTD5uMdijDWFw4sSt4lGzl8tbg,15697
236
- sglang/srt/models/deepseek_v2.py,sha256=-v_OJr2c3gJ0NMxQjvT3Jknz1XPGkzKx0TVR3NIiC6A,37284
257
+ sglang/srt/models/deepseek_v2.py,sha256=vbRhgI8yD7EmHUpq5pzI_sVpGLnkeyJ7ew-3Pl6D8F4,38499
237
258
  sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
238
259
  sglang/srt/models/gemma.py,sha256=ydRqsG-7004r1fAiz01LHUmcj_6XN0Tn4xO1keJnMQk,12126
239
260
  sglang/srt/models/gemma2.py,sha256=-bFN-Te3YWAunLCrF-XFk_6fJS7gHM4Ca6h6aesXUTM,16362
@@ -241,7 +262,7 @@ sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb
241
262
  sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
242
263
  sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
243
264
  sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,20409
244
- sglang/srt/models/grok.py,sha256=J9lgNbFebvXgF19nfZyHwlGPlGWY_m0LgP506YvOYrU,15668
265
+ sglang/srt/models/grok.py,sha256=PbLmYP-UEbImJgbEpkHhTjkqibIvt0oENPlAIrK0qSE,17751
245
266
  sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
246
267
  sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
247
268
  sglang/srt/models/llama.py,sha256=4UPKF7erp7qqBD11uvvQkO1Fo_wDs71BmA8Y2csXRcA,20302
@@ -271,17 +292,20 @@ sglang/srt/models/torch_native_llama.py,sha256=YeXHorFm6QfnczLXwPb5TG9a-He0uiA9R
271
292
  sglang/srt/models/xverse.py,sha256=Oq--KqvbYu2H4TMVGEHpSnJLEwXBpxlncR9ilsQeckc,13579
272
293
  sglang/srt/models/xverse_moe.py,sha256=7E60YIST4ELYwLRgjtHiLRI5Uyc7XqQTM7jQXiWaQs4,15541
273
294
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
274
- sglang/srt/openai_api/adapter.py,sha256=HvgeFPWv-v8LOiYF2iNCo-14BIZLAPznNTCUbubB2Rg,57091
275
- sglang/srt/openai_api/protocol.py,sha256=anWGr2Br8gVYm6Z0yvDwjXLaPCPuvJZ28gr5rV2dhVQ,11613
276
- sglang/srt/sampling/sampling_batch_info.py,sha256=s--zNjk-LErZ5lMqnZ7KiuJltaziKRbQAU5qYpKIxAc,8564
277
- sglang/srt/sampling/sampling_params.py,sha256=BkgCJAOSmQXwJrNXg26zSjKfMy0d5mMN6oHRk_ZuESI,5499
295
+ sglang/srt/openai_api/adapter.py,sha256=Yv-rEA0Jd54iFlnkVy-OZM4EnPqkW_NLtDPGCiPWVWo,56386
296
+ sglang/srt/openai_api/protocol.py,sha256=v_YUwH1PF4vIVqSE5rj1ODdSglprTe_vGiXoS99cOV4,11613
297
+ sglang/srt/sampling/sampling_batch_info.py,sha256=4FZIt_w5pDerRbny3uUplQO23xxnU8lmtG91OPCB_4w,9254
298
+ sglang/srt/sampling/sampling_params.py,sha256=KjUhZzRJvNTQZgJul2zSq3U8r352WzMKLbXfhP3V-nU,5685
278
299
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
279
300
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
280
301
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
281
302
  sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
282
303
  sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
283
304
  sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
284
- sglang/srt/speculative/spec_info.py,sha256=d-82uWEC-QBqAgv3XGDNDW8DlHv4MtUsZghFqzGwV7U,352
305
+ sglang/srt/speculative/build_eagle_tree.py,sha256=SIKuOFUOIzMLyanL5vViPmFBEiUHm_ezwiGuIyLmauE,9886
306
+ sglang/srt/speculative/eagle_utils.py,sha256=VI8P8j9f1R5fRBIjiLXC8iYteMpqqeTQmaZ9OEudFuE,22983
307
+ sglang/srt/speculative/eagle_worker.py,sha256=cAqw91E-5Tl6Sgltf0q2eYwUEH_trWHdibMWqy1rPKQ,7548
308
+ sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
285
309
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
286
310
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
287
311
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
@@ -298,8 +322,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
298
322
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
299
323
  sglang/test/test_utils.py,sha256=HJG7kUQOk6n9FBbH89PDtQ41C3kt1cfJODhAEcFT0AQ,23823
300
324
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
301
- sglang-0.4.1.post3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
302
- sglang-0.4.1.post3.dist-info/METADATA,sha256=ICKRXupko-hmKxBCtsLUPYN6f48dMrs03J0Q6zWDPqE,22544
303
- sglang-0.4.1.post3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
304
- sglang-0.4.1.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
305
- sglang-0.4.1.post3.dist-info/RECORD,,
325
+ sglang-0.4.1.post4.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
326
+ sglang-0.4.1.post4.dist-info/METADATA,sha256=nI0C5ivDIygS7D_lOrLwV_xqvHAnlthIEA6zXmf_-54,22601
327
+ sglang-0.4.1.post4.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
328
+ sglang-0.4.1.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
329
+ sglang-0.4.1.post4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5