sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/_custom_ops.py +29 -1
- sglang/srt/configs/model_config.py +1 -1
- sglang/srt/conversation.py +1 -1
- sglang/srt/disaggregation/common/conn.py +34 -6
- sglang/srt/disaggregation/mini_lb.py +3 -2
- sglang/srt/disaggregation/mooncake/conn.py +49 -20
- sglang/srt/disaggregation/mooncake/transfer_engine.py +4 -2
- sglang/srt/disaggregation/nixl/conn.py +17 -13
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -91
- sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +96 -1
- sglang/srt/distributed/device_communicators/quick_all_reduce.py +273 -0
- sglang/srt/distributed/device_communicators/shm_broadcast.py +12 -5
- sglang/srt/distributed/parallel_state.py +70 -15
- sglang/srt/entrypoints/engine.py +2 -8
- sglang/srt/entrypoints/http_server.py +20 -32
- sglang/srt/entrypoints/openai/protocol.py +3 -3
- sglang/srt/entrypoints/openai/serving_chat.py +27 -4
- sglang/srt/function_call/base_format_detector.py +74 -12
- sglang/srt/function_call/deepseekv3_detector.py +26 -11
- sglang/srt/function_call/ebnf_composer.py +95 -63
- sglang/srt/function_call/function_call_parser.py +4 -4
- sglang/srt/function_call/kimik2_detector.py +41 -16
- sglang/srt/function_call/llama32_detector.py +6 -3
- sglang/srt/function_call/mistral_detector.py +11 -3
- sglang/srt/function_call/pythonic_detector.py +16 -14
- sglang/srt/function_call/qwen25_detector.py +12 -3
- sglang/srt/function_call/{qwen3_detector.py → qwen3_coder_detector.py} +10 -9
- sglang/srt/layers/activation.py +11 -3
- sglang/srt/layers/attention/base_attn_backend.py +3 -1
- sglang/srt/layers/communicator.py +12 -12
- sglang/srt/layers/dp_attention.py +72 -24
- sglang/srt/layers/logits_processor.py +34 -24
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +25 -224
- sglang/srt/layers/moe/topk.py +5 -13
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +2 -9
- sglang/srt/layers/quantization/modelopt_quant.py +8 -4
- sglang/srt/layers/quantization/utils.py +0 -9
- sglang/srt/layers/radix_attention.py +5 -3
- sglang/srt/lora/lora_manager.py +133 -169
- sglang/srt/lora/lora_registry.py +124 -0
- sglang/srt/lora/mem_pool.py +2 -2
- sglang/srt/managers/cache_controller.py +53 -6
- sglang/srt/managers/io_struct.py +19 -1
- sglang/srt/managers/schedule_batch.py +13 -3
- sglang/srt/managers/scheduler.py +13 -25
- sglang/srt/managers/tokenizer_manager.py +28 -25
- sglang/srt/managers/tp_worker.py +2 -4
- sglang/srt/mem_cache/allocator.py +67 -7
- sglang/srt/mem_cache/hicache_storage.py +17 -1
- sglang/srt/mem_cache/hiradix_cache.py +30 -16
- sglang/srt/mem_cache/memory_pool_host.py +3 -0
- sglang/srt/model_executor/cuda_graph_runner.py +61 -25
- sglang/srt/model_executor/forward_batch_info.py +201 -29
- sglang/srt/model_executor/model_runner.py +41 -23
- sglang/srt/models/deepseek_v2.py +1 -2
- sglang/srt/models/mllama4.py +10 -3
- sglang/srt/models/qwen2_moe.py +0 -4
- sglang/srt/models/qwen3_moe.py +1 -6
- sglang/srt/reasoning_parser.py +46 -4
- sglang/srt/sampling/sampling_batch_info.py +6 -5
- sglang/srt/server_args.py +76 -55
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +33 -28
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +37 -36
- sglang/srt/speculative/eagle_utils.py +51 -23
- sglang/srt/speculative/eagle_worker.py +59 -44
- sglang/srt/two_batch_overlap.py +9 -5
- sglang/srt/utils.py +17 -68
- sglang/test/test_activation.py +50 -1
- sglang/version.py +1 -1
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/METADATA +5 -5
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/RECORD +75 -72
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/WHEEL +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/top_level.txt +0 -0
sglang/srt/two_batch_overlap.py
CHANGED
@@ -341,15 +341,18 @@ class TboDPAttentionPreparer:
|
|
341
341
|
|
342
342
|
@staticmethod
|
343
343
|
def _compute_global_forward_mode(forward_modes):
|
344
|
-
|
345
|
-
|
346
|
-
for x in forward_modes
|
344
|
+
forward_modes_excluding_idle = [
|
345
|
+
x for x in forward_modes if x != ForwardMode.IDLE.value
|
347
346
|
]
|
347
|
+
|
348
|
+
if not forward_modes_excluding_idle:
|
349
|
+
return ForwardMode.IDLE, False
|
350
|
+
|
348
351
|
forward_mode_agree = TboDPAttentionPreparer._is_all_same(
|
349
|
-
|
352
|
+
forward_modes_excluding_idle
|
350
353
|
)
|
351
354
|
global_forward_mode = (
|
352
|
-
ForwardMode(
|
355
|
+
ForwardMode(forward_modes_excluding_idle[0]) if forward_mode_agree else None
|
353
356
|
)
|
354
357
|
return global_forward_mode, forward_mode_agree
|
355
358
|
|
@@ -542,6 +545,7 @@ class TboForwardBatchPreparer:
|
|
542
545
|
tbo_children=None,
|
543
546
|
global_num_tokens_gpu=None,
|
544
547
|
global_num_tokens_cpu=None,
|
548
|
+
dp_padding_mode=None,
|
545
549
|
gathered_buffer=gathered_buffer,
|
546
550
|
global_num_tokens_for_logprob_gpu=None,
|
547
551
|
global_num_tokens_for_logprob_cpu=None,
|
sglang/srt/utils.py
CHANGED
@@ -744,9 +744,13 @@ def load_image(
|
|
744
744
|
image = Image.open(BytesIO(image_file))
|
745
745
|
elif image_file.startswith("http://") or image_file.startswith("https://"):
|
746
746
|
timeout = int(os.getenv("REQUEST_TIMEOUT", "3"))
|
747
|
-
response = requests.get(image_file, stream=True, timeout=timeout)
|
748
|
-
|
749
|
-
|
747
|
+
response = requests.get(image_file, stream=True, timeout=timeout)
|
748
|
+
try:
|
749
|
+
response.raise_for_status()
|
750
|
+
image = Image.open(response.raw)
|
751
|
+
image.load() # Force loading to avoid issues after closing the stream
|
752
|
+
finally:
|
753
|
+
response.close()
|
750
754
|
elif image_file.lower().endswith(("png", "jpg", "jpeg", "webp", "gif")):
|
751
755
|
image = Image.open(image_file)
|
752
756
|
elif image_file.startswith("data:"):
|
@@ -933,71 +937,6 @@ def monkey_patch_vllm_gguf_config():
|
|
933
937
|
setattr(GGUFConfig, "get_quant_method", get_quant_method_with_embedding_replaced)
|
934
938
|
|
935
939
|
|
936
|
-
def maybe_set_triton_cache_manager() -> None:
|
937
|
-
"""Set environment variable to tell Triton to use a
|
938
|
-
custom cache manager"""
|
939
|
-
cache_manger = os.environ.get("TRITON_CACHE_MANAGER", None)
|
940
|
-
if cache_manger is None:
|
941
|
-
manager = "sglang.srt.utils:CustomCacheManager"
|
942
|
-
logger.debug("Setting Triton cache manager to: %s", manager)
|
943
|
-
os.environ["TRITON_CACHE_MANAGER"] = manager
|
944
|
-
|
945
|
-
|
946
|
-
class CustomCacheManager(FileCacheManager):
|
947
|
-
# Adapted from: https://github.com/tdoublep/vllm/blob/3307522289fdfefe323b6c00d0db696651989a2f/vllm/triton_utils/custom_cache_manager.py
|
948
|
-
def __init__(self, key, override=False, dump=False):
|
949
|
-
from sglang.srt.distributed.parallel_state import get_tp_group
|
950
|
-
|
951
|
-
self.key = key
|
952
|
-
self.lock_path = None
|
953
|
-
|
954
|
-
try:
|
955
|
-
module_path = "triton.runtime.cache"
|
956
|
-
cache_module = importlib.import_module(module_path)
|
957
|
-
|
958
|
-
default_cache_dir = getattr(cache_module, "default_cache_dir", None)
|
959
|
-
default_dump_dir = getattr(cache_module, "default_dump_dir", None)
|
960
|
-
default_override_dir = getattr(cache_module, "default_override_dir", None)
|
961
|
-
except (ModuleNotFoundError, AttributeError) as e:
|
962
|
-
default_cache_dir = None
|
963
|
-
default_dump_dir = None
|
964
|
-
default_override_dir = None
|
965
|
-
|
966
|
-
if dump:
|
967
|
-
self.cache_dir = (
|
968
|
-
default_dump_dir()
|
969
|
-
if default_dump_dir is not None
|
970
|
-
else os.path.join(Path.home(), ".triton", "dump")
|
971
|
-
)
|
972
|
-
self.cache_dir = os.path.join(self.cache_dir, self.key)
|
973
|
-
self.lock_path = os.path.join(self.cache_dir, "lock")
|
974
|
-
os.makedirs(self.cache_dir, exist_ok=True)
|
975
|
-
elif override:
|
976
|
-
self.cache_dir = (
|
977
|
-
default_override_dir()
|
978
|
-
if default_override_dir is not None
|
979
|
-
else os.path.join(Path.home(), ".triton", "override")
|
980
|
-
)
|
981
|
-
self.cache_dir = os.path.join(self.cache_dir, self.key)
|
982
|
-
else:
|
983
|
-
# create cache directory if it doesn't exist
|
984
|
-
self.cache_dir = os.getenv("TRITON_CACHE_DIR", "").strip() or (
|
985
|
-
default_cache_dir()
|
986
|
-
if default_cache_dir is not None
|
987
|
-
else os.path.join(Path.home(), ".triton", "cache")
|
988
|
-
)
|
989
|
-
if self.cache_dir:
|
990
|
-
try:
|
991
|
-
self.cache_dir = f"{self.cache_dir}_{get_tp_group().local_rank}"
|
992
|
-
except:
|
993
|
-
self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
|
994
|
-
self.cache_dir = os.path.join(self.cache_dir, self.key)
|
995
|
-
self.lock_path = os.path.join(self.cache_dir, "lock")
|
996
|
-
os.makedirs(self.cache_dir, exist_ok=True)
|
997
|
-
else:
|
998
|
-
raise RuntimeError("Could not create or locate cache dir")
|
999
|
-
|
1000
|
-
|
1001
940
|
def set_ulimit(target_soft_limit=65535):
|
1002
941
|
# number of open files
|
1003
942
|
resource_type = resource.RLIMIT_NOFILE
|
@@ -2061,6 +2000,16 @@ def is_valid_ipv6_address(address: str) -> bool:
|
|
2061
2000
|
return False
|
2062
2001
|
|
2063
2002
|
|
2003
|
+
def maybe_wrap_ipv6_address(address: str) -> str:
|
2004
|
+
if is_valid_ipv6_address(address):
|
2005
|
+
return f"[{address}]"
|
2006
|
+
return address
|
2007
|
+
|
2008
|
+
|
2009
|
+
def format_tcp_address(ip: str, port: int) -> str:
|
2010
|
+
return f"tcp://{maybe_wrap_ipv6_address(ip)}:{port}"
|
2011
|
+
|
2012
|
+
|
2064
2013
|
def configure_ipv6(dist_init_addr):
|
2065
2014
|
addr = dist_init_addr
|
2066
2015
|
end = addr.find("]")
|
sglang/test/test_activation.py
CHANGED
@@ -3,9 +3,12 @@ import unittest
|
|
3
3
|
|
4
4
|
import torch
|
5
5
|
|
6
|
-
from sglang.srt.layers.activation import GeluAndMul
|
6
|
+
from sglang.srt.layers.activation import GeluAndMul, QuickGELU
|
7
|
+
from sglang.srt.utils import is_hip
|
7
8
|
from sglang.test.test_utils import CustomTestCase
|
8
9
|
|
10
|
+
_is_hip = is_hip()
|
11
|
+
|
9
12
|
|
10
13
|
class TestGeluAndMul(CustomTestCase):
|
11
14
|
DTYPES = [torch.half, torch.bfloat16]
|
@@ -52,5 +55,51 @@ class TestGeluAndMul(CustomTestCase):
|
|
52
55
|
self._run_gelu_and_mul_test(*params)
|
53
56
|
|
54
57
|
|
58
|
+
class TestQuickGELU(CustomTestCase):
|
59
|
+
DTYPES = [torch.half, torch.bfloat16]
|
60
|
+
NUM_TOKENS = [7, 83, 2048] # batch = sequence length
|
61
|
+
DIMS = [512, 4096, 5120, 13824] # all multiples of 16 bytes
|
62
|
+
SEEDS = [0]
|
63
|
+
|
64
|
+
@classmethod
|
65
|
+
def setUpClass(cls):
|
66
|
+
if not torch.cuda.is_available():
|
67
|
+
raise unittest.SkipTest("CUDA is not available")
|
68
|
+
torch.set_default_device("cuda")
|
69
|
+
|
70
|
+
def _run_gelu_quick_test(self, n_tok: int, dim: int, dtype: torch.dtype, seed: int):
|
71
|
+
torch.manual_seed(seed)
|
72
|
+
|
73
|
+
layer = QuickGELU().to(dtype=dtype)
|
74
|
+
|
75
|
+
x = torch.randn(n_tok, dim, dtype=dtype, device="cuda")
|
76
|
+
|
77
|
+
with torch.inference_mode():
|
78
|
+
ref = layer.forward_native(x) # x * sigmoid(1.702 * x), fp32 math
|
79
|
+
if _is_hip:
|
80
|
+
out = layer.forward_hip(x) # 128-bit vectorised kernel from sgl-kernel
|
81
|
+
else:
|
82
|
+
out = layer.forward_cuda(x)
|
83
|
+
|
84
|
+
tol = 1e-2 if dtype is torch.bfloat16 else 1e-3
|
85
|
+
self.assertTrue(
|
86
|
+
torch.allclose(out, ref, atol=tol, rtol=tol),
|
87
|
+
msg=f"Mismatch @ B={n_tok}, D={dim}, dtype={dtype}",
|
88
|
+
)
|
89
|
+
print(f"Match @ B={n_tok}, D={dim}, dtype={dtype}")
|
90
|
+
|
91
|
+
def test_quick_gelu(self):
|
92
|
+
for params in itertools.product(
|
93
|
+
self.NUM_TOKENS, self.DIMS, self.DTYPES, self.SEEDS
|
94
|
+
):
|
95
|
+
with self.subTest(
|
96
|
+
num_tokens=params[0],
|
97
|
+
dim=params[1],
|
98
|
+
dtype=params[2],
|
99
|
+
seed=params[3],
|
100
|
+
):
|
101
|
+
self._run_gelu_quick_test(*params)
|
102
|
+
|
103
|
+
|
55
104
|
if __name__ == "__main__":
|
56
105
|
unittest.main(verbosity=2)
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.9.post3"
|
1
|
+
__version__ = "0.4.9.post4"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.9.post3
|
3
|
+
Version: 0.4.9.post4
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -253,13 +253,13 @@ Requires-Dist: uvloop; extra == "runtime-common"
|
|
253
253
|
Requires-Dist: xgrammar==0.1.21; extra == "runtime-common"
|
254
254
|
Provides-Extra: srt
|
255
255
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
256
|
-
Requires-Dist: sgl-kernel==0.2.
|
256
|
+
Requires-Dist: sgl-kernel==0.2.7; extra == "srt"
|
257
257
|
Requires-Dist: torch==2.7.1; extra == "srt"
|
258
258
|
Requires-Dist: torchaudio==2.7.1; extra == "srt"
|
259
259
|
Requires-Dist: torchvision==0.22.1; extra == "srt"
|
260
260
|
Requires-Dist: cuda-python; extra == "srt"
|
261
261
|
Requires-Dist: einops; extra == "srt"
|
262
|
-
Requires-Dist: flashinfer_python==0.2.
|
262
|
+
Requires-Dist: flashinfer_python==0.2.9rc1; extra == "srt"
|
263
263
|
Provides-Extra: blackwell
|
264
264
|
Requires-Dist: sglang[runtime_common]; extra == "blackwell"
|
265
265
|
Requires-Dist: sgl-kernel; extra == "blackwell"
|
@@ -268,11 +268,11 @@ Requires-Dist: torchaudio==2.7.1; extra == "blackwell"
|
|
268
268
|
Requires-Dist: torchvision==0.22.1; extra == "blackwell"
|
269
269
|
Requires-Dist: cuda-python; extra == "blackwell"
|
270
270
|
Requires-Dist: einops; extra == "blackwell"
|
271
|
-
Requires-Dist: flashinfer_python==0.2.
|
271
|
+
Requires-Dist: flashinfer_python==0.2.9rc1; extra == "blackwell"
|
272
272
|
Provides-Extra: srt-hip
|
273
273
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
274
274
|
Requires-Dist: torch; extra == "srt-hip"
|
275
|
-
Requires-Dist: petit_kernel; extra == "srt-hip"
|
275
|
+
Requires-Dist: petit_kernel==0.0.2; extra == "srt-hip"
|
276
276
|
Provides-Extra: srt-xpu
|
277
277
|
Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
|
278
278
|
Provides-Extra: srt-hpu
|
@@ -10,7 +10,7 @@ sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
|
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
|
12
12
|
sglang/utils.py,sha256=leKs-YyX1_Jk216yoKX-KRYx_EbLiuemsHbD31xSjMw,16445
|
13
|
-
sglang/version.py,sha256=
|
13
|
+
sglang/version.py,sha256=7hKPeJG6JhOmqx_uKkIvWkBNoTZDwS93b5EIfp7zQXQ,28
|
14
14
|
sglang/eval/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
15
15
|
sglang/eval/loogle_eval.py,sha256=-CC2s2kh5qUoDrHRkQVkC_jNvBgNojXbf456ny5s78s,4557
|
16
16
|
sglang/lang/chat_template.py,sha256=HKlx7snSWFED8GKF5ex79sQrPWFw5TSXQM0_LsiD9Bc,20552
|
@@ -26,11 +26,11 @@ sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThc
|
|
26
26
|
sglang/lang/backend/openai.py,sha256=YFs7pCDK6_DoYmjTa-vgDVdaeE4PYr7E-sIC1q7vAns,16422
|
27
27
|
sglang/lang/backend/runtime_endpoint.py,sha256=oxLCLXCa26VfKyPV-wbRjLrQvqTG8NzoaVgwczzyIYc,17481
|
28
28
|
sglang/lang/backend/vertexai.py,sha256=gz0uNYyBb88jbPYz6ZIJ774fefrcbuVdoK33bphUZpI,4827
|
29
|
-
sglang/srt/_custom_ops.py,sha256=
|
29
|
+
sglang/srt/_custom_ops.py,sha256=9GPNUyqnybgvUnzdQ8n5_NgluHmQNw0Gptos0iLPfrY,5342
|
30
30
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
31
31
|
sglang/srt/code_completion_parser.py,sha256=KFa95OU0TeVjJkOqIgS4xV3kaJ5dFWOmAAgISyc1oEc,3803
|
32
32
|
sglang/srt/constants.py,sha256=0i-tEwG2BSYNDy96MxnGHV5HnBELkYcnsVGsE-R18o0,93
|
33
|
-
sglang/srt/conversation.py,sha256=
|
33
|
+
sglang/srt/conversation.py,sha256=8Ne6Wcjhdo9gSqIDNEcRCiPIIUawGjfDUimp5IHCeps,40396
|
34
34
|
sglang/srt/custom_op.py,sha256=7OPQAZe3PqDXk-xM0Wxl7pm9cazilNJfkNiYIWztWgI,3198
|
35
35
|
sglang/srt/debug_utils.py,sha256=slaFOY4BYDBFatkfu8FZlzai-u4LFS-5GUzdr-t50zE,2241
|
36
36
|
sglang/srt/hf_transformers_utils.py,sha256=UiGjp70ZpTaZDJ99Roc-2DTkxV3gR-2oJjjaNY9Ekh4,12391
|
@@ -39,11 +39,11 @@ sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,
|
|
39
39
|
sglang/srt/operations.py,sha256=ddQ8KO63L73OciaR8MZ9h2h83gKVY4-WuWgeEGowPJA,5346
|
40
40
|
sglang/srt/operations_strategy.py,sha256=Pwd2sKeRtKh9WJXgzlNr2tU9y6YMcI3MDLbatHqlMws,7145
|
41
41
|
sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
|
42
|
-
sglang/srt/reasoning_parser.py,sha256=
|
43
|
-
sglang/srt/server_args.py,sha256=
|
42
|
+
sglang/srt/reasoning_parser.py,sha256=8S7oLj5a1jPPwgs9Yl4sZ2A_wn_qobrmPjUJU0BswCQ,9224
|
43
|
+
sglang/srt/server_args.py,sha256=Qdg567MVXoy2spYojSMY8MbcwFsALS7AnHCGXfqazAM,84056
|
44
44
|
sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
|
45
|
-
sglang/srt/two_batch_overlap.py,sha256=
|
46
|
-
sglang/srt/utils.py,sha256=
|
45
|
+
sglang/srt/two_batch_overlap.py,sha256=Lx4bIXJb3hd9vzuJbmp3Crf4I8_NrT9fUbs-4LRzQXk,28711
|
46
|
+
sglang/srt/utils.py,sha256=CSiP9vU3mxsAKQAK9DvgE4Ygd0LiHrl4zNJP5nuhqkU,89974
|
47
47
|
sglang/srt/warmup.py,sha256=zldxhMlXpclRAJXmfBjJNUJd1eDizVdysibBvQyTVuA,1782
|
48
48
|
sglang/srt/configs/__init__.py,sha256=8EcVRP95epZ49DxBa6LgKWt7eO3Qe7Hrr3V1c6HkMnY,553
|
49
49
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
@@ -56,7 +56,7 @@ sglang/srt/configs/janus_pro.py,sha256=yI2c_aWBIKiTcK6RZxK6hq8CIxyYRH_NN8uaSNKsr
|
|
56
56
|
sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew,1358
|
57
57
|
sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
|
58
58
|
sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
|
59
|
-
sglang/srt/configs/model_config.py,sha256=
|
59
|
+
sglang/srt/configs/model_config.py,sha256=zLuzY5QlldWYuEb9zHlARTOuXYqZbXijhJHWMuy2lZA,28413
|
60
60
|
sglang/srt/configs/update_config.py,sha256=D-myV9Ombwp6wTF0RX9qUO6m5rxFiUcyse32FQADG28,4787
|
61
61
|
sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
|
62
62
|
sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
|
@@ -78,7 +78,7 @@ sglang/srt/disaggregation/decode.py,sha256=P-0OyFjjDfthEVhlJugdscWST_uoOVmHZlSlx
|
|
78
78
|
sglang/srt/disaggregation/decode_schedule_batch_mixin.py,sha256=8UdNaj8KKMLV5Cydhw8gnHM-zRrnKM_AAd3Qc-SRfWg,5648
|
79
79
|
sglang/srt/disaggregation/kv_events.py,sha256=yFbtwOeblcCmOXTjg00TopxnyjkmCBQIVz46KB8jetY,13555
|
80
80
|
sglang/srt/disaggregation/launch_lb.py,sha256=mcbAztN4gnHevw_T5R2_nWsymsDEY9vHkm7OJ1vr6cc,4211
|
81
|
-
sglang/srt/disaggregation/mini_lb.py,sha256=
|
81
|
+
sglang/srt/disaggregation/mini_lb.py,sha256=bpnz6d90nv7vAoiPxmxFKlM1Gjct0R4IPkF9SwwBk9I,14504
|
82
82
|
sglang/srt/disaggregation/prefill.py,sha256=3u6WCrjogZSqd6yWdBZe1HO4lf6w2e9M7sQtGHyax-Y,23512
|
83
83
|
sglang/srt/disaggregation/utils.py,sha256=ql5p9yjBCRcR0YMgcgRK6tH0kPGvLNTUgeXBr_BUcoI,12198
|
84
84
|
sglang/srt/disaggregation/ascend/__init__.py,sha256=-lxnica6fZYmYNpKwmE8yESUpQ5Sxf2DiZoBHzboKc8,146
|
@@ -87,37 +87,38 @@ sglang/srt/disaggregation/ascend/transfer_engine.py,sha256=_4PszsQL6wICdg5cqpR2N
|
|
87
87
|
sglang/srt/disaggregation/base/__init__.py,sha256=4VwUv0aWxwmVL1049XK82aLTNxmt0WY5RPy9li-wyVk,160
|
88
88
|
sglang/srt/disaggregation/base/conn.py,sha256=CPDAoAkYaFtVPLa1QROfwipSVe7MH6omzIBHzo8TSYk,2811
|
89
89
|
sglang/srt/disaggregation/common/__init__.py,sha256=7yl-EGLMVKRpBUaGF_7lwAsw2J_mqpRZV0238VGxD9o,126
|
90
|
-
sglang/srt/disaggregation/common/conn.py,sha256=
|
90
|
+
sglang/srt/disaggregation/common/conn.py,sha256=3YJVviObY3fkSfQbVxE5lgcqMmqM5nDjWfEjS5-SW6I,17054
|
91
91
|
sglang/srt/disaggregation/common/utils.py,sha256=SxRhAWisNK8seGhb5BXBJ5u53DF7yeKVPMWPcB5ywbE,1194
|
92
92
|
sglang/srt/disaggregation/fake/__init__.py,sha256=jJGWdXwaQiGIoR6atKqkQfkJmVyQ09l55VUN2WjwaeY,77
|
93
93
|
sglang/srt/disaggregation/fake/conn.py,sha256=oD1DArn1yDFZCu-X6p93uSLlAXEkt9lYxERICMznxGw,2286
|
94
94
|
sglang/srt/disaggregation/mooncake/__init__.py,sha256=0TgqkAdQI1YynbHY6c0QISvVoOSk-0SwCIq5rjPSmgE,156
|
95
|
-
sglang/srt/disaggregation/mooncake/conn.py,sha256=
|
96
|
-
sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256
|
95
|
+
sglang/srt/disaggregation/mooncake/conn.py,sha256=uHCmSIdo4-BfVez3uP4JmQ3vsRKXJhrcBAaRNi2dgBs,61314
|
96
|
+
sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=-ulLhz6DBDsmEjwNjjH5Pj8ngZKISgwBc_zL3ViGDN0,4761
|
97
97
|
sglang/srt/disaggregation/nixl/__init__.py,sha256=qODVPIGWUXKXq4zsRIcMYoAoAeg6nBIN9vdQOlVMANE,136
|
98
|
-
sglang/srt/disaggregation/nixl/conn.py,sha256=
|
98
|
+
sglang/srt/disaggregation/nixl/conn.py,sha256=eSof87fG21Dd4COszfnbeXIxne3TWvw0mSvCOkjsBZc,20323
|
99
99
|
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
100
100
|
sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
|
101
|
-
sglang/srt/distributed/parallel_state.py,sha256=
|
101
|
+
sglang/srt/distributed/parallel_state.py,sha256=crlAUplgruIVrDk6jFZ0-6M3W6FXX67buZ3T-4XDgFA,58530
|
102
102
|
sglang/srt/distributed/utils.py,sha256=aaCxATncLGnVgB0WlGpBdee0behKW8Dy_dakqcuKSaQ,8497
|
103
103
|
sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
|
104
|
-
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=
|
105
|
-
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=
|
104
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=p-GLYYnMSjcUvK30qcwYhXEJwGrOz6rR99p_23SWFj8,16570
|
105
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=Fx9D70NO2dplyuqOxTasMwkzONN3wfC4WOny3tWSbPA,14159
|
106
106
|
sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
|
107
107
|
sglang/srt/distributed/device_communicators/npu_communicator.py,sha256=bRXN1Md_4SHQGzQYZa2GrHv2zbIU5vSpkueHiAZL1xQ,1345
|
108
108
|
sglang/srt/distributed/device_communicators/pymscclpp.py,sha256=8Pgehd02v-BpHixTTB4OB9ZlxA7fyXiPF4Xp9F_heyU,10890
|
109
109
|
sglang/srt/distributed/device_communicators/pynccl.py,sha256=obXyCaZznZHSt486XCnEOBNG3Cen7ysuuMuGRlTTl-8,10095
|
110
110
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
111
|
-
sglang/srt/distributed/device_communicators/
|
111
|
+
sglang/srt/distributed/device_communicators/quick_all_reduce.py,sha256=4j1_E4azoxfd8wxtfFmt9rvbQncl8ny6wmTMl6gAkp0,9932
|
112
|
+
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=IrSrnpZnii0EJTS1CYRwEwE7gyHxJBVgI2QuJS3AKW0,20906
|
112
113
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
113
114
|
sglang/srt/entrypoints/EngineBase.py,sha256=yKN76witT2jz1zhmLHmPNLGMpK2UiOTaKQ2KPD8l99U,2594
|
114
|
-
sglang/srt/entrypoints/engine.py,sha256
|
115
|
-
sglang/srt/entrypoints/http_server.py,sha256
|
115
|
+
sglang/srt/entrypoints/engine.py,sha256=f8FI-BcjiIlcXdcy4NHmnM8SLNe8eYsNwyv4IoKYRZI,31545
|
116
|
+
sglang/srt/entrypoints/http_server.py,sha256=-unKM0lCK7Scd3LLTxLl-3vhit6lMb-wWfjLywelRy0,37214
|
116
117
|
sglang/srt/entrypoints/http_server_engine.py,sha256=_--j4U04OeJLlnnv1f0XmCd_Ry0z1FlhkrbePX8rYV0,4938
|
117
118
|
sglang/srt/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
118
|
-
sglang/srt/entrypoints/openai/protocol.py,sha256=
|
119
|
+
sglang/srt/entrypoints/openai/protocol.py,sha256=UUc5oJ7E60fYoUqMb53H85i7HHhDfmYLIFZidhNrP0s,19147
|
119
120
|
sglang/srt/entrypoints/openai/serving_base.py,sha256=5NJ2S_6B2NFSwn4nLp6eaeJ5iC3IcQzMEY9lW_gPcdA,5246
|
120
|
-
sglang/srt/entrypoints/openai/serving_chat.py,sha256=
|
121
|
+
sglang/srt/entrypoints/openai/serving_chat.py,sha256=WUstZZt80I2yrqUfBRx57-YMVMb45VqlyMnSmXuQFNw,38032
|
121
122
|
sglang/srt/entrypoints/openai/serving_completions.py,sha256=emIutVmnJgOgGRuAqdB80-kVHUfQbE67n1VS-76o4QY,16690
|
122
123
|
sglang/srt/entrypoints/openai/serving_embedding.py,sha256=hqPt2ELpT1yQ8sBXo801aLJ3sExoPXs_K5ZQtC2vpAs,6274
|
123
124
|
sglang/srt/entrypoints/openai/serving_rerank.py,sha256=8n6y5kC6UhRpWrDtzH0eIp0dznW12FT60MLCR4bvD2U,3333
|
@@ -135,31 +136,31 @@ sglang/srt/eplb/eplb_algorithms/deepseek.py,sha256=mMZT7zAAArccdRS0xXxifvMb3qn9e
|
|
135
136
|
sglang/srt/eplb/eplb_algorithms/deepseek_vec.py,sha256=Vzy5Iarua1VgVHzjBNZaVV_vt1LY1BLtJz7PmzB701k,10654
|
136
137
|
sglang/srt/eplb/eplb_simulator/__init__.py,sha256=HIZaXqsvsOCMMJ81dSs1afSGZ5G6OiOZEtHpltyzzjY,21
|
137
138
|
sglang/srt/eplb/eplb_simulator/reader.py,sha256=OlsWqsuXqVQY7PfvvrP-vsl2Ww2Kg4vD_K9BltM_QHo,1828
|
138
|
-
sglang/srt/function_call/base_format_detector.py,sha256=
|
139
|
+
sglang/srt/function_call/base_format_detector.py,sha256=FKBzsGFHfNl3Wvccsdvv9ftjbCZIyyjOLj2kHgKvX9g,15901
|
139
140
|
sglang/srt/function_call/core_types.py,sha256=wLUsW8mUA-i-ISz5QUDL7Ejq72W-K1HIhFa9Wm-_oA8,786
|
140
|
-
sglang/srt/function_call/deepseekv3_detector.py,sha256=
|
141
|
-
sglang/srt/function_call/ebnf_composer.py,sha256=
|
142
|
-
sglang/srt/function_call/function_call_parser.py,sha256=
|
143
|
-
sglang/srt/function_call/kimik2_detector.py,sha256=
|
144
|
-
sglang/srt/function_call/llama32_detector.py,sha256=
|
145
|
-
sglang/srt/function_call/mistral_detector.py,sha256=
|
146
|
-
sglang/srt/function_call/pythonic_detector.py,sha256=
|
147
|
-
sglang/srt/function_call/qwen25_detector.py,sha256=
|
148
|
-
sglang/srt/function_call/
|
141
|
+
sglang/srt/function_call/deepseekv3_detector.py,sha256=WzJbwCe8DoOi8guX9pdVMd70CViVrwoaxi8PZwIC9pg,9712
|
142
|
+
sglang/srt/function_call/ebnf_composer.py,sha256=cG0js5-bSl4FK66DyrTaREqLXXbtuLQxHBzxx6xaK3Q,14700
|
143
|
+
sglang/srt/function_call/function_call_parser.py,sha256=G6b2VmE2mHarRuXlzAWEY6-yWoEbt830LmykE3Jnz9s,7904
|
144
|
+
sglang/srt/function_call/kimik2_detector.py,sha256=7unW_GwH7I6jrh2BZbw22mWRZzQ0OMTYDz_xVX5abPU,10116
|
145
|
+
sglang/srt/function_call/llama32_detector.py,sha256=trZja7IXc2IWUko5PRj7p4pRU6XCVjtxr-7qJNr0EWo,3647
|
146
|
+
sglang/srt/function_call/mistral_detector.py,sha256=Ts2HrJdu5r2lCCMVKPZfnCds9pt1K5H1EaOQrF_HD60,4686
|
147
|
+
sglang/srt/function_call/pythonic_detector.py,sha256=yAatItKtVDf9uhlB78wPWKP3fbefLJDNGGzFV5ey-YA,8756
|
148
|
+
sglang/srt/function_call/qwen25_detector.py,sha256=nv83_sipZMU6Vie3J4HeNiZVCSRU5ei7601j50779HA,5181
|
149
|
+
sglang/srt/function_call/qwen3_coder_detector.py,sha256=Zwpx4NF3dL7tOKnzczmvfmY3tZQ-vJyehbvtco-8tkM,5478
|
149
150
|
sglang/srt/function_call/utils.py,sha256=__ImDF2kNyoLWsYO5RYoryvy1mmgEjnjXlCvLv-uLCM,1695
|
150
|
-
sglang/srt/layers/activation.py,sha256=
|
151
|
+
sglang/srt/layers/activation.py,sha256=uhfhh10N5iLoLEBCvFPRjCfDxu5jXe1ehHJb3ISIjig,7840
|
151
152
|
sglang/srt/layers/amx_utils.py,sha256=1mENgHK2B8mgaD1oMtgbZ15Jmy_Uu1QueBmo09Ff2iA,2865
|
152
|
-
sglang/srt/layers/communicator.py,sha256=
|
153
|
-
sglang/srt/layers/dp_attention.py,sha256=
|
153
|
+
sglang/srt/layers/communicator.py,sha256=c5pJObNyP_7JsIWgLau_E-1ovtPCpAcge254fjo9Rqw,19988
|
154
|
+
sglang/srt/layers/dp_attention.py,sha256=tOre7il5Cppu930-tzM6d4AgjHuIJMCMlg9hJkuBcjA,11382
|
154
155
|
sglang/srt/layers/elementwise.py,sha256=MyQUflyKEfPZ-BggW1Kd4hB53RFD6FXGc2S5LXjx_do,16026
|
155
156
|
sglang/srt/layers/flashinfer_comm_fusion.py,sha256=fkTcAB7qYwSWi95qI3Rqq0JUyDpJdcYkd4TYkWO01HI,5891
|
156
157
|
sglang/srt/layers/layernorm.py,sha256=ooqA-t-vY5erbKBwqnOotfDsJRTygP5E10CfzEAVF6M,8657
|
157
158
|
sglang/srt/layers/linear.py,sha256=NzjLqZrZNYcc3z2KtA9-n6pnSdBHHwU6NF_BbcAWp48,52471
|
158
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
159
|
+
sglang/srt/layers/logits_processor.py,sha256=9eZQE8pI-coXeK1XQFrBI0_aENZ2cGTtmD1FptWda5k,26270
|
159
160
|
sglang/srt/layers/multimodal.py,sha256=YVR69WW-2aGDcZHT8IVJ6F_LRM7wraZr8VjrPDXqDmA,2104
|
160
161
|
sglang/srt/layers/parameter.py,sha256=jCg0G-12GZqTa9gGOqEtVCsnnuBGnYg0VmF6pc6oCFg,17455
|
161
162
|
sglang/srt/layers/pooler.py,sha256=uZ6WX1FLMEafZwusyZdm6KuVlIwSjbKrdwk2qzgqNGk,3812
|
162
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
163
|
+
sglang/srt/layers/radix_attention.py,sha256=vtzCPfEiWYunglcLfqO9dcATb-LXzU4sFBwIWsWHbiY,3513
|
163
164
|
sglang/srt/layers/rotary_embedding.py,sha256=EhxI0E8jcTWZ2COpnku7crbW8Hew5fe_ujMndj7hKvE,52246
|
164
165
|
sglang/srt/layers/sampler.py,sha256=xNds1migup2s6b9_pS6ljkJUkvNtv7nmTGeIdOzoQ6w,11182
|
165
166
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
@@ -167,7 +168,7 @@ sglang/srt/layers/utils.py,sha256=IWGg1Hb7c33Z3LHRPVJyUAzp3BnSid23ZWXAmJ_Jvp8,12
|
|
167
168
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=FSAxHh1w9Qz4A2_8OQfIP5qsmq1s_g-Od-VLJ4vqQPw,22355
|
168
169
|
sglang/srt/layers/attention/aiter_backend.py,sha256=7sEUgViw-xl3yok91yyOD9gTi8lQmME0g0ZiKVTCcyI,32851
|
169
170
|
sglang/srt/layers/attention/ascend_backend.py,sha256=jPCsU9_gH1iZNoZHD9nCeDdVdXqBt31LI65N55BTJPg,8250
|
170
|
-
sglang/srt/layers/attention/base_attn_backend.py,sha256=
|
171
|
+
sglang/srt/layers/attention/base_attn_backend.py,sha256=_vM05ddWy6SaoqWYPR8w5lflSE4MhcVFR271Q9EKK8E,3600
|
171
172
|
sglang/srt/layers/attention/cutlass_mla_backend.py,sha256=SIR7sKCCegwzahSz82I3gsDyN5TkKoa4yG4-pBQWBi4,9813
|
172
173
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
|
173
174
|
sglang/srt/layers/attention/flashattention_backend.py,sha256=rYsBAsFS1qNNe-3CVIaXxWRqA3eul1jVQXvfZ_wlpwg,94076
|
@@ -192,13 +193,13 @@ sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNp
|
|
192
193
|
sglang/srt/layers/moe/cutlass_w4a8_moe.py,sha256=bc8s5Oc36pCF0VozxZuqFOOU2fov-4wGPlyduye3Imk,7296
|
193
194
|
sglang/srt/layers/moe/fused_moe_native.py,sha256=SkOFgU23aZfHmLhnkVhdUWGx0-2wbo6uu9ohKBp2yOI,3486
|
194
195
|
sglang/srt/layers/moe/router.py,sha256=UrPieRvemN7Ew48gtG7DA2xhNDBRSnZxzugTEBI-0_E,12006
|
195
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
196
|
+
sglang/srt/layers/moe/topk.py,sha256=bDHFcFQscnGDpCk7VejoChbpEVVz8Ph4FdzwcQRny0U,25047
|
196
197
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
197
198
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=UFH-XacP3aBjAcn6_SeC641jGzjSrY0fz50IB9URix4,42706
|
198
199
|
sglang/srt/layers/moe/ep_moe/layer.py,sha256=0vJW7RoEJ8Y3wD3q8pk289Gx5KNSVNJBbdpveliyXyM,46910
|
199
200
|
sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=aiBE3mjvWV3eBrFGH9J44tuJncQwOjRS_XeyBNCEtqM,24379
|
200
201
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=4NXZHbCw-G-uSnNUj4up0yh3xBDPnT-x0pdoIr0lku8,831
|
201
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
202
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=6NcY-6UHQUSVP4qzc8jis4h-W9BEV4lnGmrJMx2S6eA,57164
|
202
203
|
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=BUbo3OFiSjqs34MK09McFYqSsKguDcG6KP1905WUMFA,23933
|
203
204
|
sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=hoWVdf8ry8IDGH2bfk2WW-y2S5h5haLTGanBSwkkeE0,5848
|
204
205
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
@@ -361,6 +362,7 @@ sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py,sha256=hoWVdf8ry8ID
|
|
361
362
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=3T8_rF2PEojhgTMyQ8DscXgJCWWdWfDPj4M434zWcA4,3243
|
362
363
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
363
364
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LNx1W_BsWZcpHomiScCRap46dV0-F7S_w3Htskoqlm8,3263
|
365
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json",sha256=cWkEpNqnyn0QS8HcgWiwWI1xqh_U93_S1kyNeb69aOc,3238
|
364
366
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=fo1akCuGoqcGwQgfh56hApgg-wLXfo9kHHksE_6m1F4,3262
|
365
367
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENDYnC1ljPNpDVHdmqR7UbgDWT1IP_55ZIQ_Oae34mI,3264
|
366
368
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5UAs8Z1myw8F-5FY2HHWd5WMT2ii9X-aYIYkf3FsA8s,3269
|
@@ -381,20 +383,20 @@ sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRC9FOn9exNvK4QHbUeBj3Hhv3
|
|
381
383
|
sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
|
382
384
|
sglang/srt/layers/quantization/kv_cache.py,sha256=_9pF5rwvB7ta6Gdc5YKVVGbNzYwqmhIx4TrX1-xnodQ,3261
|
383
385
|
sglang/srt/layers/quantization/marlin_utils.py,sha256=ShsoxcVTYo1Es3XKp9mlQNw-TsRF_o4mHIO5MfAP5HA,25906
|
384
|
-
sglang/srt/layers/quantization/modelopt_quant.py,sha256=
|
386
|
+
sglang/srt/layers/quantization/modelopt_quant.py,sha256=ZxnHbYkpSt20xwV1zhZp3Xk93JzxBQOGHobZWIMluII,37872
|
385
387
|
sglang/srt/layers/quantization/moe_wna16.py,sha256=NGcFjiMXwfOX10dkHAY3EgZHxox2k7I402Bvwj3jW1Q,19105
|
386
388
|
sglang/srt/layers/quantization/petit.py,sha256=74Jn0eICq23v_b4rEctDaYTINtbqkdM6IXycT-_FbbI,8954
|
387
389
|
sglang/srt/layers/quantization/petit_utils.py,sha256=-gy4zMhqNoIA1R0n7-5C0efV54jHonCUgPDUUFjGsyM,3245
|
388
390
|
sglang/srt/layers/quantization/qoq.py,sha256=jM96uJfKz3vo1B6SSIrQXNT3vnD3UJrepK2eY-tSQU4,8139
|
389
391
|
sglang/srt/layers/quantization/scalar_type.py,sha256=nBk4THjeRWMGW3hKWRdrjfQ0gwpuBxmYqapDBaRpnr4,12395
|
390
392
|
sglang/srt/layers/quantization/unquant.py,sha256=hzyGGJoclb1oANy3hYxrEK76F7V7LNjU2rd3NWDpeGg,13997
|
391
|
-
sglang/srt/layers/quantization/utils.py,sha256=
|
393
|
+
sglang/srt/layers/quantization/utils.py,sha256=9ZmfS0MMqueQ6zIxMIFoQy2OFBQ5vHjOHBgVvUn7ews,16213
|
392
394
|
sglang/srt/layers/quantization/w4afp8.py,sha256=r36F5f5tAan8omW2UYZwK7xIwZUAWYHBbwJtmIIVzIY,9672
|
393
395
|
sglang/srt/layers/quantization/w8a8_fp8.py,sha256=I_W9A4Pnluaf-SaT8zq-tAJTt1acpxfE_kG5mqdc5cE,10250
|
394
396
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=VGrVELArNuyw4FEtUefaeK8ayuX7cpXj1OOqCmOCzm4,34902
|
395
397
|
sglang/srt/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
396
398
|
sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=DsPCI82A4rqI6mjKo-WorIKUweppFV7-16Qku-zW9_g,25897
|
397
|
-
sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=
|
399
|
+
sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=vwcgwGuPhH2kHBfg7DnsndzffrbZx8xuFsl9Qeegd9s,24286
|
398
400
|
sglang/srt/layers/quantization/compressed_tensors/utils.py,sha256=mnUmKWFQUnY8bVoFHUuNVwqsfS-cefeR-ofyaihCXcY,7621
|
399
401
|
sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=HWMTnmrj-mUCRXgcOwnnXLrvrAE-ONdPTSzSImjHCMA,347
|
400
402
|
sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=tdKJC8c3SX8T3z8JL-1YCsg4ftcv55Wxt0vZrYftpX8,1635
|
@@ -559,8 +561,9 @@ sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py,sha256=On_uvP1Thx
|
|
559
561
|
sglang/srt/lora/layers.py,sha256=FaTYf0w3zbKKKAT1nJdAzGRtxvySB6FpNKXvEi5RHH0,12958
|
560
562
|
sglang/srt/lora/lora.py,sha256=sRoU7BdlTV4cnBv1-u3QjisVR9XQrrqAsmhxdNtd8dU,9433
|
561
563
|
sglang/srt/lora/lora_config.py,sha256=qDgMTx_69jyJUl29O5FxLzYa0BMhqYVXWXfyyVOvGm0,1684
|
562
|
-
sglang/srt/lora/lora_manager.py,sha256=
|
563
|
-
sglang/srt/lora/
|
564
|
+
sglang/srt/lora/lora_manager.py,sha256=ABuqg1IhY9LNXgJd_v3Yx_VHS52t4SapHmhLiH1thdc,20147
|
565
|
+
sglang/srt/lora/lora_registry.py,sha256=q8HRG6YeWNKQFxbtaBlXSLGOgh0EtMAKPCKAsNEXdQQ,4747
|
566
|
+
sglang/srt/lora/mem_pool.py,sha256=UYvTgEPvBoVsaX5rEnipebOAlo75pML6_NO9GfCFzl8,12148
|
564
567
|
sglang/srt/lora/utils.py,sha256=5G0KiDEn-Zg4OgIOjWnU2ZdQGNstZswoAog9xU4GVDA,5660
|
565
568
|
sglang/srt/lora/backend/base_backend.py,sha256=EIz8I-GIrdmK4fISw3ENhbJVVITaxKfyLxHXGPU4fPs,5044
|
566
569
|
sglang/srt/lora/backend/flashinfer_backend.py,sha256=el6IAB4kTgDTbwCggmqFuukliyoapN5X6FLksG-4wJ8,4151
|
@@ -570,39 +573,39 @@ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=QURCYxHNR8Ls4SQtt3dvdgjvdDVh
|
|
570
573
|
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=iz5scFNo2YFTeiM8beWg9Z1oZI-6AM_T1wBMCQ6qp2Q,6485
|
571
574
|
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=Ksova04wgeGsFqGOXWqJtMYaHgyUYcx8VU42BZQOkVA,5129
|
572
575
|
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=VqCAFvUtq_l-0RGIkx3W_fzD55QcW2FAcqpvSHOBFp4,5115
|
573
|
-
sglang/srt/managers/cache_controller.py,sha256
|
576
|
+
sglang/srt/managers/cache_controller.py,sha256=-je55waMGlOXvEKICJ9CQCRngSwhdae2agFJCwO2E7o,24503
|
574
577
|
sglang/srt/managers/configure_logging.py,sha256=8sNXZ2z9pBWOwn-X3wyz013Ob8Nbm1zDxRkxoZjH-l4,1633
|
575
578
|
sglang/srt/managers/data_parallel_controller.py,sha256=PZ-wOcAFn3PQqMB6I2vjIsFmplf0nlNl5hqTDKTHTG8,12112
|
576
579
|
sglang/srt/managers/detokenizer_manager.py,sha256=SpLxTsSPKBZfD-ZMhJ5zpPPGuUb8PmcYgFSL9CsurU4,10696
|
577
|
-
sglang/srt/managers/io_struct.py,sha256=
|
580
|
+
sglang/srt/managers/io_struct.py,sha256=LNLG7utMidF63eYWlWYxQ3G2GzOqaQzQc-9ld2yaycI,37859
|
578
581
|
sglang/srt/managers/mm_utils.py,sha256=sHEOfM7T46btXHDArz5lS6pRL7gleAJpEz7l_z_MnWs,25476
|
579
582
|
sglang/srt/managers/multimodal_processor.py,sha256=mzCrN-8H0bE0iMO8UzxmYmhE2M1qsbVJXGdhAYcjjYA,2016
|
580
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
583
|
+
sglang/srt/managers/schedule_batch.py,sha256=GC0Pmz9eBI3da1zN0EkNvEQiOyB-qiouTcqITLx7peU,76068
|
581
584
|
sglang/srt/managers/schedule_policy.py,sha256=BFYItMXtrsO5157tpFhYwLpZ8NSlzR4_gNZC1iE10D4,22400
|
582
|
-
sglang/srt/managers/scheduler.py,sha256=
|
585
|
+
sglang/srt/managers/scheduler.py,sha256=OrWS4M1z68igMpFF-UROXXGec1PJOMBeUwqeOCLoBaI,120513
|
583
586
|
sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=Ei-9lyfrSQAlL9_ULuJLAhA7qpvUzjsBDzBBTDdnt4Q,31394
|
584
587
|
sglang/srt/managers/session_controller.py,sha256=dzlMNZlo20FTSl64QqK7y7pElsdCy8ICOWWBPTBVwgs,6040
|
585
588
|
sglang/srt/managers/template_manager.py,sha256=RrwRA2oqId_PMQ98qJQGwIxMroOxiorl2sGC9ARou_0,8543
|
586
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
587
|
-
sglang/srt/managers/tp_worker.py,sha256=
|
589
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=EIBoSgTGKuvYTy_hwTRvw69sbKex7jqPaFuE3P0lXqY,74848
|
590
|
+
sglang/srt/managers/tp_worker.py,sha256=JEY1nJ6bx7IEYR-NlB2Ybuxw0dnD6N5G4XdFYOg4LQw,11257
|
588
591
|
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=kK0pn8mz-XgLGvVlJK6LvDVp4lnFk5vJH-Cs9NQ5Un4,10959
|
589
592
|
sglang/srt/managers/utils.py,sha256=9_VGE48EK0PXVJ26aYvbRJ6n7gIZALvCcf6uZCccCgM,1369
|
590
|
-
sglang/srt/mem_cache/allocator.py,sha256=
|
593
|
+
sglang/srt/mem_cache/allocator.py,sha256=id4riucsG9d-P1ikj3-DLJbzhM8-MuJfvDmB8H4g9-o,23364
|
591
594
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=hLS2ncTMAz7Kpdk5pNwn5c6g8b61_K9OCBL4Vj1xsYc,2801
|
592
595
|
sglang/srt/mem_cache/chunk_cache.py,sha256=FU_fq8e_dodgU8EkRqE4Jr0sKwCXQnKmapO88k6_JfU,3207
|
593
596
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
594
|
-
sglang/srt/mem_cache/hicache_storage.py,sha256=
|
595
|
-
sglang/srt/mem_cache/hiradix_cache.py,sha256=
|
597
|
+
sglang/srt/mem_cache/hicache_storage.py,sha256=pvNeooVmCZBeZFgSHWNGPZzqtHI_qJ5BpTy_A4i6gg8,5539
|
598
|
+
sglang/srt/mem_cache/hiradix_cache.py,sha256=1cx4yT4Or7HyvC1Qpi-thEfPoht9lLE9dcgWQf8SDWc,24758
|
596
599
|
sglang/srt/mem_cache/memory_pool.py,sha256=jQdWgOr6hpMLbGaEJc4immBgZG_MZbx8LyOv029ASVw,40932
|
597
|
-
sglang/srt/mem_cache/memory_pool_host.py,sha256=
|
600
|
+
sglang/srt/mem_cache/memory_pool_host.py,sha256=xcjYyC94FOH_NguWbLUG4vF_CSBcxA_msuRsBasYCPo,10387
|
598
601
|
sglang/srt/mem_cache/multimodal_cache.py,sha256=wZl2KeEl3xeoEsYdH33UoM-FO8kqfLo_XUgereJVvoM,1348
|
599
602
|
sglang/srt/mem_cache/radix_cache.py,sha256=kp4JiVxh3eS4MsFxGKIYjYAb14B777qc1bHrABPqBMc,19101
|
600
603
|
sglang/srt/mem_cache/swa_radix_cache.py,sha256=4qt_gS7GuHiekd6VvKkx54dB2GHJzNA_kj71Np6jTDA,40300
|
601
604
|
sglang/srt/metrics/collector.py,sha256=FH6Wil2lMK7d7q74CfxfdULCaHKDXeAoo8UBZqqdepM,20289
|
602
605
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
603
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
604
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
605
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
606
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=6njRGysrhBDi2bAt5DyjGnyrexYvuEVsAJwQBBW8Ut0,31886
|
607
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=NvUhReevoy6UfO4991P_5MPQyfefC3BOoTwTqUwMbYk,38430
|
608
|
+
sglang/srt/model_executor/model_runner.py,sha256=wVRFMda8oHYIeSfzINLlDMEQvff1uWMeJOwrS3U8_9Q,70162
|
606
609
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
607
610
|
sglang/srt/model_loader/loader.py,sha256=wxSUYrqv23w9yHYy9Lm0ozWggs9pf6Co4yqe1UO8PZk,59799
|
608
611
|
sglang/srt/model_loader/utils.py,sha256=svSrPQxViBz_-0_sByu4coCYrzJ51-4FCi_bFqa6rDo,4474
|
@@ -616,7 +619,7 @@ sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,155
|
|
616
619
|
sglang/srt/models/deepseek.py,sha256=sqobTr9a9c5pNLNJZE-WrJIE_qBq0lC5gCGv9NpGU_Q,17364
|
617
620
|
sglang/srt/models/deepseek_janus_pro.py,sha256=td8xGs6ARfJ8AQCYwUhMOZoWigrAs7m3trF5-kXCqik,70418
|
618
621
|
sglang/srt/models/deepseek_nextn.py,sha256=47fehxRdiOizr0rdLg5f1fzQEx6gGAOcDcWKtblloyk,5928
|
619
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
622
|
+
sglang/srt/models/deepseek_v2.py,sha256=IG8hrn5j0o88mywclcyKe_djKLClC1Low6IDLNcQ23A,104131
|
620
623
|
sglang/srt/models/deepseek_vl2.py,sha256=7X3gI4DE7guUtNJvMLf06jO8MUHKa2Aan8evZg5hsXc,13061
|
621
624
|
sglang/srt/models/exaone.py,sha256=TpO-rtCpEZ8Ua7hGFnS8l2oAYhY0Pij50grc9WQ2mvc,13576
|
622
625
|
sglang/srt/models/gemma.py,sha256=2atKwFN5wCsJCmBRmURX9vaCcFjxlFirL7xsnG5KPlI,14180
|
@@ -659,7 +662,7 @@ sglang/srt/models/mistral.py,sha256=ni7PppA_5X4ivTnIps3yoEaMEL-G6Gaf9b5hGC1vU5Q,
|
|
659
662
|
sglang/srt/models/mixtral.py,sha256=j2whKt4GdQqtpKL5iN0Vb4TO4el1E2qEpUCI57E_47Q,17222
|
660
663
|
sglang/srt/models/mixtral_quant.py,sha256=UUIq7lpUonD9IoGePyl59oJYEcVwT9wUXvtVp5tjQ9Y,15406
|
661
664
|
sglang/srt/models/mllama.py,sha256=OdX7brhyfGiMFPulxF77QOUi-Kcscg3Qo4ipyxzs9yA,39670
|
662
|
-
sglang/srt/models/mllama4.py,sha256=
|
665
|
+
sglang/srt/models/mllama4.py,sha256=yt2YNE7S6buLAaqSfxD2LcJL1ZRzUzKWU21rCbzDzQo,20465
|
663
666
|
sglang/srt/models/olmo.py,sha256=7-q_fA6XXdG7kPUjpUzYkzMUWJobuSjhqjYw9xSUs_c,12671
|
664
667
|
sglang/srt/models/olmo2.py,sha256=azmljhJF4ivcQfUtfsAUxq3ducE4tRKTL6iwe0IKYMg,14327
|
665
668
|
sglang/srt/models/olmoe.py,sha256=WdPtyKsd3u8OpVsXmDwWuVpt1gTF0679IRkc1pvfnMM,16040
|
@@ -677,11 +680,11 @@ sglang/srt/models/qwen2_5_vl.py,sha256=vOuIyK_NLuGgz9LlAdyM7cSQUU93VnBCYXUr_n4iO
|
|
677
680
|
sglang/srt/models/qwen2_audio.py,sha256=lVeYmC29eVZIR9I6ZjFe4xvC0wSXR0NtGCli6fSXtms,7870
|
678
681
|
sglang/srt/models/qwen2_classification.py,sha256=dGrMm4ebd30_lBhHOhaV57ig2iOTx3nqB4GEzsrRIM8,2747
|
679
682
|
sglang/srt/models/qwen2_eagle.py,sha256=6spFg6JYQGeUXLv2Q5eMkVzZvnqGyvW6SdVBtFMxlTM,4806
|
680
|
-
sglang/srt/models/qwen2_moe.py,sha256=
|
683
|
+
sglang/srt/models/qwen2_moe.py,sha256=3OV_pR36OImR9U6H3hziSxDEIsPLKEAr_oO3hKzlNSc,25806
|
681
684
|
sglang/srt/models/qwen2_rm.py,sha256=-mQXDEv11p-I1HXgYLTtY6ROem6UYorO958WsDrzsgs,2837
|
682
685
|
sglang/srt/models/qwen2_vl.py,sha256=ZRAL32Ymt4ZCBqB0JorXAm4ZBrECZ9EbaXKaTSikkQM,21366
|
683
686
|
sglang/srt/models/qwen3.py,sha256=COSC1TsCQNTq3E1QoVJYj1yTn-CFEVEX7keRwrrBXLs,20121
|
684
|
-
sglang/srt/models/qwen3_moe.py,sha256=
|
687
|
+
sglang/srt/models/qwen3_moe.py,sha256=nSLBfzBzYQ6wpDmXPF2OV1v2kB1wTlnAV37LMj5guMQ,33911
|
685
688
|
sglang/srt/models/registry.py,sha256=bH9H0OiNV0Cn7FRBM30Oh6dxICTs9mJscdLtHYPJvxc,3783
|
686
689
|
sglang/srt/models/roberta.py,sha256=3k53V2Gbezk3jU-D03thXx8csGn5DxFK01ZQ9WFiWPI,9828
|
687
690
|
sglang/srt/models/siglip.py,sha256=DIVJRwdtpLD2QT1kVPIHw7Bn9BE40xJbkHujDNzkjgA,9892
|
@@ -711,7 +714,7 @@ sglang/srt/multimodal/processors/qwen_audio.py,sha256=ILePOlBI5v8GI962q-pZ0dV-tk
|
|
711
714
|
sglang/srt/multimodal/processors/qwen_vl.py,sha256=6maMXOwX3QICy3YNVSAF4VwxrZHCT4XH0MAAXIwIaeM,10464
|
712
715
|
sglang/srt/multimodal/processors/vila.py,sha256=Dj66JpKbSdqpLNfk-NykYjhGAGuzBeMo7lFVCkeHkAM,2142
|
713
716
|
sglang/srt/sampling/custom_logit_processor.py,sha256=AwODYVJdRkcQ8PGtJrhzKsqAgn8XZLQbAmR9fGiQzmc,1608
|
714
|
-
sglang/srt/sampling/sampling_batch_info.py,sha256=
|
717
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=Duk4Fp99QZyOFld3i4AGwkW9FlULY86CNKv6v0_kA4w,14683
|
715
718
|
sglang/srt/sampling/sampling_params.py,sha256=HbVcCeSXgPW19MRB-v0FTG1D-zHDAY6-toxyndSl-zI,6462
|
716
719
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=mtN8grFEcaBUhl4yBHmw8NNirt_i6uKO2cDNLHOpZQE,496
|
717
720
|
sglang/srt/sampling/penaltylib/frequency_penalty.py,sha256=Loc3qjJTksNc5s-DV7QZHjgqoo5pxk7-nZzxwyhD2tQ,2144
|
@@ -719,10 +722,10 @@ sglang/srt/sampling/penaltylib/min_new_tokens.py,sha256=rdU_D7RoIcrQPhysNQEzmr4T
|
|
719
722
|
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=XM-Lm6u7gYPtMZrTIc0FR4QxNZxBH5s_Cj82umyCzYk,5721
|
720
723
|
sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=NRh10AJrrQlGJ6S-enGdRefrTrWpyqrSm-aNnyqQNQQ,2119
|
721
724
|
sglang/srt/speculative/build_eagle_tree.py,sha256=O9LJNaBflgJdWT94D-rGH1gJFJ18nst2oOD8HnA2mZ4,12859
|
722
|
-
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=
|
723
|
-
sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py,sha256=
|
724
|
-
sglang/srt/speculative/eagle_utils.py,sha256=
|
725
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
725
|
+
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=kVqMGj25jxtJlP-nTSlq_QJgoabFqKOU7-0WeUFhvw8,14509
|
726
|
+
sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py,sha256=hLjaD66K3ZE5SWsjvSiSdSexQ55F3EuSnfb4oe7rrSY,15261
|
727
|
+
sglang/srt/speculative/eagle_utils.py,sha256=x5MKA2GkecNAXE58KKhPJRy5KxCLhRkD-C_kVSZIyzc,46433
|
728
|
+
sglang/srt/speculative/eagle_worker.py,sha256=8yPQZxpBvcr--Ooo4nov3LqzkLRL-GOLJlwu91wMAmQ,38650
|
726
729
|
sglang/srt/speculative/spec_info.py,sha256=rhaKG0TzyF9XZYHEWp1jccwTBohSNsUDvxHFtAoOl18,709
|
727
730
|
sglang/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
728
731
|
sglang/test/few_shot_gsm8k.py,sha256=qnEejCyPHGEMBEiNc7T4-t9lvoo2DNN72RXw-ei_TnY,4300
|
@@ -736,7 +739,7 @@ sglang/test/simple_eval_humaneval.py,sha256=s8YuKIBkNUEOBP4XKsTMRdXGv5aRhl0oJyl-
|
|
736
739
|
sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
|
737
740
|
sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
|
738
741
|
sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
|
739
|
-
sglang/test/test_activation.py,sha256=
|
742
|
+
sglang/test/test_activation.py,sha256=bDBFl4Y30siXj7Ifl66fc2HytOrcZdHFiP2i9K8TIsY,3140
|
740
743
|
sglang/test/test_block_fp8.py,sha256=hd3GXuC_wxn0HGAN3KCs6Nxf98AHO8XaRvGsvWS-CHc,21822
|
741
744
|
sglang/test/test_block_fp8_deep_gemm_blackwell.py,sha256=Hnhq4kkyINHb4ONedkp5Kf7Xx0MjOoOBYL8J7ETvOVA,8180
|
742
745
|
sglang/test/test_block_fp8_ep.py,sha256=XGZEs7cJiLjatRZcdcav4aFIQxnQB8Vq-b3HmAsw-Ww,10859
|
@@ -755,8 +758,8 @@ sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
755
758
|
sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
|
756
759
|
sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
|
757
760
|
sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
|
758
|
-
sglang-0.4.9.
|
759
|
-
sglang-0.4.9.
|
760
|
-
sglang-0.4.9.
|
761
|
-
sglang-0.4.9.
|
762
|
-
sglang-0.4.9.
|
761
|
+
sglang-0.4.9.post4.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
762
|
+
sglang-0.4.9.post4.dist-info/METADATA,sha256=moSWmWQPcs4bmIJ3FodeQnGZEf9NdVMbJLQlXXX4EAA,27348
|
763
|
+
sglang-0.4.9.post4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
764
|
+
sglang-0.4.9.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
765
|
+
sglang-0.4.9.post4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|