sglang 0.4.8__py3-none-any.whl → 0.4.8.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/configs/model_config.py +1 -0
- sglang/srt/conversation.py +1 -0
- sglang/srt/custom_op.py +7 -1
- sglang/srt/disaggregation/base/conn.py +2 -0
- sglang/srt/disaggregation/decode.py +1 -1
- sglang/srt/disaggregation/mooncake/conn.py +289 -48
- sglang/srt/disaggregation/mooncake/transfer_engine.py +31 -1
- sglang/srt/disaggregation/nixl/conn.py +94 -46
- sglang/srt/disaggregation/prefill.py +3 -2
- sglang/srt/disaggregation/utils.py +12 -11
- sglang/srt/entrypoints/engine.py +5 -3
- sglang/srt/entrypoints/openai/protocol.py +47 -4
- sglang/srt/entrypoints/openai/serving_chat.py +52 -76
- sglang/srt/entrypoints/openai/serving_completions.py +1 -0
- sglang/srt/entrypoints/openai/serving_embedding.py +1 -0
- sglang/srt/layers/activation.py +7 -0
- sglang/srt/layers/attention/flashattention_backend.py +24 -14
- sglang/srt/layers/layernorm.py +15 -0
- sglang/srt/layers/linear.py +18 -1
- sglang/srt/layers/logits_processor.py +12 -3
- sglang/srt/layers/moe/ep_moe/layer.py +79 -12
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py +19 -2
- sglang/srt/layers/moe/fused_moe_native.py +7 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +7 -2
- sglang/srt/layers/moe/fused_moe_triton/layer.py +73 -14
- sglang/srt/layers/moe/topk.py +26 -0
- sglang/srt/layers/quantization/fp8_utils.py +5 -4
- sglang/srt/layers/rotary_embedding.py +103 -11
- sglang/srt/layers/vocab_parallel_embedding.py +14 -1
- sglang/srt/managers/expert_distribution.py +21 -0
- sglang/srt/managers/io_struct.py +10 -2
- sglang/srt/managers/multimodal_processors/base_processor.py +44 -9
- sglang/srt/managers/multimodal_processors/gemma3n.py +97 -0
- sglang/srt/managers/schedule_batch.py +9 -1
- sglang/srt/managers/scheduler.py +42 -6
- sglang/srt/model_executor/cuda_graph_runner.py +1 -1
- sglang/srt/model_executor/model_runner.py +5 -2
- sglang/srt/model_loader/loader.py +45 -10
- sglang/srt/model_loader/weight_utils.py +89 -0
- sglang/srt/models/deepseek_nextn.py +7 -4
- sglang/srt/models/deepseek_v2.py +147 -4
- sglang/srt/models/gemma3n_audio.py +949 -0
- sglang/srt/models/gemma3n_causal.py +1009 -0
- sglang/srt/models/gemma3n_mm.py +511 -0
- sglang/srt/models/hunyuan.py +771 -0
- sglang/srt/server_args.py +16 -2
- sglang/srt/two_batch_overlap.py +4 -1
- sglang/srt/utils.py +71 -0
- sglang/version.py +1 -1
- {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/METADATA +1 -1
- {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/RECORD +54 -49
- {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/top_level.txt +0 -0
sglang/srt/server_args.py
CHANGED
@@ -47,6 +47,7 @@ class ServerArgs:
|
|
47
47
|
tokenizer_mode: str = "auto"
|
48
48
|
skip_tokenizer_init: bool = False
|
49
49
|
load_format: str = "auto"
|
50
|
+
model_loader_extra_config: str = "{}"
|
50
51
|
trust_remote_code: bool = False
|
51
52
|
dtype: str = "auto"
|
52
53
|
kv_cache_dtype: str = "auto"
|
@@ -563,6 +564,7 @@ class ServerArgs:
|
|
563
564
|
# Model and port args
|
564
565
|
parser.add_argument(
|
565
566
|
"--model-path",
|
567
|
+
"--model",
|
566
568
|
type=str,
|
567
569
|
help="The path of the model weights. This can be a local folder or a Hugging Face repo ID.",
|
568
570
|
required=True,
|
@@ -632,6 +634,13 @@ class ServerArgs:
|
|
632
634
|
"layer before loading another to make the peak memory envelope "
|
633
635
|
"smaller.",
|
634
636
|
)
|
637
|
+
parser.add_argument(
|
638
|
+
"--model-loader-extra-config",
|
639
|
+
type=str,
|
640
|
+
help="Extra config for model loader. "
|
641
|
+
"This will be passed to the model loader corresponding to the chosen load_format.",
|
642
|
+
default=ServerArgs.model_loader_extra_config,
|
643
|
+
)
|
635
644
|
parser.add_argument(
|
636
645
|
"--trust-remote-code",
|
637
646
|
action="store_true",
|
@@ -1692,6 +1701,9 @@ class PortArgs:
|
|
1692
1701
|
# The ipc filename for rpc call between Engine and Scheduler
|
1693
1702
|
rpc_ipc_name: str
|
1694
1703
|
|
1704
|
+
# The ipc filename for Scheduler to send metrics
|
1705
|
+
metrics_ipc_name: str
|
1706
|
+
|
1695
1707
|
@staticmethod
|
1696
1708
|
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
|
1697
1709
|
port = server_args.port + random.randint(100, 1000)
|
@@ -1711,6 +1723,7 @@ class PortArgs:
|
|
1711
1723
|
detokenizer_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
1712
1724
|
nccl_port=port,
|
1713
1725
|
rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
1726
|
+
metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
1714
1727
|
)
|
1715
1728
|
else:
|
1716
1729
|
# DP attention. Use TCP + port to handle both single-node and multi-node.
|
@@ -1730,9 +1743,9 @@ class PortArgs:
|
|
1730
1743
|
port_base = int(dist_init_port) + 1
|
1731
1744
|
if dp_rank is None:
|
1732
1745
|
# TokenizerManager to DataParallelController
|
1733
|
-
scheduler_input_port = port_base +
|
1746
|
+
scheduler_input_port = port_base + 4
|
1734
1747
|
else:
|
1735
|
-
scheduler_input_port = port_base +
|
1748
|
+
scheduler_input_port = port_base + 4 + 1 + dp_rank
|
1736
1749
|
|
1737
1750
|
return PortArgs(
|
1738
1751
|
tokenizer_ipc_name=f"tcp://{dist_init_host}:{port_base}",
|
@@ -1740,6 +1753,7 @@ class PortArgs:
|
|
1740
1753
|
detokenizer_ipc_name=f"tcp://{dist_init_host}:{port_base + 1}",
|
1741
1754
|
nccl_port=port,
|
1742
1755
|
rpc_ipc_name=f"tcp://{dist_init_host}:{port_base + 2}",
|
1756
|
+
metrics_ipc_name=f"tcp://{dist_init_host}:{port_base + 3}",
|
1743
1757
|
)
|
1744
1758
|
|
1745
1759
|
|
sglang/srt/two_batch_overlap.py
CHANGED
@@ -346,7 +346,10 @@ class TboForwardBatchPreparer:
|
|
346
346
|
)
|
347
347
|
|
348
348
|
# TODO improve, e.g. unify w/ `init_raw`
|
349
|
-
if
|
349
|
+
if (
|
350
|
+
global_server_args_dict["moe_dense_tp_size"] == 1
|
351
|
+
and batch.gathered_buffer is not None
|
352
|
+
):
|
350
353
|
sum_len = end_token_index - start_token_index
|
351
354
|
gathered_buffer = torch.zeros(
|
352
355
|
(sum_len, batch.gathered_buffer.shape[1]),
|
sglang/srt/utils.py
CHANGED
@@ -2457,6 +2457,77 @@ def cpu_has_amx_support():
|
|
2457
2457
|
return torch._C._cpu._is_amx_tile_supported() and is_intel_amx_backend_available
|
2458
2458
|
|
2459
2459
|
|
2460
|
+
def prepack_weight_if_needed(weight):
|
2461
|
+
if weight.device != torch.device("cpu"):
|
2462
|
+
return weight
|
2463
|
+
if not cpu_has_amx_support():
|
2464
|
+
return weight
|
2465
|
+
|
2466
|
+
return torch.ops.sgl_kernel.convert_weight_packed(weight)
|
2467
|
+
|
2468
|
+
|
2469
|
+
# TODO: currently gemm kernel has the below requirements:
|
2470
|
+
# OC % TILE_N == 0, where TILE_N = 16
|
2471
|
+
# IC % TILE_K == 0, where TILE_K = 32
|
2472
|
+
def dim_is_supported(weight):
|
2473
|
+
return weight.size(0) % 16 == 0 and weight.size(1) % 32 == 0
|
2474
|
+
|
2475
|
+
|
2476
|
+
def _process_weight_after_loading(module, weight_names, transpose_dims=None) -> None:
|
2477
|
+
# Pack weight for get better performance on CPU
|
2478
|
+
devices = {getattr(module, weight_name).device for weight_name in weight_names}
|
2479
|
+
assert len(devices) == 1, f"Expects all weights to be on the same device"
|
2480
|
+
device = devices.pop()
|
2481
|
+
|
2482
|
+
if transpose_dims:
|
2483
|
+
assert len(weight_names) == len(
|
2484
|
+
transpose_dims
|
2485
|
+
), "len(weight_names) should be equal to len(transpose_dims)"
|
2486
|
+
|
2487
|
+
for i, weight_name in enumerate(weight_names):
|
2488
|
+
weight_tensor = getattr(module, weight_name)
|
2489
|
+
|
2490
|
+
# We don't pack weight or use intel amx backend if any weight of this module has unsupported dim.
|
2491
|
+
if not dim_is_supported(weight_tensor):
|
2492
|
+
logger.warning(
|
2493
|
+
f"Expects weight.size(0) % 16 == 0 and weight.size(1) % 32 == 0 "
|
2494
|
+
f"but {weight_tensor.size(0)=} and {weight_tensor.size(1)=} in {module}. "
|
2495
|
+
f"{module} won't use intel amx backend."
|
2496
|
+
)
|
2497
|
+
module.use_intel_amx_backend = False
|
2498
|
+
return
|
2499
|
+
|
2500
|
+
if transpose_dims and transpose_dims[i]:
|
2501
|
+
weight_tensor = weight_tensor.transpose(*transpose_dims[i])
|
2502
|
+
|
2503
|
+
packed_weight = torch.nn.Parameter(
|
2504
|
+
prepack_weight_if_needed(weight_tensor),
|
2505
|
+
requires_grad=False,
|
2506
|
+
)
|
2507
|
+
packed_weight.__dict__ = weight_tensor.__dict__
|
2508
|
+
setattr(module, weight_name, packed_weight)
|
2509
|
+
|
2510
|
+
module.use_intel_amx_backend = (
|
2511
|
+
device == torch.device("cpu") and cpu_has_amx_support()
|
2512
|
+
)
|
2513
|
+
|
2514
|
+
if (
|
2515
|
+
module.use_intel_amx_backend
|
2516
|
+
and hasattr(module, "bias")
|
2517
|
+
and module.bias is not None
|
2518
|
+
):
|
2519
|
+
module.bias = torch.nn.Parameter(module.bias.data.float(), requires_grad=False)
|
2520
|
+
|
2521
|
+
|
2522
|
+
class PackWeightMethod:
|
2523
|
+
def __init__(self, weight_names, transpose_dims=None):
|
2524
|
+
self.weight_names = weight_names
|
2525
|
+
self.transpose_dims = transpose_dims
|
2526
|
+
|
2527
|
+
def process_weights_after_loading(self, module) -> None:
|
2528
|
+
_process_weight_after_loading(module, self.weight_names, self.transpose_dims)
|
2529
|
+
|
2530
|
+
|
2460
2531
|
class LazyValue:
|
2461
2532
|
def __init__(self, creator: Callable):
|
2462
2533
|
self._creator = creator
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.8"
|
1
|
+
__version__ = "0.4.8.post1"
|
@@ -11,7 +11,7 @@ sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
|
11
11
|
sglang/math_utils.py,sha256=QYtbaIA76P33ojcOrL32VR6yXWv-Od_3WCZNO4kQ-YQ,177
|
12
12
|
sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
|
13
13
|
sglang/utils.py,sha256=VH6zrnkjzcR3DE__WfVph6wswJ4JuzoQD47VmbZ38eI,16435
|
14
|
-
sglang/version.py,sha256=
|
14
|
+
sglang/version.py,sha256=E6iPG1WE6yyF-fToZYo4ZM-iwegVLaXOKuEXlAJ_kvg,28
|
15
15
|
sglang/eval/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
16
16
|
sglang/eval/loogle_eval.py,sha256=pRPVA4fxGmT3_oXvXnlNE-UlPrcQGLBJF-OSE9YWJXM,4336
|
17
17
|
sglang/lang/chat_template.py,sha256=HKlx7snSWFED8GKF5ex79sQrPWFw5TSXQM0_LsiD9Bc,20552
|
@@ -31,8 +31,8 @@ sglang/srt/_custom_ops.py,sha256=0lJRMTKTjoxJPh1qQnnMY02Z3SyBDi7LJI34IBLQsgQ,446
|
|
31
31
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
32
32
|
sglang/srt/code_completion_parser.py,sha256=KFa95OU0TeVjJkOqIgS4xV3kaJ5dFWOmAAgISyc1oEc,3803
|
33
33
|
sglang/srt/constants.py,sha256=0i-tEwG2BSYNDy96MxnGHV5HnBELkYcnsVGsE-R18o0,93
|
34
|
-
sglang/srt/conversation.py,sha256=
|
35
|
-
sglang/srt/custom_op.py,sha256=
|
34
|
+
sglang/srt/conversation.py,sha256=xkV_OWdotT_Tf1QzpxLL-oZ-THAKKtJj49Q9B_L-WT8,37144
|
35
|
+
sglang/srt/custom_op.py,sha256=87r2PIgiGLREsIZQ8qsUD-zgI66_54y9GrE0buXzoCI,3076
|
36
36
|
sglang/srt/debug_utils.py,sha256=slaFOY4BYDBFatkfu8FZlzai-u4LFS-5GUzdr-t50zE,2241
|
37
37
|
sglang/srt/hf_transformers_utils.py,sha256=S1ZF4aFKTocenXvj9ti6M-buiCBus4FrP1AdwFz_Wbw,11943
|
38
38
|
sglang/srt/jinja_template_utils.py,sha256=Jc6Vl-lYtUyMBSWWQGIMpJQ1O-ceU9c8DbOi_bN-oVk,6719
|
@@ -42,10 +42,10 @@ sglang/srt/operations.py,sha256=ddQ8KO63L73OciaR8MZ9h2h83gKVY4-WuWgeEGowPJA,5346
|
|
42
42
|
sglang/srt/operations_strategy.py,sha256=6DDLEdmkLrFDTXWZoFO0q9MZjEThvfyvoO-LbQsNpPQ,7023
|
43
43
|
sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
|
44
44
|
sglang/srt/reasoning_parser.py,sha256=vf0kWBM4IXwbuzGBIOMdiXdn9gavqkHb1QIaTbkU7vc,6742
|
45
|
-
sglang/srt/server_args.py,sha256=
|
45
|
+
sglang/srt/server_args.py,sha256=MVp3qB7PmJlw82hY8dQPoVsJZ26RpmiBqLDgzq1vjOc,73495
|
46
46
|
sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
|
47
|
-
sglang/srt/two_batch_overlap.py,sha256=
|
48
|
-
sglang/srt/utils.py,sha256=
|
47
|
+
sglang/srt/two_batch_overlap.py,sha256=M5Ca3LV4_j7g3o78WQDjWzgsSCpY4E75SIKnwoa7tQg,21922
|
48
|
+
sglang/srt/utils.py,sha256=Xg7K6VaTFDrEdW8b07AVREPHdx9M4vJuOI3aIct82JM,80640
|
49
49
|
sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
|
50
50
|
sglang/srt/configs/__init__.py,sha256=8EcVRP95epZ49DxBa6LgKWt7eO3Qe7Hrr3V1c6HkMnY,553
|
51
51
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
@@ -58,7 +58,7 @@ sglang/srt/configs/janus_pro.py,sha256=Rrb7kQsNaUP-TiZrjNk8Lr1momFrql8ScEunnrH0_
|
|
58
58
|
sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew,1358
|
59
59
|
sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
|
60
60
|
sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
|
61
|
-
sglang/srt/configs/model_config.py,sha256=
|
61
|
+
sglang/srt/configs/model_config.py,sha256=ESZEDSvxVfRH4CiZoy8JVpapJU5yPTYCAxZB38AJ7b0,25245
|
62
62
|
sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
|
63
63
|
sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
|
64
64
|
sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
|
@@ -75,25 +75,25 @@ sglang/srt/constrained/outlines_jump_forward.py,sha256=Gyubp-FVetxd6wP4FA_kD6cCX
|
|
75
75
|
sglang/srt/constrained/reasoner_grammar_backend.py,sha256=YFxWuOTTo4e6cGhxnaBwuwli6f8FTUJtszib8dBq_8I,3207
|
76
76
|
sglang/srt/constrained/xgrammar_backend.py,sha256=u9Ao-XX22UzALkATEAR_-0EoyBTxMTJV590qd9LAUHM,7906
|
77
77
|
sglang/srt/constrained/triton_ops/bitmask_ops.py,sha256=WjTen9iuuFWLzkE1mAHQZB9_7aIy5QH8Wjf-lB-Fams,4614
|
78
|
-
sglang/srt/disaggregation/decode.py,sha256=
|
78
|
+
sglang/srt/disaggregation/decode.py,sha256=lqpmpQYMXzDCM19R4Pe4RF8q92UvBqKUITauFGDylEs,33335
|
79
79
|
sglang/srt/disaggregation/decode_schedule_batch_mixin.py,sha256=8UdNaj8KKMLV5Cydhw8gnHM-zRrnKM_AAd3Qc-SRfWg,5648
|
80
80
|
sglang/srt/disaggregation/kv_events.py,sha256=yFbtwOeblcCmOXTjg00TopxnyjkmCBQIVz46KB8jetY,13555
|
81
81
|
sglang/srt/disaggregation/launch_lb.py,sha256=mcbAztN4gnHevw_T5R2_nWsymsDEY9vHkm7OJ1vr6cc,4211
|
82
82
|
sglang/srt/disaggregation/mini_lb.py,sha256=BBeIdeZZxi7Ra5_hukHHX3TNdyfZ4tP1GBUxC-qrv_g,14401
|
83
|
-
sglang/srt/disaggregation/prefill.py,sha256=
|
84
|
-
sglang/srt/disaggregation/utils.py,sha256=
|
83
|
+
sglang/srt/disaggregation/prefill.py,sha256=jWOqYXBBiNuOC78a028FN6mlklzEki4MjRdTA6UE5zU,23518
|
84
|
+
sglang/srt/disaggregation/utils.py,sha256=LBiRVbJ4jjYJXn0JL4VMTmGwAMAIGqJ_zaqPLjrmfkU,11339
|
85
85
|
sglang/srt/disaggregation/base/__init__.py,sha256=4VwUv0aWxwmVL1049XK82aLTNxmt0WY5RPy9li-wyVk,160
|
86
|
-
sglang/srt/disaggregation/base/conn.py,sha256=
|
86
|
+
sglang/srt/disaggregation/base/conn.py,sha256=CPDAoAkYaFtVPLa1QROfwipSVe7MH6omzIBHzo8TSYk,2811
|
87
87
|
sglang/srt/disaggregation/common/__init__.py,sha256=7yl-EGLMVKRpBUaGF_7lwAsw2J_mqpRZV0238VGxD9o,126
|
88
88
|
sglang/srt/disaggregation/common/conn.py,sha256=CZR1lMCE_mpSkvjc6BBmSr1SbHY1uZuxjpanazD-YXc,16071
|
89
89
|
sglang/srt/disaggregation/common/utils.py,sha256=SxRhAWisNK8seGhb5BXBJ5u53DF7yeKVPMWPcB5ywbE,1194
|
90
90
|
sglang/srt/disaggregation/fake/__init__.py,sha256=jJGWdXwaQiGIoR6atKqkQfkJmVyQ09l55VUN2WjwaeY,77
|
91
91
|
sglang/srt/disaggregation/fake/conn.py,sha256=oD1DArn1yDFZCu-X6p93uSLlAXEkt9lYxERICMznxGw,2286
|
92
92
|
sglang/srt/disaggregation/mooncake/__init__.py,sha256=0TgqkAdQI1YynbHY6c0QISvVoOSk-0SwCIq5rjPSmgE,156
|
93
|
-
sglang/srt/disaggregation/mooncake/conn.py,sha256=
|
94
|
-
sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=
|
93
|
+
sglang/srt/disaggregation/mooncake/conn.py,sha256=MATJkiS_5Vh8fc8Gx6S6cI3zCYDbgKXVEAa1xkUsCKA,58879
|
94
|
+
sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=JYB9T-EPdJNfv4I_sVpmMOZCOJ14itD97ws6tTvj240,4281
|
95
95
|
sglang/srt/disaggregation/nixl/__init__.py,sha256=qODVPIGWUXKXq4zsRIcMYoAoAeg6nBIN9vdQOlVMANE,136
|
96
|
-
sglang/srt/disaggregation/nixl/conn.py,sha256=
|
96
|
+
sglang/srt/disaggregation/nixl/conn.py,sha256=G2l-FuXUvtsEo3Z24vyQ8iTcFjqG-sise4ItAtiny30,20327
|
97
97
|
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
98
98
|
sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
|
99
99
|
sglang/srt/distributed/parallel_state.py,sha256=0_G1TtBOFMYDix5rfuEHYBMpy9A-OuPs9yFd5nCiZ8Q,53927
|
@@ -109,15 +109,15 @@ sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6
|
|
109
109
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=d8mykYmXM1lfbPm8GNtqCF0Un_pdXYjbNmsgoVFyyow,20874
|
110
110
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
111
111
|
sglang/srt/entrypoints/EngineBase.py,sha256=FCwqQMJE_8CCCMThAXUZaKafsHvh2BaQ31J-7_ormwA,2310
|
112
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
112
|
+
sglang/srt/entrypoints/engine.py,sha256=uSlN1vorCBCYk8n8AnCpbnCu-4p9JjukLyXJZZzTrGU,30283
|
113
113
|
sglang/srt/entrypoints/http_server.py,sha256=Fyb3z9OKXF9h_-duhWsHLjZayE3uoZ2dHpeV757bxXc,34745
|
114
114
|
sglang/srt/entrypoints/http_server_engine.py,sha256=ncN45ti9mawSOimPSedI6zugfoMhMQOYh4tmdfC9LcE,4936
|
115
115
|
sglang/srt/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
116
|
-
sglang/srt/entrypoints/openai/protocol.py,sha256=
|
116
|
+
sglang/srt/entrypoints/openai/protocol.py,sha256=QqC3XeJ0RxlFBsPuobjtf6c7iRkEOC8Td4E_GvjKwxM,18759
|
117
117
|
sglang/srt/entrypoints/openai/serving_base.py,sha256=5NJ2S_6B2NFSwn4nLp6eaeJ5iC3IcQzMEY9lW_gPcdA,5246
|
118
|
-
sglang/srt/entrypoints/openai/serving_chat.py,sha256=
|
119
|
-
sglang/srt/entrypoints/openai/serving_completions.py,sha256=
|
120
|
-
sglang/srt/entrypoints/openai/serving_embedding.py,sha256=
|
118
|
+
sglang/srt/entrypoints/openai/serving_chat.py,sha256=tWKWjspTpNnuk-Aqfl_S6OmpZe6gz5XxmSGEwfIvCsY,35935
|
119
|
+
sglang/srt/entrypoints/openai/serving_completions.py,sha256=emIutVmnJgOgGRuAqdB80-kVHUfQbE67n1VS-76o4QY,16690
|
120
|
+
sglang/srt/entrypoints/openai/serving_embedding.py,sha256=hqPt2ELpT1yQ8sBXo801aLJ3sExoPXs_K5ZQtC2vpAs,6274
|
121
121
|
sglang/srt/entrypoints/openai/serving_rerank.py,sha256=8n6y5kC6UhRpWrDtzH0eIp0dznW12FT60MLCR4bvD2U,3333
|
122
122
|
sglang/srt/entrypoints/openai/serving_score.py,sha256=ebAts-m6Pq-LTgFKwggkywBUrAgUSppHHVEzgwYHUzo,1955
|
123
123
|
sglang/srt/entrypoints/openai/usage_processor.py,sha256=9LTB5rqdRuMKyZrIXiUBuF_WKaSg9X45YdzERDxbtCY,2746
|
@@ -134,27 +134,27 @@ sglang/srt/function_call/mistral_detector.py,sha256=xNuVl2vDXVYbXyiXLkJZ9VM6njcw
|
|
134
134
|
sglang/srt/function_call/pythonic_detector.py,sha256=rtXSflE4w993a4OqphWG-WQzvwRz0v3dfgHOnte1fpI,8731
|
135
135
|
sglang/srt/function_call/qwen25_detector.py,sha256=9JfZem_5nw91Og2biwq8eIpUQjy_3kFz4TQI8Lc3Vow,4882
|
136
136
|
sglang/srt/function_call/utils.py,sha256=__ImDF2kNyoLWsYO5RYoryvy1mmgEjnjXlCvLv-uLCM,1695
|
137
|
-
sglang/srt/layers/activation.py,sha256=
|
137
|
+
sglang/srt/layers/activation.py,sha256=w8gr84LdpbBxmSjK9cfqFGXiGxALOwkBzqHChdZ6z7M,7327
|
138
138
|
sglang/srt/layers/communicator.py,sha256=WbefauUNbwfAtaBySi-rqqXkoFZZpxdOJURLBHpF5qA,18597
|
139
139
|
sglang/srt/layers/dp_attention.py,sha256=e-AgUTa70NsNgsw4hB1a-B_yDv7T8PyXW3jqR8sIKgY,9807
|
140
140
|
sglang/srt/layers/elementwise.py,sha256=XCrR2i-9dP-H6jQo2zUuquwZrsl_wEQqj5Wxk6WUf7o,13987
|
141
|
-
sglang/srt/layers/layernorm.py,sha256=
|
142
|
-
sglang/srt/layers/linear.py,sha256=
|
143
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
141
|
+
sglang/srt/layers/layernorm.py,sha256=x6VDTFxvcJMx2txpBW4Y7G1sWfKYsksNpf-L4_ySSDo,7660
|
142
|
+
sglang/srt/layers/linear.py,sha256=ToLkotx239ze3rwizk3r05Gg7_LJk39hAdjgqWwYusE,52462
|
143
|
+
sglang/srt/layers/logits_processor.py,sha256=TTR7LgSwthaH6Qfmcda2Ampibtt-JcwpLSs6-OJI_sQ,25604
|
144
144
|
sglang/srt/layers/multimodal.py,sha256=YVR69WW-2aGDcZHT8IVJ6F_LRM7wraZr8VjrPDXqDmA,2104
|
145
145
|
sglang/srt/layers/parameter.py,sha256=zqWyEzpWzP4NNTjq3G9khq6XofgpcmJqQLg6Vd4WyWE,15084
|
146
146
|
sglang/srt/layers/pooler.py,sha256=uZ6WX1FLMEafZwusyZdm6KuVlIwSjbKrdwk2qzgqNGk,3812
|
147
147
|
sglang/srt/layers/radix_attention.py,sha256=IlqRB4bk06FOH05_7zB8lik0xLpys7jFooLeCwdO0j8,3437
|
148
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
148
|
+
sglang/srt/layers/rotary_embedding.py,sha256=iOxdJEw9jhxYih7WpNkzlGybzewNu9GgqU8dLM-TyD0,52209
|
149
149
|
sglang/srt/layers/sampler.py,sha256=xNds1migup2s6b9_pS6ljkJUkvNtv7nmTGeIdOzoQ6w,11182
|
150
150
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
151
151
|
sglang/srt/layers/utils.py,sha256=IWGg1Hb7c33Z3LHRPVJyUAzp3BnSid23ZWXAmJ_Jvp8,1204
|
152
|
-
sglang/srt/layers/vocab_parallel_embedding.py,sha256=
|
152
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=65N0e4PlOwoTRZC9QpC2G83Crn-OI4rY9wZTRnOVNvg,23166
|
153
153
|
sglang/srt/layers/attention/aiter_backend.py,sha256=7sEUgViw-xl3yok91yyOD9gTi8lQmME0g0ZiKVTCcyI,32851
|
154
154
|
sglang/srt/layers/attention/base_attn_backend.py,sha256=KXVcCguwXh-PSrY9Y2aUrlXXUhWdbVxqVEF2_xIMvm4,3466
|
155
155
|
sglang/srt/layers/attention/cutlass_mla_backend.py,sha256=SIR7sKCCegwzahSz82I3gsDyN5TkKoa4yG4-pBQWBi4,9813
|
156
156
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
|
157
|
-
sglang/srt/layers/attention/flashattention_backend.py,sha256=
|
157
|
+
sglang/srt/layers/attention/flashattention_backend.py,sha256=j4rnX5IARna-pZmVnSR9kjf_yINGnkKO6itTLwkx82k,93278
|
158
158
|
sglang/srt/layers/attention/flashinfer_backend.py,sha256=Ug4SkZnuHjBBwPOj2TfLlg0eU_GoZvKjhY4oYRU_qqU,49666
|
159
159
|
sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=NirRlNFxD0S2EncvvmrerIxNfuTnYpvemZV3MR5_FnU,34040
|
160
160
|
sglang/srt/layers/attention/flashmla_backend.py,sha256=5iSic5ho-lkXNas9mR3uLbXbEl-do31gc8gjR7-a79k,20711
|
@@ -173,16 +173,16 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=waZsmpKIp8rTg
|
|
173
173
|
sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
|
174
174
|
sglang/srt/layers/moe/cutlass_moe.py,sha256=--bNTA2BGbHeULb_XXDoRUyWWbE-doHo5K5k1T0N0WA,14323
|
175
175
|
sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
|
176
|
-
sglang/srt/layers/moe/fused_moe_native.py,sha256=
|
176
|
+
sglang/srt/layers/moe/fused_moe_native.py,sha256=bW3KWxxz9rxKMUQqfmAtF-7ptTODA1pwLydE05ABDJE,5030
|
177
177
|
sglang/srt/layers/moe/router.py,sha256=5Aeqoix_AS4uymb665OJE904wVSBkQeFdZP4e7KKPvg,10530
|
178
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
178
|
+
sglang/srt/layers/moe/topk.py,sha256=_hIyTURqx6Id1C3NQmHVuPxivMN0ywmDoyhFvtgHIZY,18624
|
179
179
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
180
180
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=xdYak2dkrUJjmUigKJ-GbWfdf-tXlUKbvtJgxVekbMA,40130
|
181
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
182
|
-
sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=
|
181
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=U-R2mffzW_snuAxvX1AfN0xgcbt-w9fNFUidd2Hi4eQ,55783
|
182
|
+
sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=IR5RtdL9aIG04QaeySsq4Oy-S8obivBBZKbFfPCGpXc,24077
|
183
183
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
184
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
185
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
184
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=gM_nctg15Wc06pxiMq0IRE0QBCfNyebSsWwm5zdM_Uk,63225
|
185
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=AAEb9pvd3bxiuvIKV2FZboWNvffccYmhF9R09SQSRlo,32038
|
186
186
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
187
187
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
188
188
|
"sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -351,7 +351,7 @@ sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScL
|
|
351
351
|
sglang/srt/layers/quantization/blockwise_int8.py,sha256=vWyPZsRLhdKtSmjvlT5fsowBK_nEebYbDAUh2yqseGw,15285
|
352
352
|
sglang/srt/layers/quantization/fp8.py,sha256=Ne3K177lBdWI8TXyJs8qSaH67KaAv2j4LrZeHyqyH_8,44678
|
353
353
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=pUda_glnAprnFIj3VUgCUYMKb2-uK3UOC3yPahgRMBQ,34743
|
354
|
-
sglang/srt/layers/quantization/fp8_utils.py,sha256=
|
354
|
+
sglang/srt/layers/quantization/fp8_utils.py,sha256=QYHx_OLXFxFCmSMgoKmbJ3Vgl4mVEcXykdnhHO7tU0g,25650
|
355
355
|
sglang/srt/layers/quantization/gptq.py,sha256=d1frUjvXmZfQKkcMQY5t0BA4sXWHE9Jze24qxniptJE,26719
|
356
356
|
sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRC9FOn9exNvK4QHbUeBj3Hhv32VcyGphapFPt5b84,12625
|
357
357
|
sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
|
@@ -545,15 +545,15 @@ sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMa
|
|
545
545
|
sglang/srt/managers/data_parallel_controller.py,sha256=PZ-wOcAFn3PQqMB6I2vjIsFmplf0nlNl5hqTDKTHTG8,12112
|
546
546
|
sglang/srt/managers/detokenizer_manager.py,sha256=SpLxTsSPKBZfD-ZMhJ5zpPPGuUb8PmcYgFSL9CsurU4,10696
|
547
547
|
sglang/srt/managers/eplb_manager.py,sha256=YaxnvD1-wMV7BhtF9AxoNRVsJUdlQzFF7N-JDP43ojE,3411
|
548
|
-
sglang/srt/managers/expert_distribution.py,sha256=
|
548
|
+
sglang/srt/managers/expert_distribution.py,sha256=TzOSO7xFJ1VaxvbC2wqHq4l8UtWLX8K0rBmu5g_V2rU,31562
|
549
549
|
sglang/srt/managers/expert_location.py,sha256=ZSsH17k5bAgbE1wuvpGaHGueiyhfPrgQakBFPu9jswo,16669
|
550
550
|
sglang/srt/managers/expert_location_dispatch.py,sha256=U6-XLZ77RK0oy_JUVug2q-2LJjwoYX-js0_zhBNMXuM,4148
|
551
|
-
sglang/srt/managers/io_struct.py,sha256=
|
551
|
+
sglang/srt/managers/io_struct.py,sha256=gyP8JGyX4DSPYs4_0LIKUs4fj7tonrDhnreiSajgric,34022
|
552
552
|
sglang/srt/managers/mm_utils.py,sha256=mA9W4xZBnXfs-4ZeALvhdpGAYxrJGfOxBWyoHVrt44Q,26518
|
553
553
|
sglang/srt/managers/multimodal_processor.py,sha256=XlRYvNhF6XOssreRX9DZPhLSpps_VE62gSKw3EGdNPo,2088
|
554
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
554
|
+
sglang/srt/managers/schedule_batch.py,sha256=_Bt9hGWtiY3Dy0_GNijQVB9TT1b1V2xFOYK_M2aRvuo,73146
|
555
555
|
sglang/srt/managers/schedule_policy.py,sha256=0T8URzQmLvEmG-42-SFBBl9WnsOSwYO8-_CcBpuD38M,20474
|
556
|
-
sglang/srt/managers/scheduler.py,sha256=
|
556
|
+
sglang/srt/managers/scheduler.py,sha256=nejUktfqa3Qwf6TrN4CUVTYLELNwfUGFpExw-a8WFFw,110046
|
557
557
|
sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=aaj0z1PD3fGIOPoTtWwqoKsii0yQCR7txVFXMPECveQ,29879
|
558
558
|
sglang/srt/managers/session_controller.py,sha256=Lh1kruMcKqR7WVWYJRZbYgowtsssVlP7_paIVXLLIcE,5756
|
559
559
|
sglang/srt/managers/template_manager.py,sha256=RrwRA2oqId_PMQ98qJQGwIxMroOxiorl2sGC9ARou_0,8543
|
@@ -564,10 +564,11 @@ sglang/srt/managers/utils.py,sha256=9_VGE48EK0PXVJ26aYvbRJ6n7gIZALvCcf6uZCccCgM,
|
|
564
564
|
sglang/srt/managers/eplb_algorithms/__init__.py,sha256=wVUv2ZhhC-_VbLaStpk3vulzhqExwfHGZJQqoohs-Fw,1963
|
565
565
|
sglang/srt/managers/eplb_algorithms/deepseek.py,sha256=mMZT7zAAArccdRS0xXxifvMb3qn9enSt426uUTKeiq4,8340
|
566
566
|
sglang/srt/managers/eplb_algorithms/deepseek_vec.py,sha256=Vzy5Iarua1VgVHzjBNZaVV_vt1LY1BLtJz7PmzB701k,10654
|
567
|
-
sglang/srt/managers/multimodal_processors/base_processor.py,sha256=
|
567
|
+
sglang/srt/managers/multimodal_processors/base_processor.py,sha256=1fl0eW24ju9FiKflieYEo2mooYFayWGVtamxhAtcWJM,23348
|
568
568
|
sglang/srt/managers/multimodal_processors/clip.py,sha256=lRc2mcuDbAhZVf-0EfkO81pqDiol9zLvTpDqtPIBQ2k,1525
|
569
569
|
sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=CMfhhdq7u6GzT8ZENo7ByClvQEx-HKaTGVgdYM1vMNw,3460
|
570
570
|
sglang/srt/managers/multimodal_processors/gemma3.py,sha256=oBHXlbwto_84ZkjkW2A7F3Z7kNuDf039uDH4HVXKE1s,2290
|
571
|
+
sglang/srt/managers/multimodal_processors/gemma3n.py,sha256=UjAHeX4a2ZyPccCV_O9isxm61J-w5dglfhYO2IUQkyo,3578
|
571
572
|
sglang/srt/managers/multimodal_processors/internvl.py,sha256=ASv3MQ0Ju6oZG7UceS5ziy4rL2d8Xf1_LbIFmEAuz2E,9512
|
572
573
|
sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=nSZYKLoCZtv7sQIM21KCt4jpnzVfcsF84m9CFPWwR7s,2058
|
573
574
|
sglang/srt/managers/multimodal_processors/kimi_vl.py,sha256=8DER6QFDrmD0sZMjlAffY4z3jtBrrIYoU8ogpZIKNio,1868
|
@@ -590,14 +591,14 @@ sglang/srt/mem_cache/multimodal_cache.py,sha256=Q-lYcI-3HoLu0WJhE2F_An8g9mkZ8LwM
|
|
590
591
|
sglang/srt/mem_cache/radix_cache.py,sha256=ojr9_bUwnPocmpbGZXz8JKac4dS-PrfNYk8UqF4Gvi8,17936
|
591
592
|
sglang/srt/metrics/collector.py,sha256=C9QEJDOEdOPBwy2IJwFS3R6VbGzVzGs2xakKCCPvQDk,19903
|
592
593
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
593
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
594
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=MyZg9hw1dGcjIE5canm5TplUhFptIXt9FIKpDgXWjTQ,30450
|
594
595
|
sglang/srt/model_executor/expert_location_updater.py,sha256=HWLY5lJAWefy2tobWJKlHs3qlBSCS57EwSMdfUuPFc4,20585
|
595
596
|
sglang/srt/model_executor/forward_batch_info.py,sha256=ueHsjmGm52YqVK-8f-TRYpERCCdeSBuc2yaqxD9pWkQ,29268
|
596
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
597
|
+
sglang/srt/model_executor/model_runner.py,sha256=9EIHTDKRtXNrzURQYcC_rBjjWUPGnKHKYCV_GiW70zM,54490
|
597
598
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
598
|
-
sglang/srt/model_loader/loader.py,sha256
|
599
|
+
sglang/srt/model_loader/loader.py,sha256=-Pr8-YMaF3jIGXfhnEMM1WOloeWcPQnaUgSgWN6aAGI,57626
|
599
600
|
sglang/srt/model_loader/utils.py,sha256=zSZBPA9ErPmkf-HfCxJjhmiFwYueB15KUg7NFspLvPY,4454
|
600
|
-
sglang/srt/model_loader/weight_utils.py,sha256=
|
601
|
+
sglang/srt/model_loader/weight_utils.py,sha256=jkd4R6wroef5A3xpVe6rst5xosVitxndnNwH6cMp_zo,35668
|
601
602
|
sglang/srt/models/baichuan.py,sha256=HbvlErnkCSK4pRQYCSDxMcrn-1DQyfiNoeDcnRrJas8,15807
|
602
603
|
sglang/srt/models/bert.py,sha256=ODJe8YfNRP-hHsomFWk4_QpcuiSsNfjzGf256EDS0Pc,15802
|
603
604
|
sglang/srt/models/chatglm.py,sha256=cajLN9caBl09e0TwOFkiTTKDqwlbmHo_yS-NCjdeQW8,13957
|
@@ -606,8 +607,8 @@ sglang/srt/models/commandr.py,sha256=5Y_b3K0QY7D37nFGkyiGgY38RleRui_GJUYcHSuHUZo
|
|
606
607
|
sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,15598
|
607
608
|
sglang/srt/models/deepseek.py,sha256=ZnN02HdgXCB23Vno5V9UMUoOxH5HC82vNTwsVulUJ-o,17206
|
608
609
|
sglang/srt/models/deepseek_janus_pro.py,sha256=OeeI7vZbE4HGpxa8CwT6-Lbfs7J7WMQ3oBNpVJQpv3w,70450
|
609
|
-
sglang/srt/models/deepseek_nextn.py,sha256=
|
610
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
610
|
+
sglang/srt/models/deepseek_nextn.py,sha256=FMeM-5oaUWhonnP7tP8oGvFympGoRkv8h9AfFocI-T4,5941
|
611
|
+
sglang/srt/models/deepseek_v2.py,sha256=KvRQntryvAu9IEY8OHrLhqIGND3qC-EKM5dr5sYhIXA,95084
|
611
612
|
sglang/srt/models/deepseek_vl2.py,sha256=j8BdxZsMjm6lPdbDipEIKhVIVywCP1Vl1Kl46BZ5_0Y,13147
|
612
613
|
sglang/srt/models/exaone.py,sha256=TpO-rtCpEZ8Ua7hGFnS8l2oAYhY0Pij50grc9WQ2mvc,13576
|
613
614
|
sglang/srt/models/gemma.py,sha256=4cdrPISg1VKnsuI-QPTpYvet4BrX8BMKvCIN82iLskw,12641
|
@@ -615,11 +616,15 @@ sglang/srt/models/gemma2.py,sha256=kqtwdo93GWKm2iBN29RoIRH2ggRm-K_80LM5btgfBLo,1
|
|
615
616
|
sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
|
616
617
|
sglang/srt/models/gemma3_causal.py,sha256=Vm605KeF7CBXbtxzOWF-v0TLbl0G12CLF-bEgTV9T0E,25197
|
617
618
|
sglang/srt/models/gemma3_mm.py,sha256=b9YmkipsfVb5IXVeIVwW_PviXiCkRULhEsqNOvPoDxU,17221
|
619
|
+
sglang/srt/models/gemma3n_audio.py,sha256=isgKfjA5UieYawxU6medL2ssXlzYPqAbagDBnLcemC0,36405
|
620
|
+
sglang/srt/models/gemma3n_causal.py,sha256=nPGjcEOoLP-dhl7l94CB0XSC0g33ljFuIT_QeXb4BBE,36271
|
621
|
+
sglang/srt/models/gemma3n_mm.py,sha256=jFNhWCdPd4eChD0OlfSVtJfuufJr6qTj04c-oEXorQo,19273
|
618
622
|
sglang/srt/models/glm4.py,sha256=2VQzUqFkQTy_2nfkxP9SF6_9kKLTZUExGRjge7r99Es,11265
|
619
623
|
sglang/srt/models/gpt2.py,sha256=kclhxEs8oJk1KCyhmAqo7rZqecVGGHYkc-a1WZi3aIk,9841
|
620
624
|
sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtYhl4,10307
|
621
625
|
sglang/srt/models/granite.py,sha256=5WOJyNYAlt5RNHSexNfPNihhSxIMd7wPzju1cTixKig,20852
|
622
626
|
sglang/srt/models/grok.py,sha256=vESZeGS4adI_JAerXIkCcTm15-CNiGeS7VHc36C6w1A,28033
|
627
|
+
sglang/srt/models/hunyuan.py,sha256=dD9kWKTwh1DLa7b-laccQvh2PVVgAHx6487UT8VXhao,28994
|
623
628
|
sglang/srt/models/idefics2.py,sha256=U3khd3hbdawJeRNXsxmaKHdssOCT5TPOZ1D-2_zHoQo,12079
|
624
629
|
sglang/srt/models/internlm2.py,sha256=F_iNY1gUqzAjAuUatcE47gnrcoTh5_08PY2Rw9tKr9M,13150
|
625
630
|
sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
|
@@ -714,8 +719,8 @@ sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
714
719
|
sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
|
715
720
|
sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
|
716
721
|
sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
|
717
|
-
sglang-0.4.8.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
718
|
-
sglang-0.4.8.dist-info/METADATA,sha256=
|
719
|
-
sglang-0.4.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
720
|
-
sglang-0.4.8.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
721
|
-
sglang-0.4.8.dist-info/RECORD,,
|
722
|
+
sglang-0.4.8.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
723
|
+
sglang-0.4.8.post1.dist-info/METADATA,sha256=isDKzDsTthshFCkEmL3isGMcgn1uBG3M2mvGolPE_xc,26609
|
724
|
+
sglang-0.4.8.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
725
|
+
sglang-0.4.8.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
726
|
+
sglang-0.4.8.post1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|