sglang 0.4.8__py3-none-any.whl → 0.4.8.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. sglang/srt/configs/model_config.py +1 -0
  2. sglang/srt/conversation.py +1 -0
  3. sglang/srt/custom_op.py +7 -1
  4. sglang/srt/disaggregation/base/conn.py +2 -0
  5. sglang/srt/disaggregation/decode.py +1 -1
  6. sglang/srt/disaggregation/mooncake/conn.py +289 -48
  7. sglang/srt/disaggregation/mooncake/transfer_engine.py +31 -1
  8. sglang/srt/disaggregation/nixl/conn.py +94 -46
  9. sglang/srt/disaggregation/prefill.py +3 -2
  10. sglang/srt/disaggregation/utils.py +12 -11
  11. sglang/srt/entrypoints/engine.py +5 -3
  12. sglang/srt/entrypoints/openai/protocol.py +47 -4
  13. sglang/srt/entrypoints/openai/serving_chat.py +52 -76
  14. sglang/srt/entrypoints/openai/serving_completions.py +1 -0
  15. sglang/srt/entrypoints/openai/serving_embedding.py +1 -0
  16. sglang/srt/layers/activation.py +7 -0
  17. sglang/srt/layers/attention/flashattention_backend.py +24 -14
  18. sglang/srt/layers/layernorm.py +15 -0
  19. sglang/srt/layers/linear.py +18 -1
  20. sglang/srt/layers/logits_processor.py +12 -3
  21. sglang/srt/layers/moe/ep_moe/layer.py +79 -12
  22. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +19 -2
  23. sglang/srt/layers/moe/fused_moe_native.py +7 -0
  24. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +7 -2
  25. sglang/srt/layers/moe/fused_moe_triton/layer.py +73 -14
  26. sglang/srt/layers/moe/topk.py +26 -0
  27. sglang/srt/layers/quantization/fp8_utils.py +5 -4
  28. sglang/srt/layers/rotary_embedding.py +103 -11
  29. sglang/srt/layers/vocab_parallel_embedding.py +14 -1
  30. sglang/srt/managers/expert_distribution.py +21 -0
  31. sglang/srt/managers/io_struct.py +10 -2
  32. sglang/srt/managers/multimodal_processors/base_processor.py +44 -9
  33. sglang/srt/managers/multimodal_processors/gemma3n.py +97 -0
  34. sglang/srt/managers/schedule_batch.py +9 -1
  35. sglang/srt/managers/scheduler.py +42 -6
  36. sglang/srt/model_executor/cuda_graph_runner.py +1 -1
  37. sglang/srt/model_executor/model_runner.py +5 -2
  38. sglang/srt/model_loader/loader.py +45 -10
  39. sglang/srt/model_loader/weight_utils.py +89 -0
  40. sglang/srt/models/deepseek_nextn.py +7 -4
  41. sglang/srt/models/deepseek_v2.py +147 -4
  42. sglang/srt/models/gemma3n_audio.py +949 -0
  43. sglang/srt/models/gemma3n_causal.py +1009 -0
  44. sglang/srt/models/gemma3n_mm.py +511 -0
  45. sglang/srt/models/hunyuan.py +771 -0
  46. sglang/srt/server_args.py +16 -2
  47. sglang/srt/two_batch_overlap.py +4 -1
  48. sglang/srt/utils.py +71 -0
  49. sglang/version.py +1 -1
  50. {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/METADATA +1 -1
  51. {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/RECORD +54 -49
  52. {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/WHEEL +0 -0
  53. {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/licenses/LICENSE +0 -0
  54. {sglang-0.4.8.dist-info → sglang-0.4.8.post1.dist-info}/top_level.txt +0 -0
sglang/srt/server_args.py CHANGED
@@ -47,6 +47,7 @@ class ServerArgs:
47
47
  tokenizer_mode: str = "auto"
48
48
  skip_tokenizer_init: bool = False
49
49
  load_format: str = "auto"
50
+ model_loader_extra_config: str = "{}"
50
51
  trust_remote_code: bool = False
51
52
  dtype: str = "auto"
52
53
  kv_cache_dtype: str = "auto"
@@ -563,6 +564,7 @@ class ServerArgs:
563
564
  # Model and port args
564
565
  parser.add_argument(
565
566
  "--model-path",
567
+ "--model",
566
568
  type=str,
567
569
  help="The path of the model weights. This can be a local folder or a Hugging Face repo ID.",
568
570
  required=True,
@@ -632,6 +634,13 @@ class ServerArgs:
632
634
  "layer before loading another to make the peak memory envelope "
633
635
  "smaller.",
634
636
  )
637
+ parser.add_argument(
638
+ "--model-loader-extra-config",
639
+ type=str,
640
+ help="Extra config for model loader. "
641
+ "This will be passed to the model loader corresponding to the chosen load_format.",
642
+ default=ServerArgs.model_loader_extra_config,
643
+ )
635
644
  parser.add_argument(
636
645
  "--trust-remote-code",
637
646
  action="store_true",
@@ -1692,6 +1701,9 @@ class PortArgs:
1692
1701
  # The ipc filename for rpc call between Engine and Scheduler
1693
1702
  rpc_ipc_name: str
1694
1703
 
1704
+ # The ipc filename for Scheduler to send metrics
1705
+ metrics_ipc_name: str
1706
+
1695
1707
  @staticmethod
1696
1708
  def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
1697
1709
  port = server_args.port + random.randint(100, 1000)
@@ -1711,6 +1723,7 @@ class PortArgs:
1711
1723
  detokenizer_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
1712
1724
  nccl_port=port,
1713
1725
  rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
1726
+ metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
1714
1727
  )
1715
1728
  else:
1716
1729
  # DP attention. Use TCP + port to handle both single-node and multi-node.
@@ -1730,9 +1743,9 @@ class PortArgs:
1730
1743
  port_base = int(dist_init_port) + 1
1731
1744
  if dp_rank is None:
1732
1745
  # TokenizerManager to DataParallelController
1733
- scheduler_input_port = port_base + 3
1746
+ scheduler_input_port = port_base + 4
1734
1747
  else:
1735
- scheduler_input_port = port_base + 3 + 1 + dp_rank
1748
+ scheduler_input_port = port_base + 4 + 1 + dp_rank
1736
1749
 
1737
1750
  return PortArgs(
1738
1751
  tokenizer_ipc_name=f"tcp://{dist_init_host}:{port_base}",
@@ -1740,6 +1753,7 @@ class PortArgs:
1740
1753
  detokenizer_ipc_name=f"tcp://{dist_init_host}:{port_base + 1}",
1741
1754
  nccl_port=port,
1742
1755
  rpc_ipc_name=f"tcp://{dist_init_host}:{port_base + 2}",
1756
+ metrics_ipc_name=f"tcp://{dist_init_host}:{port_base + 3}",
1743
1757
  )
1744
1758
 
1745
1759
 
@@ -346,7 +346,10 @@ class TboForwardBatchPreparer:
346
346
  )
347
347
 
348
348
  # TODO improve, e.g. unify w/ `init_raw`
349
- if global_server_args_dict["moe_dense_tp_size"] == 1:
349
+ if (
350
+ global_server_args_dict["moe_dense_tp_size"] == 1
351
+ and batch.gathered_buffer is not None
352
+ ):
350
353
  sum_len = end_token_index - start_token_index
351
354
  gathered_buffer = torch.zeros(
352
355
  (sum_len, batch.gathered_buffer.shape[1]),
sglang/srt/utils.py CHANGED
@@ -2457,6 +2457,77 @@ def cpu_has_amx_support():
2457
2457
  return torch._C._cpu._is_amx_tile_supported() and is_intel_amx_backend_available
2458
2458
 
2459
2459
 
2460
+ def prepack_weight_if_needed(weight):
2461
+ if weight.device != torch.device("cpu"):
2462
+ return weight
2463
+ if not cpu_has_amx_support():
2464
+ return weight
2465
+
2466
+ return torch.ops.sgl_kernel.convert_weight_packed(weight)
2467
+
2468
+
2469
+ # TODO: currently gemm kernel has the below requirements:
2470
+ # OC % TILE_N == 0, where TILE_N = 16
2471
+ # IC % TILE_K == 0, where TILE_K = 32
2472
+ def dim_is_supported(weight):
2473
+ return weight.size(0) % 16 == 0 and weight.size(1) % 32 == 0
2474
+
2475
+
2476
+ def _process_weight_after_loading(module, weight_names, transpose_dims=None) -> None:
2477
+ # Pack weight for get better performance on CPU
2478
+ devices = {getattr(module, weight_name).device for weight_name in weight_names}
2479
+ assert len(devices) == 1, f"Expects all weights to be on the same device"
2480
+ device = devices.pop()
2481
+
2482
+ if transpose_dims:
2483
+ assert len(weight_names) == len(
2484
+ transpose_dims
2485
+ ), "len(weight_names) should be equal to len(transpose_dims)"
2486
+
2487
+ for i, weight_name in enumerate(weight_names):
2488
+ weight_tensor = getattr(module, weight_name)
2489
+
2490
+ # We don't pack weight or use intel amx backend if any weight of this module has unsupported dim.
2491
+ if not dim_is_supported(weight_tensor):
2492
+ logger.warning(
2493
+ f"Expects weight.size(0) % 16 == 0 and weight.size(1) % 32 == 0 "
2494
+ f"but {weight_tensor.size(0)=} and {weight_tensor.size(1)=} in {module}. "
2495
+ f"{module} won't use intel amx backend."
2496
+ )
2497
+ module.use_intel_amx_backend = False
2498
+ return
2499
+
2500
+ if transpose_dims and transpose_dims[i]:
2501
+ weight_tensor = weight_tensor.transpose(*transpose_dims[i])
2502
+
2503
+ packed_weight = torch.nn.Parameter(
2504
+ prepack_weight_if_needed(weight_tensor),
2505
+ requires_grad=False,
2506
+ )
2507
+ packed_weight.__dict__ = weight_tensor.__dict__
2508
+ setattr(module, weight_name, packed_weight)
2509
+
2510
+ module.use_intel_amx_backend = (
2511
+ device == torch.device("cpu") and cpu_has_amx_support()
2512
+ )
2513
+
2514
+ if (
2515
+ module.use_intel_amx_backend
2516
+ and hasattr(module, "bias")
2517
+ and module.bias is not None
2518
+ ):
2519
+ module.bias = torch.nn.Parameter(module.bias.data.float(), requires_grad=False)
2520
+
2521
+
2522
+ class PackWeightMethod:
2523
+ def __init__(self, weight_names, transpose_dims=None):
2524
+ self.weight_names = weight_names
2525
+ self.transpose_dims = transpose_dims
2526
+
2527
+ def process_weights_after_loading(self, module) -> None:
2528
+ _process_weight_after_loading(module, self.weight_names, self.transpose_dims)
2529
+
2530
+
2460
2531
  class LazyValue:
2461
2532
  def __init__(self, creator: Callable):
2462
2533
  self._creator = creator
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.8"
1
+ __version__ = "0.4.8.post1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.4.8
3
+ Version: 0.4.8.post1
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -11,7 +11,7 @@ sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/math_utils.py,sha256=QYtbaIA76P33ojcOrL32VR6yXWv-Od_3WCZNO4kQ-YQ,177
12
12
  sglang/profiler.py,sha256=tEHzHerXC-ymk4OrkoUcMbgcGHmb8VESthsNSP2Yx9w,4417
13
13
  sglang/utils.py,sha256=VH6zrnkjzcR3DE__WfVph6wswJ4JuzoQD47VmbZ38eI,16435
14
- sglang/version.py,sha256=40-PUZPRIakJU2yYWQcwTYvSJA6iewqiG8XylhxuAQk,22
14
+ sglang/version.py,sha256=E6iPG1WE6yyF-fToZYo4ZM-iwegVLaXOKuEXlAJ_kvg,28
15
15
  sglang/eval/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
16
16
  sglang/eval/loogle_eval.py,sha256=pRPVA4fxGmT3_oXvXnlNE-UlPrcQGLBJF-OSE9YWJXM,4336
17
17
  sglang/lang/chat_template.py,sha256=HKlx7snSWFED8GKF5ex79sQrPWFw5TSXQM0_LsiD9Bc,20552
@@ -31,8 +31,8 @@ sglang/srt/_custom_ops.py,sha256=0lJRMTKTjoxJPh1qQnnMY02Z3SyBDi7LJI34IBLQsgQ,446
31
31
  sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
32
32
  sglang/srt/code_completion_parser.py,sha256=KFa95OU0TeVjJkOqIgS4xV3kaJ5dFWOmAAgISyc1oEc,3803
33
33
  sglang/srt/constants.py,sha256=0i-tEwG2BSYNDy96MxnGHV5HnBELkYcnsVGsE-R18o0,93
34
- sglang/srt/conversation.py,sha256=9Ix1NRQAErjXen8s4Yobb1HKizln7VBdrZPK_QKONoY,37104
35
- sglang/srt/custom_op.py,sha256=At-Nqm_noJyoxi928TLvIPzUgj3DM-St-g2mdx1CWEI,2912
34
+ sglang/srt/conversation.py,sha256=xkV_OWdotT_Tf1QzpxLL-oZ-THAKKtJj49Q9B_L-WT8,37144
35
+ sglang/srt/custom_op.py,sha256=87r2PIgiGLREsIZQ8qsUD-zgI66_54y9GrE0buXzoCI,3076
36
36
  sglang/srt/debug_utils.py,sha256=slaFOY4BYDBFatkfu8FZlzai-u4LFS-5GUzdr-t50zE,2241
37
37
  sglang/srt/hf_transformers_utils.py,sha256=S1ZF4aFKTocenXvj9ti6M-buiCBus4FrP1AdwFz_Wbw,11943
38
38
  sglang/srt/jinja_template_utils.py,sha256=Jc6Vl-lYtUyMBSWWQGIMpJQ1O-ceU9c8DbOi_bN-oVk,6719
@@ -42,10 +42,10 @@ sglang/srt/operations.py,sha256=ddQ8KO63L73OciaR8MZ9h2h83gKVY4-WuWgeEGowPJA,5346
42
42
  sglang/srt/operations_strategy.py,sha256=6DDLEdmkLrFDTXWZoFO0q9MZjEThvfyvoO-LbQsNpPQ,7023
43
43
  sglang/srt/patch_torch.py,sha256=OUPCGQSQz3MVZB1zZ_Eq8lXiw0uIKJ_HWjqQolI8FsM,3088
44
44
  sglang/srt/reasoning_parser.py,sha256=vf0kWBM4IXwbuzGBIOMdiXdn9gavqkHb1QIaTbkU7vc,6742
45
- sglang/srt/server_args.py,sha256=HCjiKk1VujrWMR2SPiO2bzgdnySGZO34PaoSeCM-pEw,72873
45
+ sglang/srt/server_args.py,sha256=MVp3qB7PmJlw82hY8dQPoVsJZ26RpmiBqLDgzq1vjOc,73495
46
46
  sglang/srt/torch_memory_saver_adapter.py,sha256=K_eTx0UU84MHSTXI3iqYLdHV4IWtJMJ2FKdGFJR8v1E,2417
47
- sglang/srt/two_batch_overlap.py,sha256=Q6tobqItnlK5ad9mqqMMx0HJ-PAzW4DgBtgSQ7Ax6sM,21848
48
- sglang/srt/utils.py,sha256=kQWtZ0JUyaYDJNW-zNV0AU7Yfn9qGnRztPoSM5gC8sE,78080
47
+ sglang/srt/two_batch_overlap.py,sha256=M5Ca3LV4_j7g3o78WQDjWzgsSCpY4E75SIKnwoa7tQg,21922
48
+ sglang/srt/utils.py,sha256=Xg7K6VaTFDrEdW8b07AVREPHdx9M4vJuOI3aIct82JM,80640
49
49
  sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
50
50
  sglang/srt/configs/__init__.py,sha256=8EcVRP95epZ49DxBa6LgKWt7eO3Qe7Hrr3V1c6HkMnY,553
51
51
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
@@ -58,7 +58,7 @@ sglang/srt/configs/janus_pro.py,sha256=Rrb7kQsNaUP-TiZrjNk8Lr1momFrql8ScEunnrH0_
58
58
  sglang/srt/configs/kimi_vl.py,sha256=4W7VQI3pr888ZsFA2SqCQo4mI0seXTOrGQ-x3oTvWew,1358
59
59
  sglang/srt/configs/kimi_vl_moonvit.py,sha256=hx2Rt4JSFbvy2HUTeLjBpge87m8M6ITAhqsgdNf_Jd4,1163
60
60
  sglang/srt/configs/load_config.py,sha256=qs-AxuplouBx2tsv9KGBOLZPbwzuVA4vbktbGP_cRp8,3309
61
- sglang/srt/configs/model_config.py,sha256=mXERUkexcWnZxrCzpxaMo7FhYm-CNhwttDIVhw-ZysY,25206
61
+ sglang/srt/configs/model_config.py,sha256=ESZEDSvxVfRH4CiZoy8JVpapJU5yPTYCAxZB38AJ7b0,25245
62
62
  sglang/srt/configs/utils.py,sha256=3nHUfisMs_Ltuhv8OZTNCJp63YJKJVF43h1QZB1zqx8,670
63
63
  sglang/srt/connector/__init__.py,sha256=czLX5JOxuMhH-T9eSJzoc1qv1B4z9chyffDRL5I6wo4,1247
64
64
  sglang/srt/connector/base_connector.py,sha256=i6i1TIzsz4NbSEkrdMPq-urb2sN2aLAx8dazga4gB9U,2833
@@ -75,25 +75,25 @@ sglang/srt/constrained/outlines_jump_forward.py,sha256=Gyubp-FVetxd6wP4FA_kD6cCX
75
75
  sglang/srt/constrained/reasoner_grammar_backend.py,sha256=YFxWuOTTo4e6cGhxnaBwuwli6f8FTUJtszib8dBq_8I,3207
76
76
  sglang/srt/constrained/xgrammar_backend.py,sha256=u9Ao-XX22UzALkATEAR_-0EoyBTxMTJV590qd9LAUHM,7906
77
77
  sglang/srt/constrained/triton_ops/bitmask_ops.py,sha256=WjTen9iuuFWLzkE1mAHQZB9_7aIy5QH8Wjf-lB-Fams,4614
78
- sglang/srt/disaggregation/decode.py,sha256=GLIaPpm3I3oykq3A41Ru4gdsLa9LwUADc3xqM2LuHSU,33335
78
+ sglang/srt/disaggregation/decode.py,sha256=lqpmpQYMXzDCM19R4Pe4RF8q92UvBqKUITauFGDylEs,33335
79
79
  sglang/srt/disaggregation/decode_schedule_batch_mixin.py,sha256=8UdNaj8KKMLV5Cydhw8gnHM-zRrnKM_AAd3Qc-SRfWg,5648
80
80
  sglang/srt/disaggregation/kv_events.py,sha256=yFbtwOeblcCmOXTjg00TopxnyjkmCBQIVz46KB8jetY,13555
81
81
  sglang/srt/disaggregation/launch_lb.py,sha256=mcbAztN4gnHevw_T5R2_nWsymsDEY9vHkm7OJ1vr6cc,4211
82
82
  sglang/srt/disaggregation/mini_lb.py,sha256=BBeIdeZZxi7Ra5_hukHHX3TNdyfZ4tP1GBUxC-qrv_g,14401
83
- sglang/srt/disaggregation/prefill.py,sha256=SQYLDpanJNc8sJHXQoPD6P48HGAEigxdf2CVal3Nl40,23419
84
- sglang/srt/disaggregation/utils.py,sha256=84EF9I0lOK8bWqd8cbTTgVDzG6ecofp6WuSVqgCe1jc,11386
83
+ sglang/srt/disaggregation/prefill.py,sha256=jWOqYXBBiNuOC78a028FN6mlklzEki4MjRdTA6UE5zU,23518
84
+ sglang/srt/disaggregation/utils.py,sha256=LBiRVbJ4jjYJXn0JL4VMTmGwAMAIGqJ_zaqPLjrmfkU,11339
85
85
  sglang/srt/disaggregation/base/__init__.py,sha256=4VwUv0aWxwmVL1049XK82aLTNxmt0WY5RPy9li-wyVk,160
86
- sglang/srt/disaggregation/base/conn.py,sha256=6KK_7HL7xixVSB8iap8jjJrIv3JgHDmynZ39Os3k5MU,2771
86
+ sglang/srt/disaggregation/base/conn.py,sha256=CPDAoAkYaFtVPLa1QROfwipSVe7MH6omzIBHzo8TSYk,2811
87
87
  sglang/srt/disaggregation/common/__init__.py,sha256=7yl-EGLMVKRpBUaGF_7lwAsw2J_mqpRZV0238VGxD9o,126
88
88
  sglang/srt/disaggregation/common/conn.py,sha256=CZR1lMCE_mpSkvjc6BBmSr1SbHY1uZuxjpanazD-YXc,16071
89
89
  sglang/srt/disaggregation/common/utils.py,sha256=SxRhAWisNK8seGhb5BXBJ5u53DF7yeKVPMWPcB5ywbE,1194
90
90
  sglang/srt/disaggregation/fake/__init__.py,sha256=jJGWdXwaQiGIoR6atKqkQfkJmVyQ09l55VUN2WjwaeY,77
91
91
  sglang/srt/disaggregation/fake/conn.py,sha256=oD1DArn1yDFZCu-X6p93uSLlAXEkt9lYxERICMznxGw,2286
92
92
  sglang/srt/disaggregation/mooncake/__init__.py,sha256=0TgqkAdQI1YynbHY6c0QISvVoOSk-0SwCIq5rjPSmgE,156
93
- sglang/srt/disaggregation/mooncake/conn.py,sha256=ES9N4J_zy5xdG8_YEsrcodXZPCBwKf9SBddOBCGLOHc,47837
94
- sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=gOcjZPtksJDq4iiIs0_zREdOM8trqUcmOXkrj-wiCuU,3149
93
+ sglang/srt/disaggregation/mooncake/conn.py,sha256=MATJkiS_5Vh8fc8Gx6S6cI3zCYDbgKXVEAa1xkUsCKA,58879
94
+ sglang/srt/disaggregation/mooncake/transfer_engine.py,sha256=JYB9T-EPdJNfv4I_sVpmMOZCOJ14itD97ws6tTvj240,4281
95
95
  sglang/srt/disaggregation/nixl/__init__.py,sha256=qODVPIGWUXKXq4zsRIcMYoAoAeg6nBIN9vdQOlVMANE,136
96
- sglang/srt/disaggregation/nixl/conn.py,sha256=KzVPRH8MaOAiG9EdfDN31sE9U9Ibsj2McDlSRWzHiP0,18055
96
+ sglang/srt/disaggregation/nixl/conn.py,sha256=G2l-FuXUvtsEo3Z24vyQ8iTcFjqG-sise4ItAtiny30,20327
97
97
  sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
98
98
  sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
99
99
  sglang/srt/distributed/parallel_state.py,sha256=0_G1TtBOFMYDix5rfuEHYBMpy9A-OuPs9yFd5nCiZ8Q,53927
@@ -109,15 +109,15 @@ sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6
109
109
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=d8mykYmXM1lfbPm8GNtqCF0Un_pdXYjbNmsgoVFyyow,20874
110
110
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
111
111
  sglang/srt/entrypoints/EngineBase.py,sha256=FCwqQMJE_8CCCMThAXUZaKafsHvh2BaQ31J-7_ormwA,2310
112
- sglang/srt/entrypoints/engine.py,sha256=oNcZDupOYllUjtYnNZOuOqfQzxg3SonwC4jNn-ByvQY,30169
112
+ sglang/srt/entrypoints/engine.py,sha256=uSlN1vorCBCYk8n8AnCpbnCu-4p9JjukLyXJZZzTrGU,30283
113
113
  sglang/srt/entrypoints/http_server.py,sha256=Fyb3z9OKXF9h_-duhWsHLjZayE3uoZ2dHpeV757bxXc,34745
114
114
  sglang/srt/entrypoints/http_server_engine.py,sha256=ncN45ti9mawSOimPSedI6zugfoMhMQOYh4tmdfC9LcE,4936
115
115
  sglang/srt/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
- sglang/srt/entrypoints/openai/protocol.py,sha256=JepHGUD8DcwNJQ0B2YnR6ZvWp6fhc4IQa2zQthBI8z4,17110
116
+ sglang/srt/entrypoints/openai/protocol.py,sha256=QqC3XeJ0RxlFBsPuobjtf6c7iRkEOC8Td4E_GvjKwxM,18759
117
117
  sglang/srt/entrypoints/openai/serving_base.py,sha256=5NJ2S_6B2NFSwn4nLp6eaeJ5iC3IcQzMEY9lW_gPcdA,5246
118
- sglang/srt/entrypoints/openai/serving_chat.py,sha256=ojMNf55Nq-WVpwhl7TQdwepyb1JShfKJkzgfWCPH1UE,36542
119
- sglang/srt/entrypoints/openai/serving_completions.py,sha256=7JKbykpFYnqsmej6iK_SHnD67IJ7KOVBbl11TVtKPPg,16661
120
- sglang/srt/entrypoints/openai/serving_embedding.py,sha256=ZAX4qQ9YZ7qUvT6zCAfrGFtkEfydPHiWuoPDz2Qrr2I,6245
118
+ sglang/srt/entrypoints/openai/serving_chat.py,sha256=tWKWjspTpNnuk-Aqfl_S6OmpZe6gz5XxmSGEwfIvCsY,35935
119
+ sglang/srt/entrypoints/openai/serving_completions.py,sha256=emIutVmnJgOgGRuAqdB80-kVHUfQbE67n1VS-76o4QY,16690
120
+ sglang/srt/entrypoints/openai/serving_embedding.py,sha256=hqPt2ELpT1yQ8sBXo801aLJ3sExoPXs_K5ZQtC2vpAs,6274
121
121
  sglang/srt/entrypoints/openai/serving_rerank.py,sha256=8n6y5kC6UhRpWrDtzH0eIp0dznW12FT60MLCR4bvD2U,3333
122
122
  sglang/srt/entrypoints/openai/serving_score.py,sha256=ebAts-m6Pq-LTgFKwggkywBUrAgUSppHHVEzgwYHUzo,1955
123
123
  sglang/srt/entrypoints/openai/usage_processor.py,sha256=9LTB5rqdRuMKyZrIXiUBuF_WKaSg9X45YdzERDxbtCY,2746
@@ -134,27 +134,27 @@ sglang/srt/function_call/mistral_detector.py,sha256=xNuVl2vDXVYbXyiXLkJZ9VM6njcw
134
134
  sglang/srt/function_call/pythonic_detector.py,sha256=rtXSflE4w993a4OqphWG-WQzvwRz0v3dfgHOnte1fpI,8731
135
135
  sglang/srt/function_call/qwen25_detector.py,sha256=9JfZem_5nw91Og2biwq8eIpUQjy_3kFz4TQI8Lc3Vow,4882
136
136
  sglang/srt/function_call/utils.py,sha256=__ImDF2kNyoLWsYO5RYoryvy1mmgEjnjXlCvLv-uLCM,1695
137
- sglang/srt/layers/activation.py,sha256=vJUWe5O72c0yX9jxb0TND_6SFhiOnvHtp9H67ftgYnU,7174
137
+ sglang/srt/layers/activation.py,sha256=w8gr84LdpbBxmSjK9cfqFGXiGxALOwkBzqHChdZ6z7M,7327
138
138
  sglang/srt/layers/communicator.py,sha256=WbefauUNbwfAtaBySi-rqqXkoFZZpxdOJURLBHpF5qA,18597
139
139
  sglang/srt/layers/dp_attention.py,sha256=e-AgUTa70NsNgsw4hB1a-B_yDv7T8PyXW3jqR8sIKgY,9807
140
140
  sglang/srt/layers/elementwise.py,sha256=XCrR2i-9dP-H6jQo2zUuquwZrsl_wEQqj5Wxk6WUf7o,13987
141
- sglang/srt/layers/layernorm.py,sha256=OLjIEv9POyTOOj2G7rSNzIJ6kdA9DM4-azFZALyQhbw,7149
142
- sglang/srt/layers/linear.py,sha256=xdAv5qJodLdapHi9ex3e0Qa_KHldcGwEiE-LpLRs1U8,51939
143
- sglang/srt/layers/logits_processor.py,sha256=68kkHIwW7mTMSBjUpuw8blLWdL6i3XtHeOcqt4ErYbY,25195
141
+ sglang/srt/layers/layernorm.py,sha256=x6VDTFxvcJMx2txpBW4Y7G1sWfKYsksNpf-L4_ySSDo,7660
142
+ sglang/srt/layers/linear.py,sha256=ToLkotx239ze3rwizk3r05Gg7_LJk39hAdjgqWwYusE,52462
143
+ sglang/srt/layers/logits_processor.py,sha256=TTR7LgSwthaH6Qfmcda2Ampibtt-JcwpLSs6-OJI_sQ,25604
144
144
  sglang/srt/layers/multimodal.py,sha256=YVR69WW-2aGDcZHT8IVJ6F_LRM7wraZr8VjrPDXqDmA,2104
145
145
  sglang/srt/layers/parameter.py,sha256=zqWyEzpWzP4NNTjq3G9khq6XofgpcmJqQLg6Vd4WyWE,15084
146
146
  sglang/srt/layers/pooler.py,sha256=uZ6WX1FLMEafZwusyZdm6KuVlIwSjbKrdwk2qzgqNGk,3812
147
147
  sglang/srt/layers/radix_attention.py,sha256=IlqRB4bk06FOH05_7zB8lik0xLpys7jFooLeCwdO0j8,3437
148
- sglang/srt/layers/rotary_embedding.py,sha256=D5oj1CwmZ7Dq8I1j0hfKXcd2jn7hzu8Z5eMI7phUDWw,49469
148
+ sglang/srt/layers/rotary_embedding.py,sha256=iOxdJEw9jhxYih7WpNkzlGybzewNu9GgqU8dLM-TyD0,52209
149
149
  sglang/srt/layers/sampler.py,sha256=xNds1migup2s6b9_pS6ljkJUkvNtv7nmTGeIdOzoQ6w,11182
150
150
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
151
151
  sglang/srt/layers/utils.py,sha256=IWGg1Hb7c33Z3LHRPVJyUAzp3BnSid23ZWXAmJ_Jvp8,1204
152
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=GPKCQiR2yH5Z2XSE9g3vWObdegcwPnMHXv7qEZovLH4,22755
152
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=65N0e4PlOwoTRZC9QpC2G83Crn-OI4rY9wZTRnOVNvg,23166
153
153
  sglang/srt/layers/attention/aiter_backend.py,sha256=7sEUgViw-xl3yok91yyOD9gTi8lQmME0g0ZiKVTCcyI,32851
154
154
  sglang/srt/layers/attention/base_attn_backend.py,sha256=KXVcCguwXh-PSrY9Y2aUrlXXUhWdbVxqVEF2_xIMvm4,3466
155
155
  sglang/srt/layers/attention/cutlass_mla_backend.py,sha256=SIR7sKCCegwzahSz82I3gsDyN5TkKoa4yG4-pBQWBi4,9813
156
156
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
157
- sglang/srt/layers/attention/flashattention_backend.py,sha256=JgAp-douu45GYrkxc-d9LHE-rw7pXbVpishHfsU9m_8,92529
157
+ sglang/srt/layers/attention/flashattention_backend.py,sha256=j4rnX5IARna-pZmVnSR9kjf_yINGnkKO6itTLwkx82k,93278
158
158
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=Ug4SkZnuHjBBwPOj2TfLlg0eU_GoZvKjhY4oYRU_qqU,49666
159
159
  sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=NirRlNFxD0S2EncvvmrerIxNfuTnYpvemZV3MR5_FnU,34040
160
160
  sglang/srt/layers/attention/flashmla_backend.py,sha256=5iSic5ho-lkXNas9mR3uLbXbEl-do31gc8gjR7-a79k,20711
@@ -173,16 +173,16 @@ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=waZsmpKIp8rTg
173
173
  sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=664WnAJ91EiCUZOcnVDfbTQf4uGJ4ZDZB1CbxpEUFZc,13866
174
174
  sglang/srt/layers/moe/cutlass_moe.py,sha256=--bNTA2BGbHeULb_XXDoRUyWWbE-doHo5K5k1T0N0WA,14323
175
175
  sglang/srt/layers/moe/cutlass_moe_params.py,sha256=9NRCmgP_Ug3gGqCcpi-x-QRbLjCNpw8792gKXwZsbEU,6522
176
- sglang/srt/layers/moe/fused_moe_native.py,sha256=FIES6H7oS-XjwVsWSrAkQzTdnp7kq-Z5VoViqOHpmtg,4851
176
+ sglang/srt/layers/moe/fused_moe_native.py,sha256=bW3KWxxz9rxKMUQqfmAtF-7ptTODA1pwLydE05ABDJE,5030
177
177
  sglang/srt/layers/moe/router.py,sha256=5Aeqoix_AS4uymb665OJE904wVSBkQeFdZP4e7KKPvg,10530
178
- sglang/srt/layers/moe/topk.py,sha256=fLjl0DZMn6UnUtbx8AjyE0wtAtjn0W64t1DrDXBbjiM,17548
178
+ sglang/srt/layers/moe/topk.py,sha256=_hIyTURqx6Id1C3NQmHVuPxivMN0ywmDoyhFvtgHIZY,18624
179
179
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
180
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=xdYak2dkrUJjmUigKJ-GbWfdf-tXlUKbvtJgxVekbMA,40130
181
- sglang/srt/layers/moe/ep_moe/layer.py,sha256=Tl45TYm200db9JNJMuCxgX9yJpKPQ8hUioNkAhUsd7M,52875
182
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=wPc0ewfBeoyYmgKMbp1YoT81OaJSme7AmftrXbxR7Jg,23523
181
+ sglang/srt/layers/moe/ep_moe/layer.py,sha256=U-R2mffzW_snuAxvX1AfN0xgcbt-w9fNFUidd2Hi4eQ,55783
182
+ sglang/srt/layers/moe/ep_moe/token_dispatcher.py,sha256=IR5RtdL9aIG04QaeySsq4Oy-S8obivBBZKbFfPCGpXc,24077
183
183
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
184
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=FWUfurqSu4NZBzA7oWx2fcqQGtAhPy-U5qgLsaB3lrI,63103
185
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=nWmrt8Ihybl3uo0tsDNt0IpyvoBx8QO6KRS4rrP8YL8,29833
184
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=gM_nctg15Wc06pxiMq0IRE0QBCfNyebSsWwm5zdM_Uk,63225
185
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=AAEb9pvd3bxiuvIKV2FZboWNvffccYmhF9R09SQSRlo,32038
186
186
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
187
187
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
188
188
  "sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -351,7 +351,7 @@ sglang/srt/layers/quantization/base_config.py,sha256=jWk_egQrVNMYmQgbTI9vkcgzScL
351
351
  sglang/srt/layers/quantization/blockwise_int8.py,sha256=vWyPZsRLhdKtSmjvlT5fsowBK_nEebYbDAUh2yqseGw,15285
352
352
  sglang/srt/layers/quantization/fp8.py,sha256=Ne3K177lBdWI8TXyJs8qSaH67KaAv2j4LrZeHyqyH_8,44678
353
353
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=pUda_glnAprnFIj3VUgCUYMKb2-uK3UOC3yPahgRMBQ,34743
354
- sglang/srt/layers/quantization/fp8_utils.py,sha256=_90Js9EwurcSKIr69_6avEXbX3--Nd9LicRkRU6CGts,25580
354
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=QYHx_OLXFxFCmSMgoKmbJ3Vgl4mVEcXykdnhHO7tU0g,25650
355
355
  sglang/srt/layers/quantization/gptq.py,sha256=d1frUjvXmZfQKkcMQY5t0BA4sXWHE9Jze24qxniptJE,26719
356
356
  sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRC9FOn9exNvK4QHbUeBj3Hhv32VcyGphapFPt5b84,12625
357
357
  sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
@@ -545,15 +545,15 @@ sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMa
545
545
  sglang/srt/managers/data_parallel_controller.py,sha256=PZ-wOcAFn3PQqMB6I2vjIsFmplf0nlNl5hqTDKTHTG8,12112
546
546
  sglang/srt/managers/detokenizer_manager.py,sha256=SpLxTsSPKBZfD-ZMhJ5zpPPGuUb8PmcYgFSL9CsurU4,10696
547
547
  sglang/srt/managers/eplb_manager.py,sha256=YaxnvD1-wMV7BhtF9AxoNRVsJUdlQzFF7N-JDP43ojE,3411
548
- sglang/srt/managers/expert_distribution.py,sha256=HBcfNdS3l6ob17Z9KFlX5f79rqcaY41XMDp8emMD-TY,30880
548
+ sglang/srt/managers/expert_distribution.py,sha256=TzOSO7xFJ1VaxvbC2wqHq4l8UtWLX8K0rBmu5g_V2rU,31562
549
549
  sglang/srt/managers/expert_location.py,sha256=ZSsH17k5bAgbE1wuvpGaHGueiyhfPrgQakBFPu9jswo,16669
550
550
  sglang/srt/managers/expert_location_dispatch.py,sha256=U6-XLZ77RK0oy_JUVug2q-2LJjwoYX-js0_zhBNMXuM,4148
551
- sglang/srt/managers/io_struct.py,sha256=bWybydmTbTuKW_AO6VjHPhB5jbWFdA7qcxb5Ee7Txkg,33687
551
+ sglang/srt/managers/io_struct.py,sha256=gyP8JGyX4DSPYs4_0LIKUs4fj7tonrDhnreiSajgric,34022
552
552
  sglang/srt/managers/mm_utils.py,sha256=mA9W4xZBnXfs-4ZeALvhdpGAYxrJGfOxBWyoHVrt44Q,26518
553
553
  sglang/srt/managers/multimodal_processor.py,sha256=XlRYvNhF6XOssreRX9DZPhLSpps_VE62gSKw3EGdNPo,2088
554
- sglang/srt/managers/schedule_batch.py,sha256=meRGsHztVoSWNHuyq-Qzb_0Xk88d4OasGTvTYBvKoI8,72781
554
+ sglang/srt/managers/schedule_batch.py,sha256=_Bt9hGWtiY3Dy0_GNijQVB9TT1b1V2xFOYK_M2aRvuo,73146
555
555
  sglang/srt/managers/schedule_policy.py,sha256=0T8URzQmLvEmG-42-SFBBl9WnsOSwYO8-_CcBpuD38M,20474
556
- sglang/srt/managers/scheduler.py,sha256=FcnzE0JuheAvuZlslbZRF7sQIbRIcwMkKKHp6--mGxs,108539
556
+ sglang/srt/managers/scheduler.py,sha256=nejUktfqa3Qwf6TrN4CUVTYLELNwfUGFpExw-a8WFFw,110046
557
557
  sglang/srt/managers/scheduler_output_processor_mixin.py,sha256=aaj0z1PD3fGIOPoTtWwqoKsii0yQCR7txVFXMPECveQ,29879
558
558
  sglang/srt/managers/session_controller.py,sha256=Lh1kruMcKqR7WVWYJRZbYgowtsssVlP7_paIVXLLIcE,5756
559
559
  sglang/srt/managers/template_manager.py,sha256=RrwRA2oqId_PMQ98qJQGwIxMroOxiorl2sGC9ARou_0,8543
@@ -564,10 +564,11 @@ sglang/srt/managers/utils.py,sha256=9_VGE48EK0PXVJ26aYvbRJ6n7gIZALvCcf6uZCccCgM,
564
564
  sglang/srt/managers/eplb_algorithms/__init__.py,sha256=wVUv2ZhhC-_VbLaStpk3vulzhqExwfHGZJQqoohs-Fw,1963
565
565
  sglang/srt/managers/eplb_algorithms/deepseek.py,sha256=mMZT7zAAArccdRS0xXxifvMb3qn9enSt426uUTKeiq4,8340
566
566
  sglang/srt/managers/eplb_algorithms/deepseek_vec.py,sha256=Vzy5Iarua1VgVHzjBNZaVV_vt1LY1BLtJz7PmzB701k,10654
567
- sglang/srt/managers/multimodal_processors/base_processor.py,sha256=DQk5qNh7MDY9DQoASiQbCTwRb2HdvLWobzHmE0Ni_fg,21500
567
+ sglang/srt/managers/multimodal_processors/base_processor.py,sha256=1fl0eW24ju9FiKflieYEo2mooYFayWGVtamxhAtcWJM,23348
568
568
  sglang/srt/managers/multimodal_processors/clip.py,sha256=lRc2mcuDbAhZVf-0EfkO81pqDiol9zLvTpDqtPIBQ2k,1525
569
569
  sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py,sha256=CMfhhdq7u6GzT8ZENo7ByClvQEx-HKaTGVgdYM1vMNw,3460
570
570
  sglang/srt/managers/multimodal_processors/gemma3.py,sha256=oBHXlbwto_84ZkjkW2A7F3Z7kNuDf039uDH4HVXKE1s,2290
571
+ sglang/srt/managers/multimodal_processors/gemma3n.py,sha256=UjAHeX4a2ZyPccCV_O9isxm61J-w5dglfhYO2IUQkyo,3578
571
572
  sglang/srt/managers/multimodal_processors/internvl.py,sha256=ASv3MQ0Ju6oZG7UceS5ziy4rL2d8Xf1_LbIFmEAuz2E,9512
572
573
  sglang/srt/managers/multimodal_processors/janus_pro.py,sha256=nSZYKLoCZtv7sQIM21KCt4jpnzVfcsF84m9CFPWwR7s,2058
573
574
  sglang/srt/managers/multimodal_processors/kimi_vl.py,sha256=8DER6QFDrmD0sZMjlAffY4z3jtBrrIYoU8ogpZIKNio,1868
@@ -590,14 +591,14 @@ sglang/srt/mem_cache/multimodal_cache.py,sha256=Q-lYcI-3HoLu0WJhE2F_An8g9mkZ8LwM
590
591
  sglang/srt/mem_cache/radix_cache.py,sha256=ojr9_bUwnPocmpbGZXz8JKac4dS-PrfNYk8UqF4Gvi8,17936
591
592
  sglang/srt/metrics/collector.py,sha256=C9QEJDOEdOPBwy2IJwFS3R6VbGzVzGs2xakKCCPvQDk,19903
592
593
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
593
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=OqU5EYdJU6pfk3fMNPklnvVhlMaXKWWKguLkAV9cdbw,30444
594
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=MyZg9hw1dGcjIE5canm5TplUhFptIXt9FIKpDgXWjTQ,30450
594
595
  sglang/srt/model_executor/expert_location_updater.py,sha256=HWLY5lJAWefy2tobWJKlHs3qlBSCS57EwSMdfUuPFc4,20585
595
596
  sglang/srt/model_executor/forward_batch_info.py,sha256=ueHsjmGm52YqVK-8f-TRYpERCCdeSBuc2yaqxD9pWkQ,29268
596
- sglang/srt/model_executor/model_runner.py,sha256=mbYBcddyYxBuRcFmRawa17xdxySxdG-LwxmovH4BCLw,54309
597
+ sglang/srt/model_executor/model_runner.py,sha256=9EIHTDKRtXNrzURQYcC_rBjjWUPGnKHKYCV_GiW70zM,54490
597
598
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
598
- sglang/srt/model_loader/loader.py,sha256=R1-_cWKvrFx6zWWDYxKHOnfP7QtnVmMXi_zyS6seeCI,56286
599
+ sglang/srt/model_loader/loader.py,sha256=-Pr8-YMaF3jIGXfhnEMM1WOloeWcPQnaUgSgWN6aAGI,57626
599
600
  sglang/srt/model_loader/utils.py,sha256=zSZBPA9ErPmkf-HfCxJjhmiFwYueB15KUg7NFspLvPY,4454
600
- sglang/srt/model_loader/weight_utils.py,sha256=XwNVS5MLtNzW5fTS5UDK2CHUuSDMHKznI4Du8l4xoks,32562
601
+ sglang/srt/model_loader/weight_utils.py,sha256=jkd4R6wroef5A3xpVe6rst5xosVitxndnNwH6cMp_zo,35668
601
602
  sglang/srt/models/baichuan.py,sha256=HbvlErnkCSK4pRQYCSDxMcrn-1DQyfiNoeDcnRrJas8,15807
602
603
  sglang/srt/models/bert.py,sha256=ODJe8YfNRP-hHsomFWk4_QpcuiSsNfjzGf256EDS0Pc,15802
603
604
  sglang/srt/models/chatglm.py,sha256=cajLN9caBl09e0TwOFkiTTKDqwlbmHo_yS-NCjdeQW8,13957
@@ -606,8 +607,8 @@ sglang/srt/models/commandr.py,sha256=5Y_b3K0QY7D37nFGkyiGgY38RleRui_GJUYcHSuHUZo
606
607
  sglang/srt/models/dbrx.py,sha256=4pn_fdoATg01VEqNnIAxNEsKV5XU7gwHyd289eydq1s,15598
607
608
  sglang/srt/models/deepseek.py,sha256=ZnN02HdgXCB23Vno5V9UMUoOxH5HC82vNTwsVulUJ-o,17206
608
609
  sglang/srt/models/deepseek_janus_pro.py,sha256=OeeI7vZbE4HGpxa8CwT6-Lbfs7J7WMQ3oBNpVJQpv3w,70450
609
- sglang/srt/models/deepseek_nextn.py,sha256=LaNBVtTSSO6_v5VRq-aZQ7K7lVFSIlUm8NcBB9p0fmw,5751
610
- sglang/srt/models/deepseek_v2.py,sha256=8sYA5m2a590tF58IvRzOnPQUshKFAhQxQCbRmqEzoyY,89180
610
+ sglang/srt/models/deepseek_nextn.py,sha256=FMeM-5oaUWhonnP7tP8oGvFympGoRkv8h9AfFocI-T4,5941
611
+ sglang/srt/models/deepseek_v2.py,sha256=KvRQntryvAu9IEY8OHrLhqIGND3qC-EKM5dr5sYhIXA,95084
611
612
  sglang/srt/models/deepseek_vl2.py,sha256=j8BdxZsMjm6lPdbDipEIKhVIVywCP1Vl1Kl46BZ5_0Y,13147
612
613
  sglang/srt/models/exaone.py,sha256=TpO-rtCpEZ8Ua7hGFnS8l2oAYhY0Pij50grc9WQ2mvc,13576
613
614
  sglang/srt/models/gemma.py,sha256=4cdrPISg1VKnsuI-QPTpYvet4BrX8BMKvCIN82iLskw,12641
@@ -615,11 +616,15 @@ sglang/srt/models/gemma2.py,sha256=kqtwdo93GWKm2iBN29RoIRH2ggRm-K_80LM5btgfBLo,1
615
616
  sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
616
617
  sglang/srt/models/gemma3_causal.py,sha256=Vm605KeF7CBXbtxzOWF-v0TLbl0G12CLF-bEgTV9T0E,25197
617
618
  sglang/srt/models/gemma3_mm.py,sha256=b9YmkipsfVb5IXVeIVwW_PviXiCkRULhEsqNOvPoDxU,17221
619
+ sglang/srt/models/gemma3n_audio.py,sha256=isgKfjA5UieYawxU6medL2ssXlzYPqAbagDBnLcemC0,36405
620
+ sglang/srt/models/gemma3n_causal.py,sha256=nPGjcEOoLP-dhl7l94CB0XSC0g33ljFuIT_QeXb4BBE,36271
621
+ sglang/srt/models/gemma3n_mm.py,sha256=jFNhWCdPd4eChD0OlfSVtJfuufJr6qTj04c-oEXorQo,19273
618
622
  sglang/srt/models/glm4.py,sha256=2VQzUqFkQTy_2nfkxP9SF6_9kKLTZUExGRjge7r99Es,11265
619
623
  sglang/srt/models/gpt2.py,sha256=kclhxEs8oJk1KCyhmAqo7rZqecVGGHYkc-a1WZi3aIk,9841
620
624
  sglang/srt/models/gpt_bigcode.py,sha256=1D6bi8Zu760gCRZkvdLHFcg8kCkY35ARwQYaMDtYhl4,10307
621
625
  sglang/srt/models/granite.py,sha256=5WOJyNYAlt5RNHSexNfPNihhSxIMd7wPzju1cTixKig,20852
622
626
  sglang/srt/models/grok.py,sha256=vESZeGS4adI_JAerXIkCcTm15-CNiGeS7VHc36C6w1A,28033
627
+ sglang/srt/models/hunyuan.py,sha256=dD9kWKTwh1DLa7b-laccQvh2PVVgAHx6487UT8VXhao,28994
623
628
  sglang/srt/models/idefics2.py,sha256=U3khd3hbdawJeRNXsxmaKHdssOCT5TPOZ1D-2_zHoQo,12079
624
629
  sglang/srt/models/internlm2.py,sha256=F_iNY1gUqzAjAuUatcE47gnrcoTh5_08PY2Rw9tKr9M,13150
625
630
  sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
@@ -714,8 +719,8 @@ sglang/test/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
714
719
  sglang/test/attention/test_flashattn_backend.py,sha256=_rTG849FwQdVTyGKkqhczaOqngBmRWXFmkl5NnuK1GM,13914
715
720
  sglang/test/attention/test_flashattn_mla_backend.py,sha256=g4O50WblTpM7_Gq2b76k0i25_z01BOUBQ4i6PmyxpO4,10774
716
721
  sglang/test/attention/test_prefix_chunk_info.py,sha256=hpoDe2wfSa6RlUbfyri_c0iyBTb35UXGL9I2Xh6jamM,7772
717
- sglang-0.4.8.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
718
- sglang-0.4.8.dist-info/METADATA,sha256=8Spz4kOLcrEbQuzVTiI4EE0A4Ldrdcholyuz3mNVUC4,26603
719
- sglang-0.4.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
720
- sglang-0.4.8.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
721
- sglang-0.4.8.dist-info/RECORD,,
722
+ sglang-0.4.8.post1.dist-info/licenses/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
723
+ sglang-0.4.8.post1.dist-info/METADATA,sha256=isDKzDsTthshFCkEmL3isGMcgn1uBG3M2mvGolPE_xc,26609
724
+ sglang-0.4.8.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
725
+ sglang-0.4.8.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
726
+ sglang-0.4.8.post1.dist-info/RECORD,,