sglang 0.3.6.post2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. sglang/bench_offline_throughput.py +55 -2
  2. sglang/bench_one_batch.py +7 -6
  3. sglang/bench_one_batch_server.py +4 -3
  4. sglang/bench_serving.py +13 -0
  5. sglang/check_env.py +1 -1
  6. sglang/launch_server.py +3 -2
  7. sglang/srt/_custom_ops.py +118 -0
  8. sglang/srt/configs/device_config.py +17 -0
  9. sglang/srt/configs/load_config.py +84 -0
  10. sglang/srt/configs/model_config.py +161 -4
  11. sglang/srt/configs/qwen2vl.py +5 -8
  12. sglang/srt/constrained/outlines_backend.py +6 -1
  13. sglang/srt/constrained/outlines_jump_forward.py +8 -1
  14. sglang/srt/distributed/__init__.py +3 -0
  15. sglang/srt/distributed/communication_op.py +34 -0
  16. sglang/srt/distributed/device_communicators/__init__.py +0 -0
  17. sglang/srt/distributed/device_communicators/cuda_wrapper.py +182 -0
  18. sglang/srt/distributed/device_communicators/custom_all_reduce.py +352 -0
  19. sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +291 -0
  20. sglang/srt/distributed/device_communicators/hpu_communicator.py +48 -0
  21. sglang/srt/distributed/device_communicators/pynccl.py +204 -0
  22. sglang/srt/distributed/device_communicators/pynccl_wrapper.py +362 -0
  23. sglang/srt/distributed/device_communicators/shm_broadcast.py +568 -0
  24. sglang/srt/distributed/device_communicators/xpu_communicator.py +47 -0
  25. sglang/srt/distributed/parallel_state.py +1275 -0
  26. sglang/srt/distributed/utils.py +223 -0
  27. sglang/srt/hf_transformers_utils.py +37 -1
  28. sglang/srt/layers/attention/flashinfer_backend.py +13 -15
  29. sglang/srt/layers/attention/torch_native_backend.py +285 -0
  30. sglang/srt/layers/fused_moe_patch.py +20 -11
  31. sglang/srt/layers/linear.py +1 -0
  32. sglang/srt/layers/logits_processor.py +17 -3
  33. sglang/srt/layers/quantization/__init__.py +34 -0
  34. sglang/srt/layers/vocab_parallel_embedding.py +1 -0
  35. sglang/srt/lora/lora.py +1 -1
  36. sglang/srt/managers/data_parallel_controller.py +7 -11
  37. sglang/srt/managers/detokenizer_manager.py +7 -4
  38. sglang/srt/managers/image_processor.py +1 -1
  39. sglang/srt/managers/io_struct.py +48 -12
  40. sglang/srt/managers/schedule_batch.py +42 -36
  41. sglang/srt/managers/schedule_policy.py +7 -4
  42. sglang/srt/managers/scheduler.py +111 -46
  43. sglang/srt/managers/session_controller.py +0 -3
  44. sglang/srt/managers/tokenizer_manager.py +169 -100
  45. sglang/srt/managers/tp_worker.py +36 -3
  46. sglang/srt/managers/tp_worker_overlap_thread.py +32 -5
  47. sglang/srt/model_executor/cuda_graph_runner.py +16 -7
  48. sglang/srt/model_executor/forward_batch_info.py +9 -4
  49. sglang/srt/model_executor/model_runner.py +136 -150
  50. sglang/srt/model_loader/__init__.py +34 -0
  51. sglang/srt/model_loader/loader.py +1139 -0
  52. sglang/srt/model_loader/utils.py +41 -0
  53. sglang/srt/model_loader/weight_utils.py +640 -0
  54. sglang/srt/models/baichuan.py +9 -10
  55. sglang/srt/models/chatglm.py +6 -15
  56. sglang/srt/models/commandr.py +2 -3
  57. sglang/srt/models/dbrx.py +2 -3
  58. sglang/srt/models/deepseek.py +4 -11
  59. sglang/srt/models/deepseek_v2.py +3 -11
  60. sglang/srt/models/exaone.py +2 -3
  61. sglang/srt/models/gemma.py +2 -6
  62. sglang/srt/models/gemma2.py +3 -14
  63. sglang/srt/models/gemma2_reward.py +0 -1
  64. sglang/srt/models/gpt2.py +5 -12
  65. sglang/srt/models/gpt_bigcode.py +6 -22
  66. sglang/srt/models/grok.py +14 -51
  67. sglang/srt/models/internlm2.py +2 -3
  68. sglang/srt/models/internlm2_reward.py +0 -1
  69. sglang/srt/models/llama.py +97 -27
  70. sglang/srt/models/llama_classification.py +1 -2
  71. sglang/srt/models/llama_embedding.py +1 -2
  72. sglang/srt/models/llama_reward.py +2 -3
  73. sglang/srt/models/llava.py +10 -12
  74. sglang/srt/models/llavavid.py +1 -2
  75. sglang/srt/models/minicpm.py +4 -7
  76. sglang/srt/models/minicpm3.py +6 -19
  77. sglang/srt/models/mixtral.py +12 -5
  78. sglang/srt/models/mixtral_quant.py +2 -3
  79. sglang/srt/models/mllama.py +3 -7
  80. sglang/srt/models/olmo.py +2 -8
  81. sglang/srt/models/olmo2.py +391 -0
  82. sglang/srt/models/olmoe.py +3 -5
  83. sglang/srt/models/phi3_small.py +8 -8
  84. sglang/srt/models/qwen.py +2 -3
  85. sglang/srt/models/qwen2.py +10 -9
  86. sglang/srt/models/qwen2_moe.py +4 -11
  87. sglang/srt/models/qwen2_vl.py +12 -9
  88. sglang/srt/models/registry.py +99 -0
  89. sglang/srt/models/stablelm.py +2 -3
  90. sglang/srt/models/torch_native_llama.py +6 -12
  91. sglang/srt/models/xverse.py +2 -4
  92. sglang/srt/models/xverse_moe.py +4 -11
  93. sglang/srt/models/yivl.py +2 -3
  94. sglang/srt/openai_api/adapter.py +10 -6
  95. sglang/srt/openai_api/protocol.py +1 -0
  96. sglang/srt/server.py +303 -204
  97. sglang/srt/server_args.py +65 -31
  98. sglang/srt/utils.py +253 -48
  99. sglang/test/test_utils.py +27 -7
  100. sglang/utils.py +2 -2
  101. sglang/version.py +1 -1
  102. {sglang-0.3.6.post2.dist-info → sglang-0.4.0.dist-info}/METADATA +2 -1
  103. sglang-0.4.0.dist-info/RECORD +184 -0
  104. sglang/srt/layers/fused_moe_grok/__init__.py +0 -1
  105. sglang/srt/layers/fused_moe_grok/fused_moe.py +0 -692
  106. sglang/srt/layers/fused_moe_grok/layer.py +0 -630
  107. sglang-0.3.6.post2.dist-info/RECORD +0 -164
  108. {sglang-0.3.6.post2.dist-info → sglang-0.4.0.dist-info}/LICENSE +0 -0
  109. {sglang-0.3.6.post2.dist-info → sglang-0.4.0.dist-info}/WHEEL +0 -0
  110. {sglang-0.3.6.post2.dist-info → sglang-0.4.0.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py CHANGED
@@ -22,7 +22,7 @@ from sglang.bench_serving import run_benchmark
22
22
  from sglang.global_config import global_config
23
23
  from sglang.lang.backend.openai import OpenAI
24
24
  from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
25
- from sglang.srt.utils import get_bool_env_var, kill_child_process
25
+ from sglang.srt.utils import get_bool_env_var, kill_process_tree
26
26
  from sglang.test.run_eval import run_eval
27
27
  from sglang.utils import get_exception_traceback
28
28
 
@@ -424,6 +424,7 @@ def popen_launch_server(
424
424
  port,
425
425
  *other_args,
426
426
  ]
427
+
427
428
  if api_key:
428
429
  command += ["--api-key", api_key]
429
430
 
@@ -504,7 +505,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float):
504
505
  )
505
506
  assert ret_code == 0
506
507
  except TimeoutError:
507
- kill_child_process(process.pid, include_self=True)
508
+ kill_process_tree(process.pid)
508
509
  time.sleep(5)
509
510
  print(
510
511
  f"\nTimeout after {timeout_per_file} seconds when running {filename}\n",
@@ -567,6 +568,7 @@ def run_bench_serving(
567
568
  disable_tqdm=False,
568
569
  disable_stream=disable_stream,
569
570
  disable_ignore_eos=False,
571
+ lora_name=None,
570
572
  extra_request_body=None,
571
573
  profile=None,
572
574
  )
@@ -578,7 +580,7 @@ def run_bench_serving(
578
580
  run_benchmark(warmup_args)
579
581
  res = run_benchmark(args)
580
582
  finally:
581
- kill_child_process(process.pid, include_self=True)
583
+ kill_process_tree(process.pid)
582
584
 
583
585
  assert res["completed"] == num_prompts
584
586
  return res
@@ -611,7 +613,7 @@ def run_bench_one_batch(model, other_args):
611
613
  lastline = output.split("\n")[-3]
612
614
  output_throughput = float(lastline.split(" ")[-2])
613
615
  finally:
614
- kill_child_process(process.pid, include_self=True)
616
+ kill_process_tree(process.pid)
615
617
 
616
618
  return output_throughput
617
619
 
@@ -677,8 +679,14 @@ def run_and_check_memory_leak(
677
679
  enable_mixed_chunk,
678
680
  disable_overlap,
679
681
  chunked_prefill_size,
682
+ assert_has_abort,
680
683
  ):
681
- other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
684
+ other_args = [
685
+ "--chunked-prefill-size",
686
+ str(chunked_prefill_size),
687
+ "--log-level",
688
+ "debug",
689
+ ]
682
690
  if disable_radix_cache:
683
691
  other_args += ["--disable-radix-cache"]
684
692
  if enable_mixed_chunk:
@@ -710,8 +718,8 @@ def run_and_check_memory_leak(
710
718
  workload_func(base_url, model)
711
719
 
712
720
  # Clean up everything
713
- kill_child_process(process.pid, include_self=True)
714
- kill_child_process(process.pid, include_self=True)
721
+ kill_process_tree(process.pid)
722
+ kill_process_tree(process.pid)
715
723
  stdout.close()
716
724
  stderr.close()
717
725
  if os.path.exists(STDOUT_FILENAME):
@@ -723,14 +731,19 @@ def run_and_check_memory_leak(
723
731
  # Assert success
724
732
  has_new_server = False
725
733
  has_leak = False
734
+ has_abort = False
726
735
  for line in output_lines:
727
736
  if "The server is fired" in line:
728
737
  has_new_server = True
729
738
  if "leak" in line:
730
739
  has_leak = True
740
+ if "Abort" in line:
741
+ has_abort = True
731
742
 
732
743
  assert has_new_server
733
744
  assert not has_leak
745
+ if assert_has_abort:
746
+ assert has_abort
734
747
 
735
748
 
736
749
  def run_mmlu_test(
@@ -761,6 +774,7 @@ def run_mmlu_test(
761
774
  enable_mixed_chunk,
762
775
  disable_overlap,
763
776
  chunked_prefill_size,
777
+ assert_has_abort=False,
764
778
  )
765
779
 
766
780
 
@@ -800,4 +814,10 @@ def run_mulit_request_test(
800
814
  enable_mixed_chunk,
801
815
  enable_overlap,
802
816
  chunked_prefill_size,
817
+ assert_has_abort=False,
803
818
  )
819
+
820
+
821
+ def write_github_step_summary(content):
822
+ with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
823
+ f.write(content)
sglang/utils.py CHANGED
@@ -348,9 +348,9 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
348
348
 
349
349
 
350
350
  def terminate_process(process):
351
- from sglang.srt.utils import kill_child_process
351
+ from sglang.srt.utils import kill_process_tree
352
352
 
353
- kill_child_process(process.pid, include_self=True)
353
+ kill_process_tree(process.pid)
354
354
 
355
355
 
356
356
  def print_highlight(html_content: str):
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.6.post2"
1
+ __version__ = "0.4.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.6.post2
3
+ Version: 0.4.0
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -241,6 +241,7 @@ Requires-Dist: sglang[runtime_common]; extra == "srt"
241
241
  Requires-Dist: torch; extra == "srt"
242
242
  Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
243
243
  Requires-Dist: cuda-python; extra == "srt"
244
+ Requires-Dist: flashinfer>=0.1.6; extra == "srt"
244
245
  Provides-Extra: srt-hip
245
246
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
246
247
  Requires-Dist: torch; extra == "srt-hip"
@@ -0,0 +1,184 @@
1
+ sglang/__init__.py,sha256=3M0oz0ZA8fULhV5LwQ4hxh-MRdHsOJRD1D63C60pdG4,1616
2
+ sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
3
+ sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
+ sglang/bench_offline_throughput.py,sha256=3OrFI26PmoVTU3pQrBFC50AZI7HpKKuk4vYycbkDjhY,12428
5
+ sglang/bench_one_batch.py,sha256=vxXSCQRTMeJUtJKsSoP6tLdoWTdFp1mhwsLpKHccs2c,15858
6
+ sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
7
+ sglang/bench_serving.py,sha256=Oa_Qi7YApv37jGDAmuIaZSIhayvRpKq9GZGZLXBU-9I,52924
8
+ sglang/check_env.py,sha256=q1sdYL-gcKSCeIZMk7sUMh9rjM71f-EUgp07OGPSbZM,5446
9
+ sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
10
+ sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
11
+ sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
+ sglang/utils.py,sha256=r4Dw-xffcrTRposls-gqyoYxjgJNYhVduK_6bDN_Vj4,11526
13
+ sglang/version.py,sha256=42STGor_9nKYXumfeV5tiyD_M8VdcddX7CEexmibPBk,22
14
+ sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
16
+ sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
17
+ sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
18
+ sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
19
+ sglang/lang/ir.py,sha256=zpzzAO1YVldhE95Vwz5hU_TQltu-xt8A6rfFr0PuIDA,18410
20
+ sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
21
+ sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
23
+ sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
24
+ sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
25
+ sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
26
+ sglang/lang/backend/runtime_endpoint.py,sha256=IWbrAKrUkzNOvwV6V9_y6pkTr2SUYEkKBT-3kirgad0,10514
27
+ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
28
+ sglang/srt/_custom_ops.py,sha256=Y4gyTDGhWz-W2Igq25Ojm8XFiyvkawW9I-79iwYvxJ0,3574
29
+ sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
30
+ sglang/srt/hf_transformers_utils.py,sha256=38Ms0H2-VMerOS6jnczcFtZMS6lhw9B5rSWKAfxVUfQ,7945
31
+ sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
32
+ sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
33
+ sglang/srt/server.py,sha256=uApz59VUGkRx_HCy6kaiKJFTmaOAvAM3o2yYjEWi9EM,34594
34
+ sglang/srt/server_args.py,sha256=NubTM6ocZY0YDSvPQbvBdURK-7E5kSKHA2Ze09O8_1I,32182
35
+ sglang/srt/utils.py,sha256=p-eYNMVQkw6Yw1b7MBSNWhVvykCuNfejqMjl7cLDq7A,40708
36
+ sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
37
+ sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
38
+ sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
39
+ sglang/srt/configs/load_config.py,sha256=TcPi_HY6xu5SiVZsxPOoB5pGeDUNebOk7muoUH9VBDg,3083
40
+ sglang/srt/configs/model_config.py,sha256=OjEeigs5tMNKP-RImJk2NHVFXv-fyQfsGREWMO3rqhM,15839
41
+ sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
42
+ sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
43
+ sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
44
+ sglang/srt/constrained/outlines_backend.py,sha256=LmezsMAmTlQgaPqO4Axl4EcSAqKr_ZYZASfAoVxT17A,6670
45
+ sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
46
+ sglang/srt/constrained/xgrammar_backend.py,sha256=4ZCQgcjWEY2Lg4r2V9sAiYJJblkQ_uVbEnvsjqhR1Pc,4548
47
+ sglang/srt/distributed/__init__.py,sha256=__tl9Frrf3PFrSyNYcn5i-y2rL-J4-Qn6RJwrsZ4xgc,83
48
+ sglang/srt/distributed/communication_op.py,sha256=ZoIhboZyefiAwr-1K-wF3rAFSQ4Wt-RxXpsX443Gbt4,1157
49
+ sglang/srt/distributed/parallel_state.py,sha256=HplRH5S0AWdwSdhoHYX9_UWQZlFjh2Z1LHaz68EXlpE,47555
50
+ sglang/srt/distributed/utils.py,sha256=riYflM9l1-Yi-8Ce8Acxa4mAjZaxHRQfta8Dtah4yG0,8500
51
+ sglang/srt/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
+ sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=u8_kYVOBDrNZMiQCJC538yJvpZgq6ZEpB28tCrp04yM,7065
53
+ sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=5ARfr-1_V4QoxjvdfxOKPtSK_Rax8qAQTPoA5z_Emtc,13567
54
+ sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=qq8GTZl0br0ggfosb8mH3U6cXbm4NWfr8y_B83W4fDg,11081
55
+ sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=LXGOhoNT5iVu1JWlRvGfHMB0wRW6lkhDamVT9JhVD94,1755
56
+ sglang/srt/distributed/device_communicators/pynccl.py,sha256=cDEoHU24C8ph-4fJAIDjZfl53aSzrjCG3FAFkt4vjrM,7186
57
+ sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=pU4xhG-WKytSHJ-cpcPEs0WG4dAg44jpOgv2dAmHisE,11990
58
+ sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=WVxBd1QfIgRWzVGtN2axxO-3PFT-Qww8GQ82Yg5PPYU,22824
59
+ sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=P3WKgddcfpUhBa-_5PvjYxH146ZE-N1cotTzEpPRKlY,1620
60
+ sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwxBQ,5471
61
+ sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
62
+ sglang/srt/layers/fused_moe_patch.py,sha256=DMIyrwOON7OSidKZdreL5HzMhP0AD5Ues0xdY-ADOQw,4471
63
+ sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
64
+ sglang/srt/layers/linear.py,sha256=dF2HvqiMbhWlCjvkLFRCcgUFGhG-B0keM_CIpjvgTtg,46154
65
+ sglang/srt/layers/logits_processor.py,sha256=oZNu9pNNgmswhuw8irlLm0SfpVrD7cFf-GdfPsLZGHE,13227
66
+ sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
67
+ sglang/srt/layers/radix_attention.py,sha256=C_mK4mfmKlxMRNeKYP9E5R3PRd3eT-OcE_g3mo36dJM,2058
68
+ sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
69
+ sglang/srt/layers/sampler.py,sha256=_enfER8MSxsCYrR6_NgyFxKA_XqKtii_asOZUFUUsd8,4580
70
+ sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
71
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=slGwLiWjuFLCUdRe-GTlfumyZpqVX9VF6No_UGOT-hA,21624
72
+ sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
73
+ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
74
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=q0yPeKUjGbxKnOqbZLHs5fyYkkVE3YEkBBMWtUaiDL4,24611
75
+ sglang/srt/layers/attention/torch_native_backend.py,sha256=amR8xGaaCONvo3M8I4nuYMv6OxPHyYoxu3m8vAqjWfo,10295
76
+ sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
77
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=BE63WhKiutSNkhJLsRwvfsRy-ExvuAv7FZyoWv73ul8,18744
78
+ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
79
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=Gfct-0_l-S2ZrP4F-zkzNiFbmd3C3f7uJovacOuDxaA,11472
80
+ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
81
+ sglang/srt/layers/fused_moe_triton/__init__.py,sha256=PHKFqd2hPOO-g9kSMseg2g76lpg9OGXQDThWU6bt9vs,902
82
+ sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=qwfRBOeY5DT48Q6z71Eh9cjFehvs_K6eLIVWNL044Ug,28363
83
+ sglang/srt/layers/fused_moe_triton/layer.py,sha256=URDkTt8xEqnqpO5tb_3L7JlhlO53VWfqDDNSRYEu-LY,21545
84
+ sglang/srt/layers/quantization/__init__.py,sha256=0LnBJ2LOtk7HnMzOZm14tMd4Y0eSPSgqUhj9uOn53Es,5961
85
+ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
86
+ sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
87
+ sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
88
+ sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
89
+ sglang/srt/managers/data_parallel_controller.py,sha256=psI4FAuBGjtdnEuwagnGdtRqvqSSxOROfNKQqVDqlVA,8382
90
+ sglang/srt/managers/detokenizer_manager.py,sha256=TtrtE37XT5XcJzk8-R5rHZ16NHTPd5XZi8hf3h-sB2A,7462
91
+ sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
92
+ sglang/srt/managers/io_struct.py,sha256=d_kctmHcNBzzaP5lUEIpdrVVsob4dNOetMHkobUJZz4,14439
93
+ sglang/srt/managers/schedule_batch.py,sha256=p6OmeCH3arGZ99Ch7JrnTgGU-SsPgjQY7Z6arVzeqao,44998
94
+ sglang/srt/managers/schedule_policy.py,sha256=7QuIsJDRzkrvs3IJk10oOfL4Me0UZwDYvRniT1fSFuo,12620
95
+ sglang/srt/managers/scheduler.py,sha256=fmvwVcZS5DsDdxIJ4bjlLV-qJsRbTxjsqPP5a2SS0C8,59372
96
+ sglang/srt/managers/session_controller.py,sha256=Yp-IV3rXczACZxZXmF-QxW9CWICGy8KHQ9ttBGJ8WXA,2800
97
+ sglang/srt/managers/tokenizer_manager.py,sha256=XPaSXB6b23u95viFqlqd-tdyrNMMOOiSDWviz_g7UBM,29890
98
+ sglang/srt/managers/tp_worker.py,sha256=X1EwFX3FSsmXx7jeeX2tjZRocaujabQYWm-M-0CFEBE,7363
99
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=ZBLCAYz-Ls1coObyO8dNtNsO6gG2rn40KulEmALB_J8,8686
100
+ sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
101
+ sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
102
+ sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
103
+ sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
104
+ sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
105
+ sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
106
+ sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
107
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=ffypnhzwAdiBAngHEpK9HVmpvX0H3dFIKGa45tS0IGI,15197
108
+ sglang/srt/model_executor/forward_batch_info.py,sha256=L5mVoW5SaO6To-7nGk0TZM-FFB5_78cARpJ-aC2rwD0,12883
109
+ sglang/srt/model_executor/model_runner.py,sha256=wU06s5tpHgtxitmAWGQC1NjLIcp68iAdbFDWN-TtkBU,29260
110
+ sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
111
+ sglang/srt/model_loader/loader.py,sha256=VBrY4W9CiVvS_D8yXhdkW9jReV9rSMSkJplabz0Fxgk,43528
112
+ sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
113
+ sglang/srt/model_loader/weight_utils.py,sha256=kQo9KPThjH3HAOCfC_tdwdrshdWuWJOVpPR0skSyaRY,24193
114
+ sglang/srt/models/baichuan.py,sha256=PzBOFcEAixakPEkQSaJwC0Xc1fu-yCsN9T0I67r8QmY,14919
115
+ sglang/srt/models/chatglm.py,sha256=DOrEhmb0s-yPId88R6nJeLOTUEtogk-vkB69qT2JdWc,12913
116
+ sglang/srt/models/commandr.py,sha256=-cswAK2_ZTcOsvBkv5Z8SZ1x60cxJryslbSPHDOcnbM,14169
117
+ sglang/srt/models/dbrx.py,sha256=2Wqcf3sv57l4gi2xH8yrb5WSmY-4_kbbf6fhpJ4aKWw,14581
118
+ sglang/srt/models/deepseek.py,sha256=BVNICGoLjQoHmR5lc31YrZ6YbxSRTBilHqlLsALr2u8,15693
119
+ sglang/srt/models/deepseek_v2.py,sha256=2AWdDb5qZ6dj_M6ZY-rCG0omTmerUxbhwuvI9Um4Bg4,31967
120
+ sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
121
+ sglang/srt/models/gemma.py,sha256=ydRqsG-7004r1fAiz01LHUmcj_6XN0Tn4xO1keJnMQk,12126
122
+ sglang/srt/models/gemma2.py,sha256=vPrAasJajitQHB9ZqMFut58xNsOm3fk2m05a-feQL10,14600
123
+ sglang/srt/models/gemma2_reward.py,sha256=hJw0hXNPyQSpazkVJVYiW04OtTZH0GiLI-JJef_kaGs,2529
124
+ sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
125
+ sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
126
+ sglang/srt/models/grok.py,sha256=lJQEk2D8zyAXE0g4vXLCEmiWM938K_qly02EScwwV_k,13942
127
+ sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
128
+ sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
129
+ sglang/srt/models/llama.py,sha256=JVwVDU-8L8RSNOYH0CK8FI557xqjPU2Ts_pQhVfGZv4,19550
130
+ sglang/srt/models/llama_classification.py,sha256=EdXmiMyfJ9NH5P-Wel7SRhf_v8ddFFhVJMQgzDt0oVk,3377
131
+ sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
132
+ sglang/srt/models/llama_reward.py,sha256=JVaiTK4gVXNMimeq3kKkv7dt5Hc77hPqF4ewvmzjJes,4622
133
+ sglang/srt/models/llava.py,sha256=l9mqS9wl_l6ARC-K1UUe7XsB5k9sZratMNQEwx5IjR0,25229
134
+ sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
135
+ sglang/srt/models/minicpm.py,sha256=ws4AqhOfAvYHGd04QuXCZel-Oxy9_vN4p4rTjs9RSz0,13723
136
+ sglang/srt/models/minicpm3.py,sha256=YIKJDTpwjmpLlv1sNT93k2yZMvGQlI_H87czjf6QYyo,24707
137
+ sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
138
+ sglang/srt/models/mixtral.py,sha256=KvvtLWn-GLtwbdrt4PBq9gIFJiJH-JFcj0BLfcLcXjo,14322
139
+ sglang/srt/models/mixtral_quant.py,sha256=uuVO1nWUZJiDhbqZN6gzSMwyfpyZorMuFXHeMCGo7N0,14022
140
+ sglang/srt/models/mllama.py,sha256=3kX-UqeTSYZL5kPNdkfKEAEv3DpSAW1ArAAoeiXVzIc,37739
141
+ sglang/srt/models/olmo.py,sha256=OCDMtX1OI83r80mzU4FMC3Tg8cleQ-7C8Tpoe8zgzss,11708
142
+ sglang/srt/models/olmo2.py,sha256=aC7svioN7XT5owRxPrvhvWBNMON9QXGQBWJ1KHMyXeA,13442
143
+ sglang/srt/models/olmoe.py,sha256=Rw-3YrHWd90MZQFnmcfUQ-3wAaI0PCFKb0DIrCDND3s,15347
144
+ sglang/srt/models/phi3_small.py,sha256=rQFv-4aUGkNPTQjTEME5bcOL9DEQqqFb3aDKrj2joOA,15083
145
+ sglang/srt/models/qwen.py,sha256=_FKDbwaS5C07uJyyivZpBrXJVej4Ph9ivzJdzWJPxJ4,9904
146
+ sglang/srt/models/qwen2.py,sha256=Kh6mW0H2jQdrPS9dJnJShLpo0BNEq6oI4oy5VMHGzac,12444
147
+ sglang/srt/models/qwen2_moe.py,sha256=5-ZTYW5bERhTG5LnNXMqYfWB17NBkvxmQJu6DuDAFCo,16819
148
+ sglang/srt/models/qwen2_vl.py,sha256=3EaUlTbyWOTRXA7eViK1WqmVbCFhXLIpnos49zzf-yM,26561
149
+ sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
150
+ sglang/srt/models/stablelm.py,sha256=iBlIkM7CQmqI25nsujWk0LLCQD7TshzUU8qzZYYrt20,11311
151
+ sglang/srt/models/torch_native_llama.py,sha256=dHuUln7RcsRcdyOIUw7HKtOOG9OPmnfOpK8j5gYXzJY,19039
152
+ sglang/srt/models/xverse.py,sha256=Oq--KqvbYu2H4TMVGEHpSnJLEwXBpxlncR9ilsQeckc,13579
153
+ sglang/srt/models/xverse_moe.py,sha256=AawKEQw--oAl-yzwCjoaZRG7q3rdkyDiam3FS0zjf_c,15537
154
+ sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
155
+ sglang/srt/openai_api/adapter.py,sha256=gZEaG1dVSFv9WLj0369Ke1yrNNgi_gpgKxPt5Ju9mUw,53775
156
+ sglang/srt/openai_api/protocol.py,sha256=4T9hGCrpfCUSBjKZFvemTfj49CkTUzpCcx6izLv3ir0,10246
157
+ sglang/srt/sampling/sampling_batch_info.py,sha256=YC-KPyDWyLGNPL4YVcst4xwP8Wlz2zcCNJHB_5zljXQ,8470
158
+ sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
159
+ sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
160
+ sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
161
+ sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
162
+ sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
163
+ sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
164
+ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
165
+ sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
166
+ sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
167
+ sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
168
+ sglang/test/runners.py,sha256=ANzjrHkT_1E0G3UcD47O8XEKst3Si4AOfx-uErbFS7o,15129
169
+ sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
170
+ sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
171
+ sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
172
+ sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
173
+ sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
174
+ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
175
+ sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
176
+ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
177
+ sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
178
+ sglang/test/test_utils.py,sha256=0lY3ZNfS3JCB4LqSRJgBfB8I0MA8TUT-BJmnrvQC8vw,23797
179
+ sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
180
+ sglang-0.4.0.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
181
+ sglang-0.4.0.dist-info/METADATA,sha256=dXh43Qx9ImYKIF-eB8vuiz_0DDbmNgUrOjCukOXCBTA,22165
182
+ sglang-0.4.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
183
+ sglang-0.4.0.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
184
+ sglang-0.4.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- from sglang.srt.layers.fused_moe_grok.layer import FusedMoE, FusedMoEMethodBase