sglang 0.4.9.post6__tar.gz → 0.4.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (801)
  1. {sglang-0.4.9.post6/sglang.egg-info → sglang-0.4.10}/PKG-INFO +3 -4
  2. {sglang-0.4.9.post6 → sglang-0.4.10}/README.md +0 -1
  3. {sglang-0.4.9.post6 → sglang-0.4.10}/pyproject.toml +3 -3
  4. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/bench_one_batch.py +3 -0
  5. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/__init__.py +8 -0
  6. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/model_config.py +3 -0
  7. sglang-0.4.10/sglang/srt/configs/step3_vl.py +172 -0
  8. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/conversation.py +23 -0
  9. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/decode.py +2 -8
  10. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/prefill.py +2 -6
  11. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/parallel_state.py +86 -1
  12. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/engine.py +14 -18
  13. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/http_server.py +10 -2
  14. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_chat.py +2 -21
  15. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/expert_distribution.py +5 -0
  16. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/expert_location.py +17 -6
  17. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/expert_location_dispatch.py +1 -0
  18. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/expert_location_updater.py +2 -0
  19. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/function_call_parser.py +2 -0
  20. sglang-0.4.10/sglang/srt/function_call/step3_detector.py +436 -0
  21. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/hf_transformers_utils.py +2 -0
  22. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/jinja_template_utils.py +4 -1
  23. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/cutlass_moe.py +2 -1
  24. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/ep_moe/layer.py +20 -640
  25. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +26 -13
  26. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/layer.py +97 -38
  27. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/fp8.py +0 -18
  28. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/unquant.py +0 -8
  29. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/w4afp8.py +1 -0
  30. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/cache_controller.py +143 -45
  31. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/data_parallel_controller.py +2 -0
  32. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/io_struct.py +0 -2
  33. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/scheduler.py +89 -671
  34. sglang-0.4.10/sglang/srt/managers/scheduler_metrics_mixin.py +229 -0
  35. sglang-0.4.10/sglang/srt/managers/scheduler_profiler_mixin.py +279 -0
  36. sglang-0.4.10/sglang/srt/managers/scheduler_update_weights_mixin.py +142 -0
  37. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/template_manager.py +62 -19
  38. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/tokenizer_manager.py +123 -74
  39. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/tp_worker.py +4 -0
  40. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/tp_worker_overlap_thread.py +2 -1
  41. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/hicache_storage.py +45 -11
  42. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/hiradix_cache.py +15 -4
  43. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/memory_pool_host.py +73 -1
  44. sglang-0.4.10/sglang/srt/mem_cache/mooncake_store/mooncake_store.py +264 -0
  45. sglang-0.4.10/sglang/srt/mem_cache/mooncake_store/unit_test.py +40 -0
  46. sglang-0.4.10/sglang/srt/mem_cache/storage/hf3fs/client_hf3fs.py +177 -0
  47. sglang-0.4.10/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +278 -0
  48. sglang-0.4.10/sglang/srt/mem_cache/storage/hf3fs/test_hf3fs_utils.py +43 -0
  49. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_executor/model_runner.py +5 -0
  50. sglang-0.4.10/sglang/srt/models/arcee.py +532 -0
  51. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/deepseek_v2.py +2 -0
  52. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/glm4_moe.py +3 -1
  53. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/granitemoe.py +3 -0
  54. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/grok.py +3 -0
  55. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/hunyuan.py +1 -0
  56. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama4.py +3 -0
  57. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mixtral.py +3 -0
  58. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/olmoe.py +3 -0
  59. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phimoe.py +1 -0
  60. sglang-0.4.10/sglang/srt/models/step3_vl.py +994 -0
  61. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/base_processor.py +15 -16
  62. sglang-0.4.10/sglang/srt/multimodal/processors/step3_vl.py +515 -0
  63. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/reasoning_parser.py +2 -1
  64. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/server_args.py +10 -13
  65. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/eagle_worker.py +2 -0
  66. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/utils.py +0 -11
  67. sglang-0.4.10/sglang/version.py +1 -0
  68. {sglang-0.4.9.post6 → sglang-0.4.10/sglang.egg-info}/PKG-INFO +3 -4
  69. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang.egg-info/SOURCES.txt +13 -0
  70. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang.egg-info/requires.txt +2 -2
  71. sglang-0.4.9.post6/sglang/version.py +0 -1
  72. {sglang-0.4.9.post6 → sglang-0.4.10}/LICENSE +0 -0
  73. {sglang-0.4.9.post6 → sglang-0.4.10}/setup.cfg +0 -0
  74. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/__init__.py +0 -0
  75. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/api.py +0 -0
  76. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/bench_offline_throughput.py +0 -0
  77. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/bench_one_batch_server.py +0 -0
  78. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/bench_serving.py +0 -0
  79. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/check_env.py +0 -0
  80. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/compile_deep_gemm.py +0 -0
  81. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/eval/llama3_eval.py +0 -0
  82. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/eval/loogle_eval.py +0 -0
  83. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/global_config.py +0 -0
  84. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/__init__.py +0 -0
  85. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/anthropic.py +0 -0
  86. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/base_backend.py +0 -0
  87. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/litellm.py +0 -0
  88. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/openai.py +0 -0
  89. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/runtime_endpoint.py +0 -0
  90. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/backend/vertexai.py +0 -0
  91. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/chat_template.py +0 -0
  92. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/choices.py +0 -0
  93. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/compiler.py +0 -0
  94. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/interpreter.py +0 -0
  95. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/ir.py +0 -0
  96. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/lang/tracer.py +0 -0
  97. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/launch_server.py +0 -0
  98. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/profiler.py +0 -0
  99. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/_custom_ops.py +0 -0
  100. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/aio_rwlock.py +0 -0
  101. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/code_completion_parser.py +0 -0
  102. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/chatglm.py +0 -0
  103. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/dbrx.py +0 -0
  104. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/deepseekvl2.py +0 -0
  105. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/device_config.py +0 -0
  106. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/exaone.py +0 -0
  107. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/internvl.py +0 -0
  108. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/janus_pro.py +0 -0
  109. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/kimi_vl.py +0 -0
  110. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/kimi_vl_moonvit.py +0 -0
  111. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/load_config.py +0 -0
  112. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/update_config.py +0 -0
  113. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/configs/utils.py +0 -0
  114. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/__init__.py +0 -0
  115. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/base_connector.py +0 -0
  116. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/redis.py +0 -0
  117. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/s3.py +0 -0
  118. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/serde/__init__.py +0 -0
  119. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/serde/safe_serde.py +0 -0
  120. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/serde/serde.py +0 -0
  121. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/connector/utils.py +0 -0
  122. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constants.py +0 -0
  123. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/base_grammar_backend.py +0 -0
  124. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/llguidance_backend.py +0 -0
  125. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/outlines_backend.py +0 -0
  126. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/outlines_jump_forward.py +0 -0
  127. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/reasoner_grammar_backend.py +0 -0
  128. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/triton_ops/bitmask_ops.py +0 -0
  129. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/constrained/xgrammar_backend.py +0 -0
  130. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/custom_op.py +0 -0
  131. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/debug_utils/__init__.py +0 -0
  132. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/debug_utils/dump_comparator.py +0 -0
  133. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/debug_utils/dumper.py +0 -0
  134. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/debug_utils/text_comparator.py +0 -0
  135. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/ascend/__init__.py +0 -0
  136. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/ascend/conn.py +0 -0
  137. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/ascend/transfer_engine.py +0 -0
  138. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/base/__init__.py +0 -0
  139. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/base/conn.py +0 -0
  140. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/common/__init__.py +0 -0
  141. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/common/conn.py +0 -0
  142. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/common/utils.py +0 -0
  143. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/decode_schedule_batch_mixin.py +0 -0
  144. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/fake/__init__.py +0 -0
  145. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/fake/conn.py +0 -0
  146. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/kv_events.py +0 -0
  147. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/launch_lb.py +0 -0
  148. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/mini_lb.py +0 -0
  149. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/mooncake/__init__.py +0 -0
  150. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/mooncake/conn.py +0 -0
  151. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/mooncake/transfer_engine.py +0 -0
  152. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/nixl/__init__.py +0 -0
  153. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/nixl/conn.py +0 -0
  154. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/disaggregation/utils.py +0 -0
  155. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/__init__.py +0 -0
  156. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/communication_op.py +0 -0
  157. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/cuda_wrapper.py +0 -0
  158. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/custom_all_reduce.py +0 -0
  159. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +0 -0
  160. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/hpu_communicator.py +0 -0
  161. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/npu_communicator.py +0 -0
  162. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/pymscclpp.py +0 -0
  163. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/pynccl.py +0 -0
  164. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/pynccl_wrapper.py +0 -0
  165. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/quick_all_reduce.py +0 -0
  166. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/shm_broadcast.py +0 -0
  167. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/device_communicators/xpu_communicator.py +0 -0
  168. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/distributed/utils.py +0 -0
  169. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/EngineBase.py +0 -0
  170. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/http_server_engine.py +0 -0
  171. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/__init__.py +0 -0
  172. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/protocol.py +0 -0
  173. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_base.py +0 -0
  174. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_completions.py +0 -0
  175. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_embedding.py +0 -0
  176. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_rerank.py +0 -0
  177. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/serving_score.py +0 -0
  178. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/usage_processor.py +0 -0
  179. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/entrypoints/openai/utils.py +0 -0
  180. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/__init__.py +0 -0
  181. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_algorithms/__init__.py +0 -0
  182. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_algorithms/deepseek.py +0 -0
  183. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py +0 -0
  184. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_manager.py +0 -0
  185. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_simulator/__init__.py +0 -0
  186. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/eplb/eplb_simulator/reader.py +0 -0
  187. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/base_format_detector.py +0 -0
  188. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/core_types.py +0 -0
  189. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/deepseekv3_detector.py +0 -0
  190. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/ebnf_composer.py +0 -0
  191. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/glm4_moe_detector.py +0 -0
  192. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/kimik2_detector.py +0 -0
  193. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/llama32_detector.py +0 -0
  194. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/mistral_detector.py +0 -0
  195. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/pythonic_detector.py +0 -0
  196. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/qwen25_detector.py +0 -0
  197. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/qwen3_coder_detector.py +0 -0
  198. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/function_call/utils.py +0 -0
  199. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/activation.py +0 -0
  200. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/amx_utils.py +0 -0
  201. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/aiter_backend.py +0 -0
  202. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/ascend_backend.py +0 -0
  203. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/base_attn_backend.py +0 -0
  204. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/cutlass_mla_backend.py +0 -0
  205. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/double_sparsity_backend.py +0 -0
  206. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/flashattention_backend.py +0 -0
  207. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/flashinfer_backend.py +0 -0
  208. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/flashinfer_mla_backend.py +0 -0
  209. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/flashmla_backend.py +0 -0
  210. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/hybrid_attn_backend.py +0 -0
  211. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/intel_amx_backend.py +0 -0
  212. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/merge_state.py +0 -0
  213. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/tbo_backend.py +0 -0
  214. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/torch_native_backend.py +0 -0
  215. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_backend.py +0 -0
  216. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/decode_attention.py +0 -0
  217. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +0 -0
  218. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/extend_attention.py +0 -0
  219. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/merge_state.py +0 -0
  220. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/prefill_attention.py +0 -0
  221. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py +0 -0
  222. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/utils.py +0 -0
  223. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/attention/vision.py +0 -0
  224. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/communicator.py +0 -0
  225. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/dp_attention.py +0 -0
  226. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/elementwise.py +0 -0
  227. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/flashinfer_comm_fusion.py +0 -0
  228. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/layernorm.py +0 -0
  229. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/linear.py +0 -0
  230. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/logits_processor.py +0 -0
  231. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/cutlass_moe_params.py +0 -0
  232. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/cutlass_w4a8_moe.py +0 -0
  233. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/ep_moe/__init__.py +0 -0
  234. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/ep_moe/kernels.py +0 -0
  235. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/ep_moe/token_dispatcher.py +0 -0
  236. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_native.py +0 -0
  237. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/__init__.py +0 -0
  238. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  239. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  240. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  241. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  242. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  243. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  244. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  245. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  246. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  247. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  248. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  249. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  250. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  251. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json +0 -0
  252. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  253. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  254. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  255. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  256. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  257. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  258. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  259. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  260. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  261. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  262. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  263. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  264. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  265. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  266. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  267. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  268. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  269. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  270. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  271. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  272. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  273. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  274. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  275. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  276. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  277. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  278. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  279. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  280. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json +0 -0
  281. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  282. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  283. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  284. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  285. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  286. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  287. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  288. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  289. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  290. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json +0 -0
  291. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json +0 -0
  292. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  293. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  294. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  295. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  296. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  297. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  298. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json +0 -0
  299. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  300. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  301. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json +0 -0
  302. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  303. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  304. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  305. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json +0 -0
  306. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  307. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  308. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  309. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +0 -0
  310. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  311. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  312. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  313. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json +0 -0
  314. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +0 -0
  315. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +0 -0
  316. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json +0 -0
  317. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  318. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  319. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json +0 -0
  320. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +0 -0
  321. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +0 -0
  322. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json +0 -0
  323. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  324. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  325. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  326. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  327. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json +0 -0
  328. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  329. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  330. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  331. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  332. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json +0 -0
  333. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +0 -0
  334. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +0 -0
  335. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json +0 -0
  336. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  337. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  338. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +0 -0
  339. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  340. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  341. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  342. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json +0 -0
  343. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json +0 -0
  344. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +0 -0
  345. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +0 -0
  346. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +0 -0
  347. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  348. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  349. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  350. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  351. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json +0 -0
  352. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +0 -0
  353. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +0 -0
  354. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json +0 -0
  355. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  356. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  357. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  358. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  359. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json +0 -0
  360. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +0 -0
  361. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +0 -0
  362. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +0 -0
  363. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  364. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  365. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  366. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  367. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json +0 -0
  368. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json +0 -0
  369. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  370. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  371. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json +0 -0
  372. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  373. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json +0 -0
  374. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  375. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  376. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  377. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  378. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json +0 -0
  379. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  380. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json +0 -0
  381. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json +0 -0
  382. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=160,N=320,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  383. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  384. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  385. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  386. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  387. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  388. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  389. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  390. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  391. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  392. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  393. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  394. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  395. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  396. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  397. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  398. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  399. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  400. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +0 -0
  401. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  402. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  403. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  404. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  405. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  406. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  407. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  408. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  409. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +0 -0
  410. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/router.py +0 -0
  411. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/token_dispatcher/__init__.py +0 -0
  412. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/token_dispatcher/base_dispatcher.py +0 -0
  413. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/token_dispatcher/standard.py +0 -0
  414. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/moe/topk.py +0 -0
  415. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/multimodal.py +0 -0
  416. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/parameter.py +0 -0
  417. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/pooler.py +0 -0
  418. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/__init__.py +0 -0
  419. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/awq.py +0 -0
  420. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/awq_triton.py +0 -0
  421. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/base_config.py +0 -0
  422. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/blockwise_int8.py +0 -0
  423. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/__init__.py +0 -0
  424. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +0 -0
  425. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +0 -0
  426. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +0 -0
  427. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py +0 -0
  428. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +0 -0
  429. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +0 -0
  430. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/compressed_tensors/utils.py +0 -0
  431. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  432. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  433. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  434. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  435. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  436. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  437. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  438. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  439. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  440. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  441. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  442. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  443. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  444. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  445. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  446. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  447. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  448. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  449. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  450. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  451. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  452. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  453. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  454. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  455. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  456. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  457. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  458. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  459. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  460. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  461. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  462. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  463. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  464. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  465. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  466. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  467. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  468. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  469. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  470. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  471. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  472. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  473. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  474. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  475. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  476. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  477. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  478. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  479. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  480. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  481. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  482. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  483. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  484. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  485. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  486. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  487. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  488. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  489. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  490. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  491. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  492. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  493. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  494. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  495. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  496. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  497. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  498. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  499. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  500. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  501. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  502. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  503. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  504. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  505. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  506. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  507. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  508. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  509. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  510. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  511. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  512. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  513. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  514. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  515. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  516. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  517. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  518. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  519. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  520. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  521. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  522. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  523. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  524. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  525. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  526. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  527. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  528. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  529. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  530. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  531. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  532. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  533. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  534. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  535. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  536. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  537. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  538. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  539. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  540. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  541. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  542. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  543. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  544. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  545. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  546. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  547. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  548. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  549. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  550. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  551. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  552. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  553. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  554. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  555. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  556. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  557. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  558. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  559. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  560. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  561. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  562. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  563. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  564. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  565. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  566. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  567. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  568. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  569. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  570. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  571. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  572. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  573. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  574. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  575. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  576. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  577. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  578. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  579. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  580. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  581. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  582. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  583. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py +0 -0
  584. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +0 -0
  585. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +0 -0
  586. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +0 -0
  587. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/fp8_kernel.py +0 -0
  588. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/fp8_utils.py +0 -0
  589. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/gptq.py +0 -0
  590. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/int8_kernel.py +0 -0
  591. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/int8_utils.py +0 -0
  592. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/kv_cache.py +0 -0
  593. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/marlin_utils.py +0 -0
  594. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/modelopt_quant.py +0 -0
  595. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/moe_wna16.py +0 -0
  596. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/petit.py +0 -0
  597. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/petit_utils.py +0 -0
  598. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/qoq.py +0 -0
  599. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/scalar_type.py +0 -0
  600. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/utils.py +0 -0
  601. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/w8a8_fp8.py +0 -0
  602. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/quantization/w8a8_int8.py +0 -0
  603. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/radix_attention.py +0 -0
  604. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/rotary_embedding.py +0 -0
  605. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/sampler.py +0 -0
  606. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/torchao_utils.py +0 -0
  607. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/utils.py +0 -0
  608. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/layers/vocab_parallel_embedding.py +0 -0
  609. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/backend/base_backend.py +0 -0
  610. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/backend/flashinfer_backend.py +0 -0
  611. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/backend/triton_backend.py +0 -0
  612. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/layers.py +0 -0
  613. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/lora.py +0 -0
  614. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/lora_config.py +0 -0
  615. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/lora_manager.py +0 -0
  616. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/lora_registry.py +0 -0
  617. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/mem_pool.py +0 -0
  618. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/triton_ops/__init__.py +0 -0
  619. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/triton_ops/gate_up_lora_b.py +0 -0
  620. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/triton_ops/qkv_lora_b.py +0 -0
  621. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/triton_ops/sgemm_lora_a.py +0 -0
  622. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/triton_ops/sgemm_lora_b.py +0 -0
  623. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/lora/utils.py +0 -0
  624. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/configure_logging.py +0 -0
  625. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/detokenizer_manager.py +0 -0
  626. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/mm_utils.py +0 -0
  627. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/multimodal_processor.py +0 -0
  628. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/schedule_batch.py +0 -0
  629. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/schedule_policy.py +0 -0
  630. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/scheduler_input_blocker.py +0 -0
  631. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/scheduler_output_processor_mixin.py +0 -0
  632. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/session_controller.py +0 -0
  633. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/managers/utils.py +0 -0
  634. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/allocator.py +0 -0
  635. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/base_prefix_cache.py +0 -0
  636. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/chunk_cache.py +0 -0
  637. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/flush_cache.py +0 -0
  638. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/memory_pool.py +0 -0
  639. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/multimodal_cache.py +0 -0
  640. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/radix_cache.py +0 -0
  641. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/mem_cache/swa_radix_cache.py +0 -0
  642. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/metrics/collector.py +0 -0
  643. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/metrics/func_timer.py +0 -0
  644. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_executor/cuda_graph_runner.py +0 -0
  645. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_executor/forward_batch_info.py +0 -0
  646. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_loader/__init__.py +0 -0
  647. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_loader/loader.py +0 -0
  648. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_loader/utils.py +0 -0
  649. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_loader/weight_utils.py +0 -0
  650. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/model_parallel.py +0 -0
  651. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/baichuan.py +0 -0
  652. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/bert.py +0 -0
  653. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/chatglm.py +0 -0
  654. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/clip.py +0 -0
  655. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/commandr.py +0 -0
  656. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/dbrx.py +0 -0
  657. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/deepseek.py +0 -0
  658. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/deepseek_janus_pro.py +0 -0
  659. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/deepseek_nextn.py +0 -0
  660. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/deepseek_vl2.py +0 -0
  661. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/exaone.py +0 -0
  662. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma.py +0 -0
  663. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma2.py +0 -0
  664. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma2_reward.py +0 -0
  665. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma3_causal.py +0 -0
  666. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma3_mm.py +0 -0
  667. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma3n_audio.py +0 -0
  668. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma3n_causal.py +0 -0
  669. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gemma3n_mm.py +0 -0
  670. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/glm4.py +0 -0
  671. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/glm4_moe_nextn.py +0 -0
  672. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gpt2.py +0 -0
  673. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/gpt_bigcode.py +0 -0
  674. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/granite.py +0 -0
  675. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/idefics2.py +0 -0
  676. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/internlm2.py +0 -0
  677. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/internlm2_reward.py +0 -0
  678. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/interns1.py +0 -0
  679. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/internvl.py +0 -0
  680. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/kimi_vl.py +0 -0
  681. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/kimi_vl_moonvit.py +0 -0
  682. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama.py +0 -0
  683. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama_classification.py +0 -0
  684. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama_eagle.py +0 -0
  685. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama_eagle3.py +0 -0
  686. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama_embedding.py +0 -0
  687. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llama_reward.py +0 -0
  688. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llava.py +0 -0
  689. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/llavavid.py +0 -0
  690. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mimo.py +0 -0
  691. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mimo_mtp.py +0 -0
  692. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/minicpm.py +0 -0
  693. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/minicpm3.py +0 -0
  694. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/minicpmo.py +0 -0
  695. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/minicpmv.py +0 -0
  696. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mistral.py +0 -0
  697. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mixtral_quant.py +0 -0
  698. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mllama.py +0 -0
  699. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/mllama4.py +0 -0
  700. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/olmo.py +0 -0
  701. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/olmo2.py +0 -0
  702. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/persimmon.py +0 -0
  703. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phi.py +0 -0
  704. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phi3_small.py +0 -0
  705. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phi4mm.py +0 -0
  706. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phi4mm_audio.py +0 -0
  707. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/phi4mm_utils.py +0 -0
  708. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/pixtral.py +0 -0
  709. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen.py +0 -0
  710. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2.py +0 -0
  711. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_5_vl.py +0 -0
  712. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_audio.py +0 -0
  713. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_classification.py +0 -0
  714. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_eagle.py +0 -0
  715. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_moe.py +0 -0
  716. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_rm.py +0 -0
  717. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen2_vl.py +0 -0
  718. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen3.py +0 -0
  719. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/qwen3_moe.py +0 -0
  720. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/registry.py +0 -0
  721. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/roberta.py +0 -0
  722. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/siglip.py +0 -0
  723. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/stablelm.py +0 -0
  724. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/torch_native_llama.py +0 -0
  725. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/transformers.py +0 -0
  726. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/vila.py +0 -0
  727. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/xverse.py +0 -0
  728. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/xverse_moe.py +0 -0
  729. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/models/yivl.py +0 -0
  730. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/mm_utils.py +0 -0
  731. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/clip.py +0 -0
  732. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/deepseek_vl_v2.py +0 -0
  733. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/gemma3.py +0 -0
  734. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/gemma3n.py +0 -0
  735. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/internvl.py +0 -0
  736. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/janus_pro.py +0 -0
  737. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/kimi_vl.py +0 -0
  738. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/llava.py +0 -0
  739. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/minicpm.py +0 -0
  740. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/mlama.py +0 -0
  741. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/mllama4.py +0 -0
  742. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/phi4mm.py +0 -0
  743. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/pixtral.py +0 -0
  744. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/qwen_audio.py +0 -0
  745. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/qwen_vl.py +0 -0
  746. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/multimodal/processors/vila.py +0 -0
  747. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/operations.py +0 -0
  748. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/operations_strategy.py +0 -0
  749. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/patch_torch.py +0 -0
  750. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/poll_based_barrier.py +0 -0
  751. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/custom_logit_processor.py +0 -0
  752. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/penaltylib/__init__.py +0 -0
  753. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/penaltylib/frequency_penalty.py +0 -0
  754. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/penaltylib/min_new_tokens.py +0 -0
  755. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/penaltylib/orchestrator.py +0 -0
  756. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/penaltylib/presence_penalty.py +0 -0
  757. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/sampling_batch_info.py +0 -0
  758. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/sampling/sampling_params.py +0 -0
  759. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/build_eagle_tree.py +0 -0
  760. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +0 -0
  761. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +0 -0
  762. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/eagle_utils.py +0 -0
  763. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/speculative/spec_info.py +0 -0
  764. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/torch_memory_saver_adapter.py +0 -0
  765. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/two_batch_overlap.py +0 -0
  766. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/utils.py +0 -0
  767. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/warmup.py +0 -0
  768. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/srt/weight_sync/utils.py +0 -0
  769. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/__init__.py +0 -0
  770. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/attention/__init__.py +0 -0
  771. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/attention/test_flashattn_backend.py +0 -0
  772. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/attention/test_flashattn_mla_backend.py +0 -0
  773. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/attention/test_prefix_chunk_info.py +0 -0
  774. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/few_shot_gsm8k.py +0 -0
  775. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/few_shot_gsm8k_engine.py +0 -0
  776. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/run_eval.py +0 -0
  777. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/runners.py +0 -0
  778. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/send_one.py +0 -0
  779. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_common.py +0 -0
  780. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_gpqa.py +0 -0
  781. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_humaneval.py +0 -0
  782. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_math.py +0 -0
  783. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_mgsm.py +0 -0
  784. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/simple_eval_mmlu.py +0 -0
  785. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_activation.py +0 -0
  786. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_block_fp8.py +0 -0
  787. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_block_fp8_deep_gemm_blackwell.py +0 -0
  788. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_block_fp8_ep.py +0 -0
  789. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_custom_ops.py +0 -0
  790. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_cutlass_moe.py +0 -0
  791. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_cutlass_w4a8_moe.py +0 -0
  792. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_deepep_utils.py +0 -0
  793. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_dynamic_grad_mode.py +0 -0
  794. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_fp4_moe.py +0 -0
  795. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_layernorm.py +0 -0
  796. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_marlin_moe.py +0 -0
  797. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_marlin_utils.py +0 -0
  798. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_programs.py +0 -0
  799. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang/test/test_utils.py +0 -0
  800. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang.egg-info/dependency_links.txt +0 -0
  801. {sglang-0.4.9.post6 → sglang-0.4.10}/sglang.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.4.9.post6
3
+ Version: 0.4.10
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -246,14 +246,14 @@ Requires-Dist: sentencepiece; extra == "runtime-common"
246
246
  Requires-Dist: soundfile==0.13.1; extra == "runtime-common"
247
247
  Requires-Dist: scipy; extra == "runtime-common"
248
248
  Requires-Dist: torchao==0.9.0; extra == "runtime-common"
249
- Requires-Dist: transformers==4.54.0; extra == "runtime-common"
249
+ Requires-Dist: transformers==4.54.1; extra == "runtime-common"
250
250
  Requires-Dist: timm==1.0.16; extra == "runtime-common"
251
251
  Requires-Dist: uvicorn; extra == "runtime-common"
252
252
  Requires-Dist: uvloop; extra == "runtime-common"
253
253
  Requires-Dist: xgrammar==0.1.21; extra == "runtime-common"
254
254
  Provides-Extra: srt
255
255
  Requires-Dist: sglang[runtime_common]; extra == "srt"
256
- Requires-Dist: sgl-kernel==0.2.7; extra == "srt"
256
+ Requires-Dist: sgl-kernel==0.2.8; extra == "srt"
257
257
  Requires-Dist: torch==2.7.1; extra == "srt"
258
258
  Requires-Dist: torchaudio==2.7.1; extra == "srt"
259
259
  Requires-Dist: torchvision==0.22.1; extra == "srt"
@@ -427,7 +427,6 @@ SGLang has been deployed at large scale, generating trillions of tokens in produ
427
427
  <img src="https://raw.githubusercontent.com/sgl-project/sgl-learning-materials/refs/heads/main/slides/adoption.png" alt="logo" width="800" margin="10px"></img>
428
428
 
429
429
  ## Contact Us
430
-
431
430
  For enterprises interested in adopting or deploying SGLang at scale, including technical consulting, sponsorship opportunities, or partnership inquiries, please contact us at contact@sglang.ai.
432
431
 
433
432
  ## Acknowledgment
@@ -70,7 +70,6 @@ SGLang has been deployed at large scale, generating trillions of tokens in produ
70
70
  <img src="https://raw.githubusercontent.com/sgl-project/sgl-learning-materials/refs/heads/main/slides/adoption.png" alt="logo" width="800" margin="10px"></img>
71
71
 
72
72
  ## Contact Us
73
-
74
73
  For enterprises interested in adopting or deploying SGLang at scale, including technical consulting, sponsorship opportunities, or partnership inquiries, please contact us at contact@sglang.ai.
75
74
 
76
75
  ## Acknowledgment
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sglang"
7
- version = "0.4.9.post6"
7
+ version = "0.4.10"
8
8
  description = "SGLang is yet another fast serving framework for large language models and vision language models."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -45,7 +45,7 @@ runtime_common = [
45
45
  "soundfile==0.13.1",
46
46
  "scipy",
47
47
  "torchao==0.9.0",
48
- "transformers==4.54.0",
48
+ "transformers==4.54.1",
49
49
  "timm==1.0.16",
50
50
  "uvicorn",
51
51
  "uvloop",
@@ -54,7 +54,7 @@ runtime_common = [
54
54
 
55
55
  srt = [
56
56
  "sglang[runtime_common]",
57
- "sgl-kernel==0.2.7",
57
+ "sgl-kernel==0.2.8",
58
58
  "torch==2.7.1",
59
59
  "torchaudio==2.7.1",
60
60
  "torchvision==0.22.1",
@@ -138,6 +138,7 @@ class BenchArgs:
138
138
  def load_model(server_args, port_args, tp_rank):
139
139
  suppress_other_loggers()
140
140
  rank_print = print if tp_rank == 0 else lambda *args, **kwargs: None
141
+ moe_ep_rank = tp_rank // (server_args.tp_size // server_args.ep_size)
141
142
 
142
143
  model_config = ModelConfig.from_server_args(server_args)
143
144
  model_runner = ModelRunner(
@@ -146,6 +147,8 @@ def load_model(server_args, port_args, tp_rank):
146
147
  gpu_id=tp_rank,
147
148
  tp_rank=tp_rank,
148
149
  tp_size=server_args.tp_size,
150
+ moe_ep_rank=moe_ep_rank,
151
+ moe_ep_size=server_args.ep_size,
149
152
  pp_rank=0,
150
153
  pp_size=1,
151
154
  nccl_port=port_args.nccl_port,
@@ -5,6 +5,11 @@ from sglang.srt.configs.exaone import ExaoneConfig
5
5
  from sglang.srt.configs.janus_pro import MultiModalityConfig
6
6
  from sglang.srt.configs.kimi_vl import KimiVLConfig
7
7
  from sglang.srt.configs.kimi_vl_moonvit import MoonViTConfig
8
+ from sglang.srt.configs.step3_vl import (
9
+ Step3TextConfig,
10
+ Step3VisionEncoderConfig,
11
+ Step3VLConfig,
12
+ )
8
13
 
9
14
  __all__ = [
10
15
  "ExaoneConfig",
@@ -14,4 +19,7 @@ __all__ = [
14
19
  "MultiModalityConfig",
15
20
  "KimiVLConfig",
16
21
  "MoonViTConfig",
22
+ "Step3VLConfig",
23
+ "Step3TextConfig",
24
+ "Step3VisionEncoderConfig",
17
25
  ]
@@ -335,6 +335,8 @@ class ModelConfig:
335
335
  "num_key_value_heads",
336
336
  # For ChatGLM:
337
337
  "multi_query_group_num",
338
+ # For Step3
339
+ "num_attention_groups",
338
340
  ]
339
341
  for attr in attributes:
340
342
  num_kv_heads = getattr(self.hf_text_config, attr, None)
@@ -644,6 +646,7 @@ multimodal_model_archs = [
644
646
  "InternS1ForConditionalGeneration",
645
647
  "Phi4MMForCausalLM",
646
648
  "VILAForConditionalGeneration",
649
+ "Step3VLForConditionalGeneration",
647
650
  ]
648
651
 
649
652
 
@@ -0,0 +1,172 @@
1
+ from typing import Any, Optional, Union
2
+
3
+ from transformers.configuration_utils import PretrainedConfig
4
+
5
+
6
+ class Step3VisionEncoderConfig(PretrainedConfig):
7
+ model_type = "step3_vision_encoder"
8
+
9
+ def __init__(
10
+ self,
11
+ hidden_size=1792,
12
+ intermediate_size=3072,
13
+ output_hidden_size=4096,
14
+ num_hidden_layers=63,
15
+ num_attention_heads=16,
16
+ num_channels=3,
17
+ image_size=728,
18
+ patch_size=14,
19
+ hidden_act="quick_gelu",
20
+ layer_norm_eps=1e-5,
21
+ **kwargs,
22
+ ):
23
+ self.hidden_size = hidden_size
24
+ self.intermediate_size = intermediate_size
25
+ self.output_hidden_size = output_hidden_size
26
+ self.num_hidden_layers = num_hidden_layers
27
+ self.num_attention_heads = num_attention_heads
28
+ self.num_channels = num_channels
29
+ self.patch_size = patch_size
30
+ self.image_size = image_size
31
+ self.layer_norm_eps = layer_norm_eps
32
+ self.hidden_act = hidden_act
33
+ super().__init__(**kwargs)
34
+
35
+
36
+ class Step3TextConfig(PretrainedConfig):
37
+ model_type = "step3_text"
38
+ architectures = ["Step3TextForCausalLM"]
39
+
40
+ def __init__(
41
+ self,
42
+ hidden_size: int = 7168,
43
+ intermediate_size: int = 18432,
44
+ num_attention_heads: int = 64,
45
+ num_attention_groups: int = 1,
46
+ num_hidden_layers: int = 61,
47
+ max_seq_len: int = 65536,
48
+ vocab_size: int = 128815,
49
+ rms_norm_eps: float = 1e-5,
50
+ moe_intermediate_size: int = 5120,
51
+ moe_num_experts: int = 48,
52
+ moe_top_k: int = 3,
53
+ rope_theta: float = 500000,
54
+ rope_scaling: Optional[dict[str, Any]] = None,
55
+ max_position_embedding: int = 65536,
56
+ share_expert_dim: int = 5120,
57
+ share_q_dim: int = 2048,
58
+ head_dim: int = 256,
59
+ norm_expert_weight: bool = False,
60
+ moe_layers_enum: tuple[int] = (
61
+ 4,
62
+ 5,
63
+ 6,
64
+ 7,
65
+ 8,
66
+ 9,
67
+ 10,
68
+ 11,
69
+ 12,
70
+ 13,
71
+ 14,
72
+ 15,
73
+ 16,
74
+ 17,
75
+ 18,
76
+ 19,
77
+ 20,
78
+ 21,
79
+ 22,
80
+ 23,
81
+ 24,
82
+ 25,
83
+ 26,
84
+ 27,
85
+ 28,
86
+ 29,
87
+ 30,
88
+ 31,
89
+ 32,
90
+ 33,
91
+ 34,
92
+ 35,
93
+ 36,
94
+ 37,
95
+ 38,
96
+ 39,
97
+ 40,
98
+ 41,
99
+ 42,
100
+ 43,
101
+ 44,
102
+ 45,
103
+ 46,
104
+ 47,
105
+ 48,
106
+ 49,
107
+ 50,
108
+ 51,
109
+ 52,
110
+ 53,
111
+ 54,
112
+ 55,
113
+ 56,
114
+ 57,
115
+ 58,
116
+ 59,
117
+ ),
118
+ **kwargs,
119
+ ) -> None:
120
+ self.hidden_size = hidden_size
121
+ self.intermediate_size = intermediate_size
122
+ self.num_attention_heads = num_attention_heads
123
+ self.num_attention_groups = num_attention_groups
124
+ self.num_hidden_layers = num_hidden_layers
125
+ self.max_seq_len = max_seq_len
126
+ self.vocab_size = vocab_size
127
+ self.rms_norm_eps = rms_norm_eps
128
+ self.moe_intermediate_size = moe_intermediate_size
129
+ self.moe_num_experts = moe_num_experts
130
+ self.moe_top_k = moe_top_k
131
+ self.rope_theta = rope_theta
132
+ self.rope_scaling = rope_scaling
133
+ self.max_position_embedding = max_position_embedding
134
+ self.share_expert_dim = share_expert_dim
135
+ self.share_q_dim = share_q_dim
136
+ self.head_dim = head_dim
137
+ self.norm_expert_weight = norm_expert_weight
138
+ self.moe_layers_enum = moe_layers_enum
139
+
140
+ super().__init__(**kwargs)
141
+
142
+
143
+ class Step3VLConfig(PretrainedConfig):
144
+ model_type = "step3_vl"
145
+
146
+ def __init__(
147
+ self,
148
+ vision_config: Optional[Union[dict, Step3VisionEncoderConfig]] = None,
149
+ text_config: Optional[Union[dict, Step3TextConfig]] = None,
150
+ understand_projector_stride: int = 1,
151
+ projector_bias: bool = True,
152
+ image_token_id: int = 128001,
153
+ **kwargs,
154
+ ) -> None:
155
+ if vision_config is None:
156
+ vision_config = Step3VisionEncoderConfig()
157
+ elif isinstance(vision_config, dict):
158
+ vision_config = Step3VisionEncoderConfig(**vision_config)
159
+ self.vision_config = vision_config
160
+
161
+ if text_config is None:
162
+ text_config = Step3TextConfig()
163
+ elif isinstance(text_config, dict):
164
+ text_config = Step3TextConfig(**text_config)
165
+ self.text_config = text_config
166
+
167
+ self.understand_projector_stride = understand_projector_stride
168
+ self.projector_bias = projector_bias
169
+ self.hidden_size = text_config.hidden_size
170
+ self.image_token_id = image_token_id
171
+
172
+ super().__init__(**kwargs)
@@ -994,6 +994,23 @@ register_conv_template(
994
994
  )
995
995
  )
996
996
 
997
+ register_conv_template(
998
+ Conversation(
999
+ name="step3-vl",
1000
+ system_message="<|begin▁of▁sentence|>You are a helpful assistant",
1001
+ system_template="{system_message}\n",
1002
+ roles=(
1003
+ "<|BOT|>user\n",
1004
+ "<|BOT|>assistant\n<think>\n",
1005
+ ),
1006
+ sep="<|EOT|>",
1007
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
1008
+ stop_str="<|EOT|>",
1009
+ image_token="<im_patch>",
1010
+ # add_bos=True,
1011
+ )
1012
+ )
1013
+
997
1014
 
998
1015
  @register_conv_template_matching_function
999
1016
  def match_internvl(model_path: str):
@@ -1103,3 +1120,9 @@ def match_vila(model_path: str):
1103
1120
  def match_mimo_vl(model_path: str):
1104
1121
  if re.search(r"mimo.*vl", model_path, re.IGNORECASE):
1105
1122
  return "mimo-vl"
1123
+
1124
+
1125
+ # @register_conv_template_matching_function
1126
+ # def match_step3(model_path: str):
1127
+ # if re.search(r"step3", model_path, re.IGNORECASE):
1128
+ # return "step3-vl"
@@ -694,10 +694,7 @@ class SchedulerDisaggregationDecodeMixin:
694
694
  + len(self.disagg_decode_prealloc_queue.queue)
695
695
  == 0
696
696
  ):
697
- # When the server is idle, do self-check and re-init some states
698
- self.check_memory()
699
- self.new_token_ratio = self.init_new_token_ratio
700
- self.maybe_sleep_on_idle()
697
+ self.self_check_during_idle()
701
698
 
702
699
  self.last_batch = batch
703
700
 
@@ -771,10 +768,7 @@ class SchedulerDisaggregationDecodeMixin:
771
768
  + len(self.disagg_decode_prealloc_queue.queue)
772
769
  == 0
773
770
  ):
774
- # When the server is idle, do self-check and re-init some states
775
- self.check_memory()
776
- self.new_token_ratio = self.init_new_token_ratio
777
- self.maybe_sleep_on_idle()
771
+ self.self_check_during_idle()
778
772
 
779
773
  self.last_batch = batch
780
774
  self.last_batch_in_queue = last_batch_in_queue
@@ -287,9 +287,7 @@ class SchedulerDisaggregationPrefillMixin:
287
287
  self.process_disagg_prefill_inflight_queue()
288
288
 
289
289
  if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
290
- self.check_memory()
291
- self.new_token_ratio = self.init_new_token_ratio
292
- self.maybe_sleep_on_idle()
290
+ self.self_check_during_idle()
293
291
 
294
292
  self.last_batch = batch
295
293
  # HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
@@ -337,9 +335,7 @@ class SchedulerDisaggregationPrefillMixin:
337
335
  self.process_disagg_prefill_inflight_queue()
338
336
 
339
337
  if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
340
- self.check_memory()
341
- self.new_token_ratio = self.init_new_token_ratio
342
- self.maybe_sleep_on_idle()
338
+ self.self_check_during_idle()
343
339
 
344
340
  self.last_batch = batch
345
341
  # HACK (byronhsu): reset the batch_is_full flag because we never enter update_running_batch which resets it
@@ -354,6 +354,13 @@ class GroupCoordinator:
354
354
  self.cpu_group, 1 << 22, 6
355
355
  )
356
356
 
357
+ def __repr__(self):
358
+ return (
359
+ f"ranks={self.ranks} rank={self.rank} local_rank={self.local_rank} use_pynccl={self.use_pynccl} "
360
+ f"device_group={self.device_group} cpu_group={self.cpu_group} unique_name={self.unique_name} "
361
+ f"world_size={self.world_size} rank_in_group={self.rank_in_group}"
362
+ )
363
+
357
364
  @property
358
365
  def first_rank(self):
359
366
  """Return the global rank of the first process in the group"""
@@ -1141,6 +1148,20 @@ def get_tp_group() -> GroupCoordinator:
1141
1148
  return _TP
1142
1149
 
1143
1150
 
1151
+ _MOE_EP: Optional[GroupCoordinator] = None
1152
+ _MOE_TP: Optional[GroupCoordinator] = None
1153
+
1154
+
1155
+ def get_moe_ep_group() -> GroupCoordinator:
1156
+ assert _MOE_EP is not None, "expert model parallel group is not initialized"
1157
+ return _MOE_EP
1158
+
1159
+
1160
+ def get_moe_tp_group() -> GroupCoordinator:
1161
+ assert _MOE_TP is not None, "expert model parallel group is not initialized"
1162
+ return _MOE_TP
1163
+
1164
+
1144
1165
  # kept for backward compatibility
1145
1166
  get_tensor_model_parallel_group = get_tp_group
1146
1167
 
@@ -1250,6 +1271,7 @@ def init_distributed_environment(
1250
1271
 
1251
1272
  def initialize_model_parallel(
1252
1273
  tensor_model_parallel_size: int = 1,
1274
+ expert_model_parallel_size: int = 1,
1253
1275
  pipeline_model_parallel_size: int = 1,
1254
1276
  backend: Optional[str] = None,
1255
1277
  duplicate_tp_group: bool = False,
@@ -1327,6 +1349,45 @@ def initialize_model_parallel(
1327
1349
  _TP.pynccl_comm.disabled = False
1328
1350
  _PDMUX_PREFILL_TP_GROUP.pynccl_comm.disabled = False
1329
1351
 
1352
+ moe_ep_size = expert_model_parallel_size
1353
+
1354
+ moe_tp_size = tensor_model_parallel_size // moe_ep_size
1355
+ global _MOE_EP
1356
+ assert _MOE_EP is None, "expert model parallel group is already initialized"
1357
+ group_ranks = []
1358
+ for i in range(num_tensor_model_parallel_groups):
1359
+ for j in range(moe_tp_size):
1360
+ st = i * tensor_model_parallel_size + j
1361
+ en = (i + 1) * tensor_model_parallel_size + j
1362
+ ranks = list(range(st, en, moe_tp_size))
1363
+ group_ranks.append(ranks)
1364
+
1365
+ _MOE_EP = init_model_parallel_group(
1366
+ group_ranks,
1367
+ get_world_group().local_rank,
1368
+ backend,
1369
+ use_custom_allreduce=False,
1370
+ group_name="moe_ep",
1371
+ )
1372
+
1373
+ global _MOE_TP
1374
+ assert _MOE_TP is None, "expert model parallel group is already initialized"
1375
+ group_ranks = []
1376
+ for i in range(num_tensor_model_parallel_groups):
1377
+ for j in range(moe_ep_size):
1378
+ st = i * tensor_model_parallel_size + j * moe_tp_size
1379
+ en = i * tensor_model_parallel_size + (j + 1) * moe_tp_size
1380
+ ranks = list(range(st, en))
1381
+ group_ranks.append(ranks)
1382
+
1383
+ _MOE_TP = init_model_parallel_group(
1384
+ group_ranks,
1385
+ get_world_group().local_rank,
1386
+ backend,
1387
+ use_custom_allreduce=False,
1388
+ group_name="moe_tp",
1389
+ )
1390
+
1330
1391
  # Build the pipeline model-parallel groups.
1331
1392
  num_pipeline_model_parallel_groups: int = world_size // pipeline_model_parallel_size
1332
1393
  global _PP
@@ -1347,6 +1408,7 @@ def initialize_model_parallel(
1347
1408
 
1348
1409
  def ensure_model_parallel_initialized(
1349
1410
  tensor_model_parallel_size: int,
1411
+ expert_model_parallel_size: int,
1350
1412
  pipeline_model_parallel_size: int,
1351
1413
  backend: Optional[str] = None,
1352
1414
  ) -> None:
@@ -1357,7 +1419,10 @@ def ensure_model_parallel_initialized(
1357
1419
  backend = backend or torch.distributed.get_backend(get_world_group().device_group)
1358
1420
  if not model_parallel_is_initialized():
1359
1421
  initialize_model_parallel(
1360
- tensor_model_parallel_size, pipeline_model_parallel_size, backend
1422
+ tensor_model_parallel_size,
1423
+ expert_model_parallel_size,
1424
+ pipeline_model_parallel_size,
1425
+ backend,
1361
1426
  )
1362
1427
  return
1363
1428
 
@@ -1417,6 +1482,26 @@ def get_tensor_model_parallel_rank():
1417
1482
  return get_tp_group().rank_in_group
1418
1483
 
1419
1484
 
1485
+ def get_moe_expert_parallel_world_size():
1486
+ """Return world size for the moe expert parallel group."""
1487
+ return get_moe_ep_group().world_size
1488
+
1489
+
1490
+ def get_moe_expert_parallel_rank():
1491
+ """Return my rank for the moe expert parallel group."""
1492
+ return get_moe_ep_group().rank_in_group
1493
+
1494
+
1495
+ def get_moe_tensor_parallel_world_size():
1496
+ """Return world size for the moe tensor parallel group."""
1497
+ return get_moe_tp_group().world_size
1498
+
1499
+
1500
+ def get_moe_tensor_parallel_rank():
1501
+ """Return my rank for the moe tensor parallel group."""
1502
+ return get_moe_tp_group().rank_in_group
1503
+
1504
+
1420
1505
  def destroy_model_parallel():
1421
1506
  """Set the groups to none and destroy them."""
1422
1507
  global _TP
@@ -648,29 +648,23 @@ def _set_envs_and_config(server_args: ServerArgs):
648
648
  if _is_cuda:
649
649
  assert_pkg_version(
650
650
  "sgl-kernel",
651
- "0.2.7",
651
+ "0.2.8",
652
652
  "Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
653
653
  )
654
654
 
655
- def sigchld_handler(signum, frame):
656
- pid, exitcode = os.waitpid(0, os.WNOHANG)
657
- if exitcode != 0:
658
- logger.warning(
659
- f"Child process unexpectedly failed with {exitcode=}. {pid=}"
655
+ if True: # Keep this check for internal code compatibility
656
+ # Register the signal handler.
657
+ # The child processes will send SIGQUIT to this process when any error happens
658
+ # This process then clean up the whole process tree
659
+ # Note: This sigquit handler is used in the launch phase, and may be replaced by
660
+ # the running_phase_sigquit_handler in the tokenizer manager after the grpc server is launched.
661
+ def launch_phase_sigquit_handler(signum, frame):
662
+ logger.error(
663
+ "Received sigquit from a child process. It usually means the child failed."
660
664
  )
665
+ kill_process_tree(os.getpid())
661
666
 
662
- signal.signal(signal.SIGCHLD, sigchld_handler)
663
-
664
- # Register the signal handler.
665
- # The child processes will send SIGQUIT to this process when any error happens
666
- # This process then clean up the whole process tree
667
- def sigquit_handler(signum, frame):
668
- logger.error(
669
- "Received sigquit from a child process. It usually means the child failed."
670
- )
671
- kill_process_tree(os.getpid())
672
-
673
- signal.signal(signal.SIGQUIT, sigquit_handler)
667
+ signal.signal(signal.SIGQUIT, launch_phase_sigquit_handler)
674
668
 
675
669
  # Set mp start method
676
670
  mp.set_start_method("spawn", force=True)
@@ -725,6 +719,7 @@ def _launch_subprocesses(
725
719
  + ((pp_rank % pp_size_per_node) * tp_size_per_node)
726
720
  + (tp_rank % tp_size_per_node) * server_args.gpu_id_step
727
721
  )
722
+ moe_ep_rank = tp_rank // (server_args.tp_size // server_args.ep_size)
728
723
  proc = mp.Process(
729
724
  target=run_scheduler_process,
730
725
  args=(
@@ -732,6 +727,7 @@ def _launch_subprocesses(
732
727
  port_args,
733
728
  gpu_id,
734
729
  tp_rank,
730
+ moe_ep_rank,
735
731
  pp_rank,
736
732
  None,
737
733
  writer,
@@ -238,6 +238,9 @@ async def health() -> Response:
238
238
  @app.get("/health_generate")
239
239
  async def health_generate(request: Request) -> Response:
240
240
  """Check the health of the inference server by generating one token."""
241
+ if _global_state.tokenizer_manager.gracefully_exit:
242
+ logger.info("Health check request received during shutdown. Returning 503.")
243
+ return Response(status_code=503)
241
244
 
242
245
  sampling_params = {"max_new_tokens": 1, "temperature": 0.0}
243
246
  rid = f"HEALTH_CHECK_{time.time()}"
@@ -260,9 +263,14 @@ async def health_generate(request: Request) -> Response:
260
263
  async for _ in _global_state.tokenizer_manager.generate_request(gri, request):
261
264
  break
262
265
 
263
- tic = time.perf_counter()
266
+ # This request is a special request.
267
+ # If the server already has something running, this request will be ignored, so it creates zero overhead.
268
+ # If the server is not running, this request will be run, so we know whether the server is healthy.
264
269
  task = asyncio.create_task(gen())
265
- while time.perf_counter() < tic + HEALTH_CHECK_TIMEOUT:
270
+
271
+ # As long as we receive any response from the detokenizer/scheduler, we consider the server is healthy.
272
+ tic = time.time()
273
+ while time.time() < tic + HEALTH_CHECK_TIMEOUT:
266
274
  await asyncio.sleep(1)
267
275
  if _global_state.tokenizer_manager.last_receive_tstamp > tic:
268
276
  task.cancel()
@@ -127,12 +127,12 @@ class OpenAIServingChat(OpenAIServingBase):
127
127
  request.skip_special_tokens = False
128
128
  if not isinstance(request.tool_choice, str):
129
129
  tools = [
130
- item.model_dump()
130
+ item.function.model_dump()
131
131
  for item in request.tools
132
132
  if item.function.name == request.tool_choice.function.name
133
133
  ]
134
134
  else:
135
- tools = [item.model_dump() for item in request.tools]
135
+ tools = [item.function.model_dump() for item in request.tools]
136
136
 
137
137
  tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
138
138
  parser = FunctionCallParser(request.tools, tool_call_parser)
@@ -178,25 +178,6 @@ class OpenAIServingChat(OpenAIServingBase):
178
178
  audio_data,
179
179
  modalities,
180
180
  )
181
-
182
- if "tool_calls" in processed_msg and isinstance(
183
- processed_msg.get("tool_calls"), list
184
- ):
185
- for call in processed_msg["tool_calls"]:
186
- try:
187
- if "arguments" in call["function"] and isinstance(
188
- call["function"]["arguments"], str
189
- ):
190
- call["function"]["arguments"] = json.loads(
191
- call["function"]["arguments"]
192
- )
193
- except json.JSONDecodeError as e:
194
- # Log a warning or error if JSON parsing fails for arguments
195
- logger.warning(
196
- f"Failed to parse tool call arguments as JSON: {e}"
197
- )
198
- # Decide whether to continue or raise the exception based on desired behavior
199
- continue # Or raise e if strict parsing is required
200
181
  openai_compatible_messages.append(processed_msg)
201
182
 
202
183
  # Handle assistant prefix for continue_final_message
@@ -47,6 +47,11 @@ class ExpertDistributionRecorder(ABC):
47
47
  rank: int,
48
48
  ):
49
49
  if server_args.expert_distribution_recorder_mode is not None:
50
+ assert (
51
+ expert_location_metadata is not None
52
+ ), "ExpertLocationMetadata is required for expert distribution recording. One possible"
53
+ "reason is that you are using a model that does not support expert distribution"
54
+ "recording. Try setting `get_model_config_for_expert_location` in your model."
50
55
  return _ExpertDistributionRecorderReal(
51
56
  server_args, expert_location_metadata, rank
52
57
  )