sglang 0.5.3.post2__tar.gz → 0.5.3.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1055)
  1. {sglang-0.5.3.post2/sglang.egg-info → sglang-0.5.3.post3}/PKG-INFO +1 -1
  2. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/pyproject.toml +1 -1
  3. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/bench_one_batch.py +13 -8
  4. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/base/conn.py +17 -4
  5. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/common/conn.py +1 -0
  6. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/decode.py +113 -8
  7. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/fake/conn.py +11 -3
  8. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/mooncake/conn.py +148 -17
  9. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/nixl/conn.py +7 -1
  10. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/prefill.py +71 -1
  11. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -3
  12. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/environ.py +3 -3
  13. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/ascend_backend.py +17 -0
  14. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/layernorm.py +41 -9
  15. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/logits_processor.py +1 -1
  16. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/utils.py +4 -2
  17. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/rotary_embedding.py +16 -2
  18. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/sampler.py +3 -3
  19. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler.py +0 -6
  20. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/allocator_ascend.py +1 -1
  21. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/common.py +1 -5
  22. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/memory_pool.py +248 -137
  23. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/model_runner.py +28 -13
  24. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/npu_graph_runner.py +2 -2
  25. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_loader/weight_utils.py +2 -2
  26. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/deepseek_v2.py +1 -0
  27. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/glm4_moe.py +4 -2
  28. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/server_args.py +31 -9
  29. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_worker.py +2 -2
  30. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/spec_info.py +2 -0
  31. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/standalone_worker.py +1 -1
  32. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/runners.py +1 -1
  33. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/send_one.py +27 -1
  34. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_disaggregation_utils.py +33 -15
  35. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_utils.py +37 -2
  36. sglang-0.5.3.post3/sglang/version.py +1 -0
  37. {sglang-0.5.3.post2 → sglang-0.5.3.post3/sglang.egg-info}/PKG-INFO +1 -1
  38. sglang-0.5.3.post2/sglang/version.py +0 -1
  39. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/LICENSE +0 -0
  40. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/README.md +0 -0
  41. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/setup.cfg +0 -0
  42. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/__init__.py +0 -0
  43. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/bench_offline_throughput.py +0 -0
  44. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/bench_one_batch_server.py +0 -0
  45. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/bench_serving.py +0 -0
  46. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/check_env.py +0 -0
  47. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/compile_deep_gemm.py +0 -0
  48. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/eval/llama3_eval.py +0 -0
  49. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/eval/loogle_eval.py +0 -0
  50. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/global_config.py +0 -0
  51. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/api.py +0 -0
  52. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/anthropic.py +0 -0
  53. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/base_backend.py +0 -0
  54. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/litellm.py +0 -0
  55. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/openai.py +0 -0
  56. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/runtime_endpoint.py +0 -0
  57. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/backend/vertexai.py +0 -0
  58. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/chat_template.py +0 -0
  59. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/choices.py +0 -0
  60. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/compiler.py +0 -0
  61. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/interpreter.py +0 -0
  62. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/ir.py +0 -0
  63. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/lang/tracer.py +0 -0
  64. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/launch_server.py +0 -0
  65. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/profiler.py +0 -0
  66. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/_custom_ops.py +0 -0
  67. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/batch_invariant_ops/__init__.py +0 -0
  68. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/batch_invariant_ops/batch_invariant_ops.py +0 -0
  69. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/backend.py +0 -0
  70. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/compilation_config.py +0 -0
  71. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/compilation_counter.py +0 -0
  72. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/compile.py +0 -0
  73. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/compiler_interface.py +0 -0
  74. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/cuda_piecewise_backend.py +0 -0
  75. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/fix_functionalization.py +0 -0
  76. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/fx_utils.py +0 -0
  77. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/inductor_pass.py +0 -0
  78. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/pass_manager.py +0 -0
  79. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/piecewise_context_manager.py +0 -0
  80. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/compilation/weak_ref_tensor_jit.py +0 -0
  81. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/__init__.py +0 -0
  82. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/chatglm.py +0 -0
  83. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/dbrx.py +0 -0
  84. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/deepseekvl2.py +0 -0
  85. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/device_config.py +0 -0
  86. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/dots_ocr.py +0 -0
  87. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/dots_vlm.py +0 -0
  88. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/exaone.py +0 -0
  89. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/falcon_h1.py +0 -0
  90. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/internvl.py +0 -0
  91. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/janus_pro.py +0 -0
  92. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/kimi_vl.py +0 -0
  93. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/kimi_vl_moonvit.py +0 -0
  94. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/load_config.py +0 -0
  95. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/longcat_flash.py +0 -0
  96. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/mamba_utils.py +0 -0
  97. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/model_config.py +0 -0
  98. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/nemotron_h.py +0 -0
  99. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/qwen3_next.py +0 -0
  100. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/qwen3_vl.py +0 -0
  101. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/step3_vl.py +0 -0
  102. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/update_config.py +0 -0
  103. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/configs/utils.py +0 -0
  104. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/__init__.py +0 -0
  105. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/base_connector.py +0 -0
  106. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/redis.py +0 -0
  107. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/remote_instance.py +0 -0
  108. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/s3.py +0 -0
  109. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/serde/__init__.py +0 -0
  110. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/serde/safe_serde.py +0 -0
  111. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/serde/serde.py +0 -0
  112. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/connector/utils.py +0 -0
  113. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constants.py +0 -0
  114. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/base_grammar_backend.py +0 -0
  115. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/llguidance_backend.py +0 -0
  116. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/outlines_backend.py +0 -0
  117. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/outlines_jump_forward.py +0 -0
  118. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/reasoner_grammar_backend.py +0 -0
  119. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/triton_ops/bitmask_ops.py +0 -0
  120. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/constrained/xgrammar_backend.py +0 -0
  121. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/custom_op.py +0 -0
  122. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/debug_utils/__init__.py +0 -0
  123. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/debug_utils/dump_comparator.py +0 -0
  124. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/debug_utils/dump_loader.py +0 -0
  125. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/debug_utils/dumper.py +0 -0
  126. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/debug_utils/text_comparator.py +0 -0
  127. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/ascend/__init__.py +0 -0
  128. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/ascend/conn.py +0 -0
  129. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/ascend/transfer_engine.py +0 -0
  130. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/base/__init__.py +0 -0
  131. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/common/__init__.py +0 -0
  132. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/common/utils.py +0 -0
  133. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/decode_kvcache_offload_manager.py +0 -0
  134. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/decode_schedule_batch_mixin.py +0 -0
  135. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/fake/__init__.py +0 -0
  136. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/kv_events.py +0 -0
  137. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/mini_lb.py +0 -0
  138. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/mooncake/__init__.py +0 -0
  139. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/mooncake/transfer_engine.py +0 -0
  140. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/nixl/__init__.py +0 -0
  141. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/disaggregation/utils.py +0 -0
  142. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/__init__.py +0 -0
  143. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/communication_op.py +0 -0
  144. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/all_reduce_utils.py +0 -0
  145. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/cuda_wrapper.py +0 -0
  146. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +0 -0
  147. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/hpu_communicator.py +0 -0
  148. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/npu_communicator.py +0 -0
  149. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/pymscclpp.py +0 -0
  150. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/pynccl.py +0 -0
  151. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/pynccl_allocator.py +0 -0
  152. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/pynccl_wrapper.py +0 -0
  153. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/quick_all_reduce.py +0 -0
  154. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/shm_broadcast.py +0 -0
  155. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/symm_mem.py +0 -0
  156. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/device_communicators/xpu_communicator.py +0 -0
  157. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/naive_distributed.py +0 -0
  158. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/parallel_state.py +0 -0
  159. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/distributed/utils.py +0 -0
  160. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/EngineBase.py +0 -0
  161. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/context.py +0 -0
  162. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/engine.py +0 -0
  163. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/grpc_server.py +0 -0
  164. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/harmony_utils.py +0 -0
  165. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/http_server.py +0 -0
  166. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/http_server_engine.py +0 -0
  167. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/__init__.py +0 -0
  168. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/protocol.py +0 -0
  169. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_base.py +0 -0
  170. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_chat.py +0 -0
  171. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_completions.py +0 -0
  172. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_embedding.py +0 -0
  173. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_rerank.py +0 -0
  174. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_responses.py +0 -0
  175. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_score.py +0 -0
  176. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/serving_tokenize.py +0 -0
  177. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/tool_server.py +0 -0
  178. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/usage_processor.py +0 -0
  179. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/openai/utils.py +0 -0
  180. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/entrypoints/tool.py +0 -0
  181. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/__init__.py +0 -0
  182. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_algorithms/__init__.py +0 -0
  183. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_algorithms/deepseek.py +0 -0
  184. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py +0 -0
  185. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_manager.py +0 -0
  186. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_simulator/__init__.py +0 -0
  187. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/eplb_simulator/reader.py +0 -0
  188. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/expert_distribution.py +0 -0
  189. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/expert_location.py +0 -0
  190. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/expert_location_dispatch.py +0 -0
  191. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/eplb/expert_location_updater.py +0 -0
  192. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/base_format_detector.py +0 -0
  193. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/core_types.py +0 -0
  194. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/deepseekv31_detector.py +0 -0
  195. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/deepseekv3_detector.py +0 -0
  196. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/ebnf_composer.py +0 -0
  197. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/function_call_parser.py +0 -0
  198. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/glm4_moe_detector.py +0 -0
  199. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/gpt_oss_detector.py +0 -0
  200. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/json_array_parser.py +0 -0
  201. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/kimik2_detector.py +0 -0
  202. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/llama32_detector.py +0 -0
  203. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/mistral_detector.py +0 -0
  204. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/pythonic_detector.py +0 -0
  205. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/qwen25_detector.py +0 -0
  206. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/qwen3_coder_detector.py +0 -0
  207. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/step3_detector.py +0 -0
  208. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/function_call/utils.py +0 -0
  209. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/__init__.py +0 -0
  210. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/compile_proto.py +0 -0
  211. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/grpc_request_manager.py +0 -0
  212. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/sglang_scheduler_pb2.py +0 -0
  213. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/sglang_scheduler_pb2.pyi +0 -0
  214. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/grpc/sglang_scheduler_pb2_grpc.py +0 -0
  215. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/activation.py +0 -0
  216. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/amx_utils.py +0 -0
  217. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/aiter_backend.py +0 -0
  218. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/attention_registry.py +0 -0
  219. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/base_attn_backend.py +0 -0
  220. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/cutlass_mla_backend.py +0 -0
  221. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/double_sparsity_backend.py +0 -0
  222. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +0 -0
  223. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/chunk.py +0 -0
  224. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/chunk_delta_h.py +0 -0
  225. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/chunk_o.py +0 -0
  226. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py +0 -0
  227. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/cumsum.py +0 -0
  228. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/fused_recurrent.py +0 -0
  229. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py +0 -0
  230. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/index.py +0 -0
  231. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/l2norm.py +0 -0
  232. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/layernorm_gated.py +0 -0
  233. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/op.py +0 -0
  234. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/solve_tril.py +0 -0
  235. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/utils.py +0 -0
  236. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/fla/wy_fast.py +0 -0
  237. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/flashattention_backend.py +0 -0
  238. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/flashinfer_backend.py +0 -0
  239. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/flashinfer_mla_backend.py +0 -0
  240. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/flashmla_backend.py +0 -0
  241. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/hybrid_attn_backend.py +0 -0
  242. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/hybrid_linear_attn_backend.py +0 -0
  243. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/intel_amx_backend.py +0 -0
  244. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/causal_conv1d.py +0 -0
  245. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py +0 -0
  246. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/mamba.py +0 -0
  247. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/mamba2_metadata.py +0 -0
  248. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/mixer2_rms_norm_gated.py +0 -0
  249. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/__init__.py +0 -0
  250. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/layernorm_gated.py +0 -0
  251. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/mamba_ssm.py +0 -0
  252. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/ssd_bmm.py +0 -0
  253. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py +0 -0
  254. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py +0 -0
  255. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/ssd_combined.py +0 -0
  256. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py +0 -0
  257. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/merge_state.py +0 -0
  258. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/npu_ops/mla_preprocess.py +0 -0
  259. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/dequant_k_cache.py +0 -0
  260. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/index_buf_accessor.py +0 -0
  261. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/nsa_indexer.py +0 -0
  262. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/quant_k_cache.py +0 -0
  263. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/tilelang_kernel.py +0 -0
  264. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/transform_index.py +0 -0
  265. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/triton_kernel.py +0 -0
  266. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa/utils.py +0 -0
  267. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/nsa_backend.py +0 -0
  268. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/tbo_backend.py +0 -0
  269. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/torch_flex_backend.py +0 -0
  270. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/torch_native_backend.py +0 -0
  271. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_backend.py +0 -0
  272. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/decode_attention.py +0 -0
  273. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +0 -0
  274. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/extend_attention.py +0 -0
  275. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/merge_state.py +0 -0
  276. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/prefill_attention.py +0 -0
  277. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py +0 -0
  278. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/trtllm_mha_backend.py +0 -0
  279. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/trtllm_mla_backend.py +0 -0
  280. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/utils.py +0 -0
  281. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/vision.py +0 -0
  282. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/vision_utils.py +0 -0
  283. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/wave_backend.py +0 -0
  284. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/wave_ops/decode_attention.py +0 -0
  285. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/wave_ops/extend_attention.py +0 -0
  286. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/attention/wave_ops/prefill_attention.py +0 -0
  287. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/communicator.py +0 -0
  288. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/dp_attention.py +0 -0
  289. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/elementwise.py +0 -0
  290. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/flashinfer_comm_fusion.py +0 -0
  291. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/linear.py +0 -0
  292. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/model_parallel.py +0 -0
  293. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/modelopt_utils.py +0 -0
  294. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/__init__.py +0 -0
  295. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/cutlass_moe.py +0 -0
  296. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/cutlass_moe_params.py +0 -0
  297. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/cutlass_w4a8_moe.py +0 -0
  298. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/ep_moe/__init__.py +0 -0
  299. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/ep_moe/kernels.py +0 -0
  300. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/ep_moe/layer.py +0 -0
  301. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py +0 -0
  302. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_native.py +0 -0
  303. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/__init__.py +0 -0
  304. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  305. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  306. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  307. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  308. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  309. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  310. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  311. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  312. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  313. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  314. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  315. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  316. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  317. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json +0 -0
  318. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  319. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  320. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  321. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  322. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  323. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  324. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  325. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  326. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  327. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  328. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  329. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  330. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  331. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  332. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  333. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  334. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +0 -0
  335. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  336. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +0 -0
  337. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  338. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  339. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  340. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  341. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  342. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  343. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  344. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  345. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  346. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json +0 -0
  347. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  348. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  349. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  350. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  351. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  352. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  353. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  354. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  355. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  356. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json +0 -0
  357. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json +0 -0
  358. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  359. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  360. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  361. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  362. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  363. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  364. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json +0 -0
  365. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  366. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  367. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json +0 -0
  368. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  369. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  370. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  371. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json +0 -0
  372. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  373. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  374. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  375. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +0 -0
  376. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  377. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  378. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  379. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json +0 -0
  380. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +0 -0
  381. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +0 -0
  382. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json +0 -0
  383. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  384. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  385. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json +0 -0
  386. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +0 -0
  387. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +0 -0
  388. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json +0 -0
  389. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  390. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  391. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  392. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  393. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json +0 -0
  394. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  395. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  396. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  397. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  398. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json +0 -0
  399. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +0 -0
  400. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +0 -0
  401. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json +0 -0
  402. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  403. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  404. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +0 -0
  405. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  406. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  407. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  408. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json +0 -0
  409. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json +0 -0
  410. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +0 -0
  411. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +0 -0
  412. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +0 -0
  413. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  414. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  415. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  416. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  417. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json +0 -0
  418. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +0 -0
  419. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +0 -0
  420. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json +0 -0
  421. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  422. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  423. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  424. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  425. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json +0 -0
  426. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +0 -0
  427. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +0 -0
  428. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +0 -0
  429. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +0 -0
  430. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  431. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  432. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  433. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json +0 -0
  434. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json +0 -0
  435. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  436. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  437. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json +0 -0
  438. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  439. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json +0 -0
  440. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  441. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  442. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  443. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  444. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json +0 -0
  445. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  446. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json +0 -0
  447. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json +0 -0
  448. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=129,N=352,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  449. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=160,N=320,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  450. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=161,N=192,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  451. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  452. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  453. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  454. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  455. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  456. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  457. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  458. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  459. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  460. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +0 -0
  461. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  462. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  463. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  464. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +0 -0
  465. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  466. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_0/E=16,N=1024,device_name=NVIDIA_B200.json +0 -0
  467. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=352,device_name=NVIDIA_RTX_6000_Ada_Generation,dtype=fp8_w8a8.json +0 -0
  468. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  469. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  470. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  471. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20.json +0 -0
  472. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  473. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +0 -0
  474. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +0 -0
  475. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  476. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  477. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  478. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  479. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  480. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  481. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  482. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  483. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  484. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  485. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  486. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  487. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  488. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=352,device_name=NVIDIA_RTX_5880_Ada_Generation,dtype=fp8_w8a8.json +0 -0
  489. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  490. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json +0 -0
  491. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json +0 -0
  492. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json +0 -0
  493. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=161,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json +0 -0
  494. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  495. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_B200.json +0 -0
  496. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  497. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=512,device_name=NVIDIA_H20.json +0 -0
  498. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  499. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  500. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=384,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  501. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  502. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json +0 -0
  503. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H200.json +0 -0
  504. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  505. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_B200.json +0 -0
  506. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json +0 -0
  507. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H200.json +0 -0
  508. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  509. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H200.json +0 -0
  510. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +0 -0
  511. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +0 -0
  512. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +0 -0
  513. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/layer.py +0 -0
  514. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py +0 -0
  515. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +0 -0
  516. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/moe_runner/__init__.py +0 -0
  517. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/moe_runner/base.py +0 -0
  518. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/moe_runner/deep_gemm.py +0 -0
  519. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/moe_runner/runner.py +0 -0
  520. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/moe_runner/triton.py +0 -0
  521. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/rocm_moe_utils.py +0 -0
  522. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/router.py +0 -0
  523. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/token_dispatcher/__init__.py +0 -0
  524. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/token_dispatcher/base.py +0 -0
  525. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/token_dispatcher/deepep.py +0 -0
  526. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/token_dispatcher/mooncake.py +0 -0
  527. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/token_dispatcher/standard.py +0 -0
  528. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/moe/topk.py +0 -0
  529. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/multimodal.py +0 -0
  530. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/parameter.py +0 -0
  531. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/pooler.py +0 -0
  532. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/__init__.py +0 -0
  533. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/awq.py +0 -0
  534. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/awq_triton.py +0 -0
  535. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/base_config.py +0 -0
  536. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/blockwise_int8.py +0 -0
  537. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/__init__.py +0 -0
  538. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +0 -0
  539. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +0 -0
  540. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +0 -0
  541. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py +0 -0
  542. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +0 -0
  543. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +0 -0
  544. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +0 -0
  545. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/compressed_tensors/utils.py +0 -0
  546. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  547. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  548. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  549. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  550. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  551. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  552. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  553. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  554. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  555. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  556. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  557. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  558. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  559. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  560. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  561. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  562. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  563. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  564. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  565. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  566. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  567. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  568. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  569. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  570. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  571. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  572. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  573. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  574. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  575. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  576. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  577. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  578. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  579. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  580. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  581. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  582. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  583. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  584. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  585. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  586. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  587. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  588. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  589. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  590. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  591. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  592. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  593. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  594. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  595. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  596. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  597. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  598. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  599. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  600. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  601. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  602. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  603. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  604. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  605. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  606. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  607. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  608. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  609. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  610. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  611. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  612. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  613. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  614. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  615. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  616. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  617. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  618. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  619. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  620. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  621. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  622. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  623. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  624. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  625. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  626. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  627. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  628. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  629. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  630. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  631. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  632. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  633. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  634. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  635. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  636. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  637. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  638. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  639. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  640. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  641. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  642. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  643. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  644. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  645. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  646. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  647. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  648. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  649. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  650. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  651. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  652. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  653. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  654. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  655. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  656. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  657. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  658. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  659. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  660. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  661. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  662. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  663. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  664. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  665. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  666. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  667. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  668. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  669. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  670. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  671. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  672. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  673. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  674. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  675. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  676. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  677. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  678. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  679. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  680. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  681. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  682. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  683. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  684. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  685. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  686. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  687. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  688. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  689. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  690. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  691. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  692. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  693. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  694. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  695. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  696. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json +0 -0
  697. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +0 -0
  698. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py +0 -0
  699. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +0 -0
  700. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +0 -0
  701. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +0 -0
  702. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/fp8.py +0 -0
  703. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/fp8_kernel.py +0 -0
  704. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/fp8_utils.py +0 -0
  705. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/fpgemm_fp8.py +0 -0
  706. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/gptq.py +0 -0
  707. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/int8_kernel.py +0 -0
  708. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/int8_utils.py +0 -0
  709. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/kv_cache.py +0 -0
  710. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/marlin_utils.py +0 -0
  711. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/marlin_utils_fp8.py +0 -0
  712. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/modelopt_quant.py +0 -0
  713. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/moe_wna16.py +0 -0
  714. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/mxfp4.py +0 -0
  715. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/mxfp4_tensor.py +0 -0
  716. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/petit.py +0 -0
  717. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/petit_utils.py +0 -0
  718. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/qoq.py +0 -0
  719. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/__init__.py +0 -0
  720. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/quark.py +0 -0
  721. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/quark_moe.py +0 -0
  722. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/schemes/__init__.py +0 -0
  723. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/schemes/quark_scheme.py +0 -0
  724. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +0 -0
  725. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/quark/utils.py +0 -0
  726. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/rocm_mxfp4_utils.py +0 -0
  727. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/unquant.py +0 -0
  728. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/utils.py +0 -0
  729. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/w4afp8.py +0 -0
  730. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/w8a8_fp8.py +0 -0
  731. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/quantization/w8a8_int8.py +0 -0
  732. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/radix_attention.py +0 -0
  733. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/rocm_linear_utils.py +0 -0
  734. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/torchao_utils.py +0 -0
  735. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/utils.py +0 -0
  736. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/layers/vocab_parallel_embedding.py +0 -0
  737. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/backend/base_backend.py +0 -0
  738. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/backend/chunked_backend.py +0 -0
  739. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/backend/triton_backend.py +0 -0
  740. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/eviction_policy.py +0 -0
  741. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/layers.py +0 -0
  742. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/lora.py +0 -0
  743. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/lora_config.py +0 -0
  744. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/lora_manager.py +0 -0
  745. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/lora_registry.py +0 -0
  746. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/mem_pool.py +0 -0
  747. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/__init__.py +0 -0
  748. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/chunked_sgmv_expand.py +0 -0
  749. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/chunked_sgmv_shrink.py +0 -0
  750. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/gate_up_lora_b.py +0 -0
  751. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/qkv_lora_b.py +0 -0
  752. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/sgemm_lora_a.py +0 -0
  753. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/triton_ops/sgemm_lora_b.py +0 -0
  754. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/lora/utils.py +0 -0
  755. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/async_dynamic_batch_tokenizer.py +0 -0
  756. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/cache_controller.py +0 -0
  757. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/configure_logging.py +0 -0
  758. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/data_parallel_controller.py +0 -0
  759. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/detokenizer_manager.py +0 -0
  760. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/disagg_service.py +0 -0
  761. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/io_struct.py +0 -0
  762. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/mm_utils.py +0 -0
  763. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/multi_tokenizer_mixin.py +0 -0
  764. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/multimodal_processor.py +0 -0
  765. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/overlap_utils.py +0 -0
  766. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/schedule_batch.py +0 -0
  767. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/schedule_policy.py +0 -0
  768. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_input_blocker.py +0 -0
  769. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_metrics_mixin.py +0 -0
  770. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_output_processor_mixin.py +0 -0
  771. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_profiler_mixin.py +0 -0
  772. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_recv_skipper.py +0 -0
  773. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/scheduler_update_weights_mixin.py +0 -0
  774. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/session_controller.py +0 -0
  775. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/template_manager.py +0 -0
  776. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/tokenizer_communicator_mixin.py +0 -0
  777. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/tokenizer_manager.py +0 -0
  778. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/tp_worker.py +0 -0
  779. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/managers/utils.py +0 -0
  780. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/allocator.py +0 -0
  781. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/base_prefix_cache.py +0 -0
  782. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/chunk_cache.py +0 -0
  783. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/cpp_radix_tree/radix_tree.py +0 -0
  784. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/evict_policy.py +0 -0
  785. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/flush_cache.py +0 -0
  786. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/hicache_storage.py +0 -0
  787. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/hiradix_cache.py +0 -0
  788. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/mamba_radix_cache.py +0 -0
  789. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/memory_pool_host.py +0 -0
  790. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/multimodal_cache.py +0 -0
  791. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/radix_cache.py +0 -0
  792. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/radix_cache_cpp.py +0 -0
  793. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/__init__.py +0 -0
  794. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/aibrix_kvcache/aibrix_kvcache_storage.py +0 -0
  795. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py +0 -0
  796. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/backend_factory.py +0 -0
  797. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/eic/eic_storage.py +0 -0
  798. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/eic/test_unit.py +0 -0
  799. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py +0 -0
  800. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/hf3fs_usrbio_client.py +0 -0
  801. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/hf3fs_utils.cpp +0 -0
  802. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py +0 -0
  803. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +0 -0
  804. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/hf3fs/test_hf3fs_utils.py +0 -0
  805. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py +0 -0
  806. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/lmcache/unit_test.py +0 -0
  807. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +0 -0
  808. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/mooncake_store/test_mooncake_store.py +0 -0
  809. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py +0 -0
  810. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/nixl/nixl_utils.py +0 -0
  811. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py +0 -0
  812. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/mem_cache/swa_radix_cache.py +0 -0
  813. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/metrics/collector.py +0 -0
  814. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/metrics/func_timer.py +0 -0
  815. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/metrics/startup_func_log_and_timer.py +0 -0
  816. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/metrics/utils.py +0 -0
  817. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/cpu_graph_runner.py +0 -0
  818. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/cuda_graph_runner.py +0 -0
  819. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/forward_batch_info.py +0 -0
  820. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_executor/piecewise_cuda_graph_runner.py +0 -0
  821. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_loader/__init__.py +0 -0
  822. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_loader/loader.py +0 -0
  823. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_loader/remote_instance_weight_loader_utils.py +0 -0
  824. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/model_loader/utils.py +0 -0
  825. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/apertus.py +0 -0
  826. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/arcee.py +0 -0
  827. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/baichuan.py +0 -0
  828. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/bailing_moe.py +0 -0
  829. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/bailing_moe_nextn.py +0 -0
  830. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/bert.py +0 -0
  831. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/chatglm.py +0 -0
  832. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/clip.py +0 -0
  833. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/commandr.py +0 -0
  834. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/dbrx.py +0 -0
  835. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/deepseek.py +0 -0
  836. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/deepseek_janus_pro.py +0 -0
  837. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/deepseek_nextn.py +0 -0
  838. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/deepseek_vl2.py +0 -0
  839. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/dots_ocr.py +0 -0
  840. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/dots_vlm.py +0 -0
  841. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/dots_vlm_vit.py +0 -0
  842. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/ernie4.py +0 -0
  843. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/ernie4_eagle.py +0 -0
  844. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/exaone.py +0 -0
  845. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/falcon_h1.py +0 -0
  846. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma.py +0 -0
  847. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma2.py +0 -0
  848. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma2_reward.py +0 -0
  849. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma3_causal.py +0 -0
  850. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma3_mm.py +0 -0
  851. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma3n_audio.py +0 -0
  852. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma3n_causal.py +0 -0
  853. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gemma3n_mm.py +0 -0
  854. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/glm4.py +0 -0
  855. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/glm4_moe_nextn.py +0 -0
  856. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/glm4v.py +0 -0
  857. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/glm4v_moe.py +0 -0
  858. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gpt2.py +0 -0
  859. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gpt_bigcode.py +0 -0
  860. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/gpt_oss.py +0 -0
  861. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/granite.py +0 -0
  862. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/granitemoe.py +0 -0
  863. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/grok.py +0 -0
  864. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/hunyuan.py +0 -0
  865. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/idefics2.py +0 -0
  866. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/internlm2.py +0 -0
  867. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/internlm2_reward.py +0 -0
  868. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/interns1.py +0 -0
  869. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/internvl.py +0 -0
  870. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/kimi_vl.py +0 -0
  871. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/kimi_vl_moonvit.py +0 -0
  872. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama.py +0 -0
  873. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama4.py +0 -0
  874. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama_classification.py +0 -0
  875. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama_eagle.py +0 -0
  876. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama_eagle3.py +0 -0
  877. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama_embedding.py +0 -0
  878. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llama_reward.py +0 -0
  879. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llava.py +0 -0
  880. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/llavavid.py +0 -0
  881. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/longcat_flash.py +0 -0
  882. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/longcat_flash_nextn.py +0 -0
  883. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mimo.py +0 -0
  884. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mimo_mtp.py +0 -0
  885. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/minicpm.py +0 -0
  886. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/minicpm3.py +0 -0
  887. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/minicpmo.py +0 -0
  888. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/minicpmv.py +0 -0
  889. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mistral.py +0 -0
  890. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mixtral.py +0 -0
  891. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mixtral_quant.py +0 -0
  892. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mllama.py +0 -0
  893. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/mllama4.py +0 -0
  894. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/nemotron_h.py +0 -0
  895. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/nemotron_nas.py +0 -0
  896. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/olmo.py +0 -0
  897. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/olmo2.py +0 -0
  898. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/olmoe.py +0 -0
  899. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/opt.py +0 -0
  900. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/persimmon.py +0 -0
  901. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phi.py +0 -0
  902. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phi3_small.py +0 -0
  903. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phi4mm.py +0 -0
  904. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phi4mm_audio.py +0 -0
  905. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phi4mm_utils.py +0 -0
  906. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/phimoe.py +0 -0
  907. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/pixtral.py +0 -0
  908. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen.py +0 -0
  909. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2.py +0 -0
  910. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_5_vl.py +0 -0
  911. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_audio.py +0 -0
  912. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_classification.py +0 -0
  913. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_eagle.py +0 -0
  914. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_moe.py +0 -0
  915. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_rm.py +0 -0
  916. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen2_vl.py +0 -0
  917. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3.py +0 -0
  918. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_classification.py +0 -0
  919. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_moe.py +0 -0
  920. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_next.py +0 -0
  921. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_next_mtp.py +0 -0
  922. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_vl.py +0 -0
  923. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/qwen3_vl_moe.py +0 -0
  924. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/registry.py +0 -0
  925. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/roberta.py +0 -0
  926. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/sarashina2_vision.py +0 -0
  927. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/siglip.py +0 -0
  928. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/solar.py +0 -0
  929. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/stablelm.py +0 -0
  930. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/starcoder2.py +0 -0
  931. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/step3_vl.py +0 -0
  932. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/torch_native_llama.py +0 -0
  933. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/transformers.py +0 -0
  934. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/utils.py +0 -0
  935. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/vila.py +0 -0
  936. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/xverse.py +0 -0
  937. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/xverse_moe.py +0 -0
  938. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/models/yivl.py +0 -0
  939. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/mm_utils.py +0 -0
  940. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/base_processor.py +0 -0
  941. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/clip.py +0 -0
  942. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/deepseek_vl_v2.py +0 -0
  943. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/dots_vlm.py +0 -0
  944. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/gemma3.py +0 -0
  945. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/gemma3n.py +0 -0
  946. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/glm4v.py +0 -0
  947. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/internvl.py +0 -0
  948. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/janus_pro.py +0 -0
  949. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/kimi_vl.py +0 -0
  950. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/llava.py +0 -0
  951. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/minicpm.py +0 -0
  952. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/mlama.py +0 -0
  953. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/mllama4.py +0 -0
  954. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/phi4mm.py +0 -0
  955. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/pixtral.py +0 -0
  956. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/qwen_audio.py +0 -0
  957. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/qwen_vl.py +0 -0
  958. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/sarashina2_vision.py +0 -0
  959. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/step3_vl.py +0 -0
  960. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/multimodal/processors/vila.py +0 -0
  961. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/operations.py +0 -0
  962. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/operations_strategy.py +0 -0
  963. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/parser/code_completion_parser.py +0 -0
  964. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/parser/conversation.py +0 -0
  965. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/parser/harmony_parser.py +0 -0
  966. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/parser/jinja_template_utils.py +0 -0
  967. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/parser/reasoning_parser.py +0 -0
  968. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/custom_logit_processor.py +0 -0
  969. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/penaltylib/__init__.py +0 -0
  970. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/penaltylib/frequency_penalty.py +0 -0
  971. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/penaltylib/min_new_tokens.py +0 -0
  972. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/penaltylib/orchestrator.py +0 -0
  973. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/penaltylib/presence_penalty.py +0 -0
  974. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/sampling_batch_info.py +0 -0
  975. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/sampling/sampling_params.py +0 -0
  976. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/server_args_config_parser.py +0 -0
  977. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/single_batch_overlap.py +0 -0
  978. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/ngram.cpp +0 -0
  979. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/ngram.h +0 -0
  980. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/ngram_cache.py +0 -0
  981. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/ngram_cache_binding.cpp +0 -0
  982. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/param.h +0 -0
  983. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/cpp_ngram/queue.h +0 -0
  984. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/draft_utils.py +0 -0
  985. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +0 -0
  986. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +0 -0
  987. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_info.py +0 -0
  988. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_info_v2.py +0 -0
  989. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_utils.py +0 -0
  990. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/eagle_worker_v2.py +0 -0
  991. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/ngram_info.py +0 -0
  992. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/ngram_worker.py +0 -0
  993. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/speculative/spec_utils.py +0 -0
  994. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/tokenizer/tiktoken_tokenizer.py +0 -0
  995. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/tracing/trace.py +0 -0
  996. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/two_batch_overlap.py +0 -0
  997. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/__init__.py +0 -0
  998. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/aio_rwlock.py +0 -0
  999. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/bench_utils.py +0 -0
  1000. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/common.py +0 -0
  1001. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/hf_transformers_utils.py +0 -0
  1002. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/host_shared_memory.py +0 -0
  1003. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/offloader.py +0 -0
  1004. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/patch_torch.py +0 -0
  1005. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/poll_based_barrier.py +0 -0
  1006. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/profile_merger.py +0 -0
  1007. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/rpd_utils.py +0 -0
  1008. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/slow_rank_detector.py +0 -0
  1009. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/utils/torch_memory_saver_adapter.py +0 -0
  1010. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/warmup.py +0 -0
  1011. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/weight_sync/tensor_bucket.py +0 -0
  1012. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/srt/weight_sync/utils.py +0 -0
  1013. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/__init__.py +0 -0
  1014. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/attention/__init__.py +0 -0
  1015. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/attention/test_flashattn_backend.py +0 -0
  1016. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/attention/test_flashattn_mla_backend.py +0 -0
  1017. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/attention/test_prefix_chunk_info.py +0 -0
  1018. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/attention/test_trtllm_mla_backend.py +0 -0
  1019. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/doc_patch.py +0 -0
  1020. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/few_shot_gsm8k.py +0 -0
  1021. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/few_shot_gsm8k_engine.py +0 -0
  1022. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/get_logits_ut.py +0 -0
  1023. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/longbench_v2/__init__.py +0 -0
  1024. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/longbench_v2/test_longbench_v2_eval.py +0 -0
  1025. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/longbench_v2/validate_longbench_v2.py +0 -0
  1026. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/longbench_v2/validate_longbench_v2_standalone.py +0 -0
  1027. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/run_eval.py +0 -0
  1028. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_common.py +0 -0
  1029. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_gpqa.py +0 -0
  1030. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_humaneval.py +0 -0
  1031. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_longbench_v2.py +0 -0
  1032. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_math.py +0 -0
  1033. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_mgsm.py +0 -0
  1034. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_mmlu.py +0 -0
  1035. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/simple_eval_mmmu_vlm.py +0 -0
  1036. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_activation.py +0 -0
  1037. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_block_fp8.py +0 -0
  1038. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_block_fp8_deep_gemm_blackwell.py +0 -0
  1039. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_custom_ops.py +0 -0
  1040. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_cutlass_moe.py +0 -0
  1041. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_cutlass_w4a8_moe.py +0 -0
  1042. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_deepep_utils.py +0 -0
  1043. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_deterministic.py +0 -0
  1044. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_deterministic_utils.py +0 -0
  1045. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_dynamic_grad_mode.py +0 -0
  1046. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_fp4_moe.py +0 -0
  1047. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_layernorm.py +0 -0
  1048. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_marlin_moe.py +0 -0
  1049. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_marlin_utils.py +0 -0
  1050. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/test/test_programs.py +0 -0
  1051. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang/utils.py +0 -0
  1052. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang.egg-info/SOURCES.txt +0 -0
  1053. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang.egg-info/dependency_links.txt +0 -0
  1054. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang.egg-info/requires.txt +0 -0
  1055. {sglang-0.5.3.post2 → sglang-0.5.3.post3}/sglang.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sglang
3
- Version: 0.5.3.post2
3
+ Version: 0.5.3.post3
4
4
  Summary: SGLang is a fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sglang"
7
- version = "0.5.3.post2"
7
+ version = "0.5.3.post3"
8
8
  description = "SGLang is a fast serving framework for large language models and vision language models."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -72,6 +72,8 @@ from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
72
72
  from sglang.srt.utils import (
73
73
  configure_logger,
74
74
  get_bool_env_var,
75
+ is_cuda_alike,
76
+ is_xpu,
75
77
  kill_process_tree,
76
78
  require_mlp_sync,
77
79
  require_mlp_tp_gather,
@@ -80,6 +82,15 @@ from sglang.srt.utils import (
80
82
  )
81
83
  from sglang.srt.utils.hf_transformers_utils import get_tokenizer
82
84
 
85
+ profile_activities = [torch.profiler.ProfilerActivity.CPU] + [
86
+ profiler_activity
87
+ for available, profiler_activity in [
88
+ (is_cuda_alike(), torch.profiler.ProfilerActivity.CUDA),
89
+ (is_xpu(), torch.profiler.ProfilerActivity.XPU),
90
+ ]
91
+ if available
92
+ ]
93
+
83
94
 
84
95
  @dataclasses.dataclass
85
96
  class BenchArgs:
@@ -424,10 +435,7 @@ def latency_test_run_once(
424
435
  profiler = None
425
436
  if profile:
426
437
  profiler = torch.profiler.profile(
427
- activities=[
428
- torch.profiler.ProfilerActivity.CPU,
429
- torch.profiler.ProfilerActivity.CUDA,
430
- ],
438
+ activities=profile_activities,
431
439
  with_stack=True,
432
440
  record_shapes=profile_record_shapes,
433
441
  )
@@ -460,10 +468,7 @@ def latency_test_run_once(
460
468
  if profile and i == output_len / 2:
461
469
  profiler = None
462
470
  profiler = torch.profiler.profile(
463
- activities=[
464
- torch.profiler.ProfilerActivity.CPU,
465
- torch.profiler.ProfilerActivity.CUDA,
466
- ],
471
+ activities=profile_activities,
467
472
  with_stack=True,
468
473
  record_shapes=profile_record_shapes,
469
474
  )
@@ -20,6 +20,10 @@ class KVArgs:
20
20
  aux_data_ptrs: List[int]
21
21
  aux_data_lens: List[int]
22
22
  aux_item_lens: List[int]
23
+ state_data_ptrs: List[int]
24
+ state_data_lens: List[int]
25
+ state_item_lens: List[int]
26
+ state_type: str # "none", "mamba", "swa"
23
27
  ib_device: str
24
28
  ib_traffic_class: str
25
29
  gpu_id: int
@@ -76,9 +80,13 @@ class BaseKVSender(ABC):
76
80
  ...
77
81
 
78
82
  @abstractmethod
79
- def send(self, kv_indices: npt.NDArray[np.int32]):
83
+ def send(
84
+ self,
85
+ kv_indices: npt.NDArray[np.int32],
86
+ state_indices: Optional[List[int]] = None,
87
+ ):
80
88
  """
81
- Send the kv cache at the given kv indices to the decoder server
89
+ Send the kv cache at the given kv indices and the extra cache/state at the given indices to the decoder server
82
90
  """
83
91
  ...
84
92
 
@@ -108,9 +116,14 @@ class BaseKVReceiver(ABC):
108
116
  ): ...
109
117
 
110
118
  @abstractmethod
111
- def init(self, kv_indices: npt.NDArray[np.int32], aux_index: Optional[int] = None):
119
+ def init(
120
+ self,
121
+ kv_indices: npt.NDArray[np.int32],
122
+ aux_index: Optional[int] = None,
123
+ state_indices: Optional[List[int]] = None,
124
+ ):
112
125
  """
113
- Notify the prefill server about the kv indices and aux index
126
+ Notify the prefill server about the kv indices, aux index, and state_indices.
114
127
  """
115
128
  ...
116
129
 
@@ -201,6 +201,7 @@ class CommonKVSender(BaseKVSender):
201
201
  def send(
202
202
  self,
203
203
  kv_indices: npt.NDArray[np.int32],
204
+ state_indices: Optional[List[int]] = None,
204
205
  ):
205
206
  pass
206
207
 
@@ -25,11 +25,12 @@ import time
25
25
  from collections import deque
26
26
  from dataclasses import dataclass
27
27
  from http import HTTPStatus
28
- from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
28
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
29
29
 
30
30
  import torch
31
31
  from torch.distributed import ProcessGroup
32
32
 
33
+ from sglang.srt.configs.mamba_utils import Mamba2CacheParams
33
34
  from sglang.srt.constants import GPU_MEMORY_TYPE_KV_CACHE
34
35
  from sglang.srt.disaggregation.base import BaseKVManager, BaseKVReceiver, KVPoll
35
36
  from sglang.srt.disaggregation.utils import (
@@ -47,9 +48,19 @@ from sglang.srt.disaggregation.utils import (
47
48
  )
48
49
  from sglang.srt.layers.dp_attention import get_attention_tp_size
49
50
  from sglang.srt.managers.schedule_batch import FINISH_ABORT, RequestStage, ScheduleBatch
50
- from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
51
+ from sglang.srt.mem_cache.allocator import (
52
+ BaseTokenToKVPoolAllocator,
53
+ SWATokenToKVPoolAllocator,
54
+ )
51
55
  from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
52
- from sglang.srt.mem_cache.memory_pool import KVCache, ReqToTokenPool
56
+ from sglang.srt.mem_cache.memory_pool import (
57
+ HybridLinearKVPool,
58
+ HybridReqToTokenPool,
59
+ KVCache,
60
+ NSATokenToKVPool,
61
+ ReqToTokenPool,
62
+ SWAKVPool,
63
+ )
53
64
  from sglang.srt.model_executor.forward_batch_info import ForwardMode
54
65
  from sglang.srt.utils import get_int_env_var, require_mlp_sync
55
66
  from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter
@@ -124,6 +135,35 @@ class DecodeReqToTokenPool:
124
135
  self.free_slots = list(range(self.size + self.pre_alloc_size))
125
136
 
126
137
 
138
+ class HybridMambaDecodeReqToTokenPool(HybridReqToTokenPool):
139
+
140
+ def __init__(
141
+ self,
142
+ size: int,
143
+ max_context_len: int,
144
+ device: str,
145
+ enable_memory_saver: bool,
146
+ cache_params: "Mamba2CacheParams",
147
+ speculative_num_draft_tokens: int,
148
+ pre_alloc_size: int,
149
+ ):
150
+ DecodeReqToTokenPool.__init__(
151
+ self,
152
+ size=size,
153
+ max_context_len=max_context_len,
154
+ device=device,
155
+ enable_memory_saver=enable_memory_saver,
156
+ pre_alloc_size=pre_alloc_size,
157
+ )
158
+ self._init_mamba_pool(
159
+ size + pre_alloc_size, cache_params, device, speculative_num_draft_tokens
160
+ )
161
+
162
+ def clear(self):
163
+ self.free_slots = list(range(self.size + self.pre_alloc_size))
164
+ self.mamba_pool.clear()
165
+
166
+
127
167
  @dataclass
128
168
  class DecodeRequest:
129
169
  req: Req
@@ -217,6 +257,28 @@ class DecodePreallocQueue:
217
257
  self.metadata_buffers.get_buf_infos()
218
258
  )
219
259
 
260
+ if hasattr(self.token_to_kv_pool, "get_state_buf_infos"):
261
+ state_data_ptrs, state_data_lens, state_item_lens = (
262
+ self.token_to_kv_pool.get_state_buf_infos()
263
+ )
264
+ kv_args.state_data_ptrs = state_data_ptrs
265
+ kv_args.state_data_lens = state_data_lens
266
+ kv_args.state_item_lens = state_item_lens
267
+
268
+ if isinstance(self.token_to_kv_pool, SWAKVPool):
269
+ kv_args.state_type = "swa"
270
+ elif isinstance(self.token_to_kv_pool, HybridLinearKVPool):
271
+ kv_args.state_type = "mamba"
272
+ elif isinstance(self.token_to_kv_pool, NSATokenToKVPool):
273
+ kv_args.state_type = "nsa"
274
+ else:
275
+ kv_args.state_type = "none"
276
+ else:
277
+ kv_args.state_data_ptrs = []
278
+ kv_args.state_data_lens = []
279
+ kv_args.state_item_lens = []
280
+ kv_args.state_type = "none"
281
+
220
282
  kv_args.ib_device = self.scheduler.server_args.disaggregation_ib_device
221
283
  kv_args.gpu_id = self.scheduler.gpu_id
222
284
  kv_manager_class: Type[BaseKVManager] = get_kv_class(
@@ -414,16 +476,56 @@ class DecodePreallocQueue:
414
476
  .cpu()
415
477
  .numpy()
416
478
  )
479
+ page_size = self.token_to_kv_pool_allocator.page_size
480
+
481
+ # Prepare extra pool indices for hybrid models
482
+ if isinstance(self.token_to_kv_pool, HybridLinearKVPool):
483
+ # Mamba hybrid model: single mamba state index
484
+ state_indices = [
485
+ self.req_to_token_pool.req_index_to_mamba_index_mapping[
486
+ decode_req.req.req_pool_idx
487
+ ]
488
+ .cpu()
489
+ .numpy()
490
+ ]
491
+ elif isinstance(self.token_to_kv_pool, SWAKVPool):
492
+ # SWA hybrid model: send decode-side SWA window indices
493
+ seq_len = len(decode_req.req.origin_input_ids)
494
+ window_size = self.scheduler.sliding_window_size
495
+
496
+ window_start = max(0, seq_len - window_size)
497
+ window_start = (window_start // page_size) * page_size
498
+ window_kv_indices_full = self.req_to_token_pool.req_to_token[
499
+ decode_req.req.req_pool_idx, window_start:seq_len
500
+ ]
501
+
502
+ # Translate to SWA pool indices
503
+ window_kv_indices_swa = (
504
+ self.token_to_kv_pool_allocator.translate_loc_from_full_to_swa(
505
+ window_kv_indices_full
506
+ )
507
+ )
508
+ state_indices = window_kv_indices_swa.cpu().numpy()
509
+ state_indices = kv_to_page_indices(state_indices, page_size)
510
+ elif isinstance(self.token_to_kv_pool, NSATokenToKVPool):
511
+ seq_len = len(decode_req.req.origin_input_ids)
512
+ kv_indices_full = self.req_to_token_pool.req_to_token[
513
+ decode_req.req.req_pool_idx, :seq_len
514
+ ]
515
+ state_indices = kv_indices_full.cpu().numpy()
516
+ state_indices = kv_to_page_indices(state_indices, page_size)
517
+ else:
518
+ state_indices = None
417
519
 
418
520
  decode_req.metadata_buffer_index = (
419
521
  self.req_to_metadata_buffer_idx_allocator.alloc()
420
522
  )
421
523
  assert decode_req.metadata_buffer_index is not None
422
- page_indices = kv_to_page_indices(
423
- kv_indices, self.token_to_kv_pool_allocator.page_size
524
+ page_indices = kv_to_page_indices(kv_indices, page_size)
525
+ decode_req.kv_receiver.init(
526
+ page_indices, decode_req.metadata_buffer_index, state_indices
424
527
  )
425
- decode_req.kv_receiver.init(page_indices, decode_req.metadata_buffer_index)
426
-
528
+ decode_req.req.add_latency(RequestStage.DECODE_BOOTSTRAP)
427
529
  preallocated_reqs.append(decode_req)
428
530
  indices_to_remove.add(i)
429
531
  decode_req.req.time_stats.decode_transfer_queue_entry_time = (
@@ -503,7 +605,10 @@ class DecodePreallocQueue:
503
605
 
504
606
  def _pre_alloc(self, req: Req) -> torch.Tensor:
505
607
  """Pre-allocate the memory for req_to_token and token_kv_pool"""
506
- req_pool_indices = self.req_to_token_pool.alloc(1)
608
+ if isinstance(self.req_to_token_pool, HybridMambaDecodeReqToTokenPool):
609
+ req_pool_indices = self.req_to_token_pool.alloc(1, [req])
610
+ else:
611
+ req_pool_indices = self.req_to_token_pool.alloc(1)
507
612
 
508
613
  assert (
509
614
  req_pool_indices is not None
@@ -48,9 +48,12 @@ class FakeKVSender(BaseKVSender):
48
48
  def send(
49
49
  self,
50
50
  kv_indices: npt.NDArray[np.int32],
51
+ state_indices: Optional[List[int]] = None,
51
52
  ):
52
53
  self.has_sent = True
53
- logger.debug(f"FakeKVSender send with kv_indices: {kv_indices}")
54
+ logger.debug(
55
+ f"FakeKVSender send with kv_indices: {kv_indices}, state_indices: {state_indices}"
56
+ )
54
57
 
55
58
  def failure_exception(self):
56
59
  raise Exception("Fake KVSender Exception")
@@ -75,10 +78,15 @@ class FakeKVReceiver(BaseKVReceiver):
75
78
  logger.debug("FakeKVReceiver poll success")
76
79
  return KVPoll.Success
77
80
 
78
- def init(self, kv_indices: list[int], aux_index: Optional[int] = None):
81
+ def init(
82
+ self,
83
+ kv_indices: list[int],
84
+ aux_index: Optional[int] = None,
85
+ state_indices: Optional[List[int]] = None,
86
+ ):
79
87
  self.has_init = True
80
88
  logger.debug(
81
- f"FakeKVReceiver init with kv_indices: {kv_indices}, aux_index: {aux_index}"
89
+ f"FakeKVReceiver init with kv_indices: {kv_indices}, aux_index: {aux_index}, state_indices: {state_indices}"
82
90
  )
83
91
 
84
92
  def failure_exception(self):
@@ -58,6 +58,7 @@ class TransferKVChunk:
58
58
  index_slice: slice
59
59
  is_last: bool
60
60
  prefill_aux_index: Optional[int]
61
+ state_indices: Optional[List[int]]
61
62
 
62
63
 
63
64
  # decode
@@ -69,6 +70,7 @@ class TransferInfo:
69
70
  mooncake_session_id: str
70
71
  dst_kv_indices: npt.NDArray[np.int32]
71
72
  dst_aux_index: int
73
+ dst_state_indices: List[int]
72
74
  required_dst_info_num: int
73
75
  is_dummy: bool
74
76
 
@@ -78,9 +80,14 @@ class TransferInfo:
78
80
  is_dummy = True
79
81
  dst_kv_indices = np.array([], dtype=np.int32)
80
82
  dst_aux_index = None
83
+ dst_state_indices = []
81
84
  else:
82
85
  dst_kv_indices = np.frombuffer(msg[4], dtype=np.int32)
83
86
  dst_aux_index = int(msg[5].decode("ascii"))
87
+ if msg[6] == b"":
88
+ dst_state_indices = []
89
+ else:
90
+ dst_state_indices = list(np.frombuffer(msg[6], dtype=np.int32))
84
91
  is_dummy = False
85
92
  return cls(
86
93
  room=int(msg[0].decode("ascii")),
@@ -89,7 +96,8 @@ class TransferInfo:
89
96
  mooncake_session_id=msg[3].decode("ascii"),
90
97
  dst_kv_indices=dst_kv_indices,
91
98
  dst_aux_index=dst_aux_index,
92
- required_dst_info_num=int(msg[6].decode("ascii")),
99
+ dst_state_indices=dst_state_indices,
100
+ required_dst_info_num=int(msg[7].decode("ascii")),
93
101
  is_dummy=is_dummy,
94
102
  )
95
103
 
@@ -103,6 +111,7 @@ class KVArgsRegisterInfo:
103
111
  mooncake_session_id: str
104
112
  dst_kv_ptrs: list[int]
105
113
  dst_aux_ptrs: list[int]
114
+ dst_state_data_ptrs: list[int]
106
115
  dst_tp_rank: int
107
116
  dst_attn_tp_size: int
108
117
  dst_kv_item_len: int
@@ -116,9 +125,10 @@ class KVArgsRegisterInfo:
116
125
  mooncake_session_id=msg[3].decode("ascii"),
117
126
  dst_kv_ptrs=list(struct.unpack(f"{len(msg[4])//8}Q", msg[4])),
118
127
  dst_aux_ptrs=list(struct.unpack(f"{len(msg[5])//8}Q", msg[5])),
119
- dst_tp_rank=int(msg[6].decode("ascii")),
120
- dst_attn_tp_size=int(msg[7].decode("ascii")),
121
- dst_kv_item_len=int(msg[8].decode("ascii")),
128
+ dst_state_data_ptrs=list(struct.unpack(f"{len(msg[6])//8}Q", msg[6])),
129
+ dst_tp_rank=int(msg[7].decode("ascii")),
130
+ dst_attn_tp_size=int(msg[8].decode("ascii")),
131
+ dst_kv_item_len=int(msg[9].decode("ascii")),
122
132
  )
123
133
 
124
134
 
@@ -180,6 +190,9 @@ class MooncakeKVManager(CommonKVManager):
180
190
  )
181
191
  for _ in range(transfer_queue_size)
182
192
  ]
193
+ self.state_executors = concurrent.futures.ThreadPoolExecutor(
194
+ transfer_thread_pool_size // transfer_queue_size
195
+ )
183
196
  for queue, executor in zip(self.transfer_queues, self.executors):
184
197
  threading.Thread(
185
198
  target=self.transfer_worker, args=(queue, executor), daemon=True
@@ -239,6 +252,12 @@ class MooncakeKVManager(CommonKVManager):
239
252
  self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
240
253
  )
241
254
 
255
+ # Batch register state/extra pool data buffers
256
+ if self.kv_args.state_data_ptrs and self.kv_args.state_data_lens:
257
+ self.engine.batch_register(
258
+ self.kv_args.state_data_ptrs, self.kv_args.state_data_lens
259
+ )
260
+
242
261
  def _transfer_data(self, mooncake_session_id, transfer_blocks):
243
262
  if not transfer_blocks:
244
263
  return 0
@@ -248,17 +267,23 @@ class MooncakeKVManager(CommonKVManager):
248
267
  mooncake_session_id, list(src_addrs), list(dst_addrs), list(lengths)
249
268
  )
250
269
 
251
- def send_kvcache(
270
+ def _send_kvcache_generic(
252
271
  self,
253
272
  mooncake_session_id: str,
254
- prefill_kv_indices: npt.NDArray[np.int32],
255
- dst_kv_ptrs: list[int],
256
- dst_kv_indices: npt.NDArray[np.int32],
273
+ src_data_ptrs: list[int],
274
+ dst_data_ptrs: list[int],
275
+ item_lens: list[int],
276
+ prefill_data_indices: npt.NDArray[np.int32],
277
+ dst_data_indices: npt.NDArray[np.int32],
257
278
  executor: concurrent.futures.ThreadPoolExecutor,
258
- ):
259
- # Group by indices
279
+ ) -> int:
280
+ """
281
+ Generic KV cache transfer supporting both MHA and MLA architectures.
282
+ This method is used by both send_kvcache (full pool) and maybe_send_extra.
283
+ """
284
+ # Group by indices for optimization
260
285
  prefill_kv_blocks, dst_kv_blocks = group_concurrent_contiguous(
261
- prefill_kv_indices, dst_kv_indices
286
+ prefill_data_indices, dst_data_indices
262
287
  )
263
288
 
264
289
  layers_params = None
@@ -266,9 +291,9 @@ class MooncakeKVManager(CommonKVManager):
266
291
  # pp is not supported on the decode side yet
267
292
  if self.is_mla_backend:
268
293
  src_kv_ptrs, dst_kv_ptrs, layers_current_pp_stage = (
269
- self.get_mla_kv_ptrs_with_pp(self.kv_args.kv_data_ptrs, dst_kv_ptrs)
294
+ self.get_mla_kv_ptrs_with_pp(src_data_ptrs, dst_data_ptrs)
270
295
  )
271
- kv_item_len = self.kv_args.kv_item_lens[0]
296
+ kv_item_len = item_lens[0]
272
297
  layers_params = [
273
298
  (
274
299
  src_kv_ptrs[layer_id],
@@ -279,9 +304,9 @@ class MooncakeKVManager(CommonKVManager):
279
304
  ]
280
305
  else:
281
306
  src_k_ptrs, src_v_ptrs, dst_k_ptrs, dst_v_ptrs, layers_current_pp_stage = (
282
- self.get_mha_kv_ptrs_with_pp(self.kv_args.kv_data_ptrs, dst_kv_ptrs)
307
+ self.get_mha_kv_ptrs_with_pp(src_data_ptrs, dst_data_ptrs)
283
308
  )
284
- kv_item_len = self.kv_args.kv_item_lens[0]
309
+ kv_item_len = item_lens[0]
285
310
  layers_params = [
286
311
  (
287
312
  src_k_ptrs[layer_id],
@@ -345,6 +370,24 @@ class MooncakeKVManager(CommonKVManager):
345
370
 
346
371
  return 0
347
372
 
373
+ def send_kvcache(
374
+ self,
375
+ mooncake_session_id: str,
376
+ prefill_kv_indices: npt.NDArray[np.int32],
377
+ dst_kv_ptrs: list[int],
378
+ dst_kv_indices: npt.NDArray[np.int32],
379
+ executor: concurrent.futures.ThreadPoolExecutor,
380
+ ):
381
+ return self._send_kvcache_generic(
382
+ mooncake_session_id=mooncake_session_id,
383
+ src_data_ptrs=self.kv_args.kv_data_ptrs,
384
+ dst_data_ptrs=dst_kv_ptrs,
385
+ item_lens=self.kv_args.kv_item_lens,
386
+ prefill_data_indices=prefill_kv_indices,
387
+ dst_data_indices=dst_kv_indices,
388
+ executor=executor,
389
+ )
390
+
348
391
  def send_kvcache_slice(
349
392
  self,
350
393
  mooncake_session_id: str,
@@ -593,6 +636,58 @@ class MooncakeKVManager(CommonKVManager):
593
636
  f"Received AUX_DATA for bootstrap_room {room} with length:{len(data)}"
594
637
  )
595
638
 
639
+ def maybe_send_extra(
640
+ self,
641
+ req: TransferInfo,
642
+ prefill_state_indices: list[int],
643
+ dst_state_data_ptrs: list[int],
644
+ ):
645
+ """Send state or extra pool data with type-specific handling."""
646
+ state_type = getattr(self.kv_args, "state_type", "none")
647
+
648
+ if state_type == "mamba":
649
+ return self._send_mamba_state(
650
+ req,
651
+ prefill_state_indices,
652
+ dst_state_data_ptrs,
653
+ )
654
+ elif state_type in ["swa", "nsa"]:
655
+ # Reuse _send_kvcache_generic interface to send extra pool data
656
+ prefill_state_indices = np.array(prefill_state_indices, dtype=np.int32)
657
+ dst_state_indices = np.array(req.dst_state_indices, dtype=np.int32)
658
+ return self._send_kvcache_generic(
659
+ mooncake_session_id=req.mooncake_session_id,
660
+ src_data_ptrs=self.kv_args.state_data_ptrs,
661
+ dst_data_ptrs=dst_state_data_ptrs,
662
+ item_lens=self.kv_args.state_item_lens,
663
+ prefill_data_indices=prefill_state_indices,
664
+ dst_data_indices=dst_state_indices,
665
+ executor=self.state_executors,
666
+ )
667
+ else:
668
+ return 0
669
+
670
+ def _send_mamba_state(
671
+ self,
672
+ req: TransferInfo,
673
+ prefill_mamba_index: list[int],
674
+ dst_state_data_ptrs: list[int],
675
+ ):
676
+ """Transfer Mamba states."""
677
+ assert len(prefill_mamba_index) == 1, "Mamba should have single state index"
678
+
679
+ transfer_blocks = []
680
+ prefill_state_data_ptrs = self.kv_args.state_data_ptrs
681
+ prefill_state_item_lens = self.kv_args.state_item_lens
682
+
683
+ for i, dst_state_ptr in enumerate(dst_state_data_ptrs):
684
+ length = prefill_state_item_lens[i]
685
+ src_addr = prefill_state_data_ptrs[i] + length * int(prefill_mamba_index[0])
686
+ dst_addr = dst_state_ptr + length * int(req.dst_state_indices[0])
687
+ transfer_blocks.append((src_addr, dst_addr, length))
688
+
689
+ return self._transfer_data(req.mooncake_session_id, transfer_blocks)
690
+
596
691
  def sync_status_to_decode_endpoint(
597
692
  self, remote: str, dst_port: int, room: int, status: int, prefill_rank: int
598
693
  ):
@@ -702,6 +797,21 @@ class MooncakeKVManager(CommonKVManager):
702
797
  break
703
798
 
704
799
  if kv_chunk.is_last:
800
+ if kv_chunk.state_indices is not None:
801
+ if not self.is_mla_backend and (
802
+ self.attn_tp_size
803
+ != target_rank_registration_info.dst_attn_tp_size
804
+ ):
805
+ raise RuntimeError(
806
+ f"PD Disaggregation does NOT support PD different TP sizes for non-MLA hybrid models yet."
807
+ )
808
+
809
+ self.maybe_send_extra(
810
+ req,
811
+ kv_chunk.state_indices,
812
+ target_rank_registration_info.dst_state_data_ptrs,
813
+ )
814
+
705
815
  if self.pp_group.is_last_rank:
706
816
  # Only the last chunk we need to send the aux data
707
817
  ret = self.send_aux(
@@ -765,7 +875,7 @@ class MooncakeKVManager(CommonKVManager):
765
875
  )
766
876
  continue
767
877
  else:
768
- required_dst_info_num = int(waiting_req_bytes[6].decode("ascii"))
878
+ required_dst_info_num = int(waiting_req_bytes[7].decode("ascii"))
769
879
  room = int(room)
770
880
  if room not in self.transfer_infos:
771
881
  self.transfer_infos[room] = {}
@@ -876,6 +986,7 @@ class MooncakeKVManager(CommonKVManager):
876
986
  index_slice: slice,
877
987
  is_last: bool,
878
988
  aux_index: Optional[int] = None,
989
+ state_indices: Optional[List[int]] = None,
879
990
  ):
880
991
  assert self.disaggregation_mode == DisaggregationMode.PREFILL
881
992
  assert not is_last or (is_last and aux_index is not None)
@@ -909,6 +1020,7 @@ class MooncakeKVManager(CommonKVManager):
909
1020
  index_slice=index_slice,
910
1021
  is_last=is_last,
911
1022
  prefill_aux_index=aux_index,
1023
+ state_indices=state_indices,
912
1024
  )
913
1025
  )
914
1026
 
@@ -989,6 +1101,7 @@ class MooncakeKVSender(CommonKVSender):
989
1101
  def send(
990
1102
  self,
991
1103
  kv_indices: npt.NDArray[np.int32],
1104
+ state_indices: Optional[List[int]] = None,
992
1105
  ):
993
1106
  index_slice = slice(self.curr_idx, self.curr_idx + len(kv_indices))
994
1107
  self.curr_idx += len(kv_indices)
@@ -1008,6 +1121,7 @@ class MooncakeKVSender(CommonKVSender):
1008
1121
  index_slice,
1009
1122
  True,
1010
1123
  aux_index=self.aux_index,
1124
+ state_indices=state_indices,
1011
1125
  )
1012
1126
 
1013
1127
  def poll(self) -> KVPoll:
@@ -1110,6 +1224,9 @@ class MooncakeKVReceiver(CommonKVReceiver):
1110
1224
  packed_aux_data_ptrs = b"".join(
1111
1225
  struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.aux_data_ptrs
1112
1226
  )
1227
+ packed_state_data_ptrs = b"".join(
1228
+ struct.pack("Q", ptr) for ptr in self.kv_mgr.kv_args.state_data_ptrs
1229
+ )
1113
1230
  # Note(shangming): No need to add pp rank here since pp is not supported on the decode side yet
1114
1231
  tp_rank = self.kv_mgr.kv_args.engine_rank
1115
1232
  kv_item_len = self.kv_mgr.kv_args.kv_item_lens[0]
@@ -1127,13 +1244,19 @@ class MooncakeKVReceiver(CommonKVReceiver):
1127
1244
  self.session_id.encode("ascii"),
1128
1245
  packed_kv_data_ptrs,
1129
1246
  packed_aux_data_ptrs,
1247
+ packed_state_data_ptrs,
1130
1248
  dst_tp_rank,
1131
1249
  dst_attn_tp_size,
1132
1250
  dst_kv_item_len,
1133
1251
  ]
1134
1252
  )
1135
1253
 
1136
- def init(self, kv_indices: npt.NDArray[np.int32], aux_index: Optional[int] = None):
1254
+ def init(
1255
+ self,
1256
+ kv_indices: npt.NDArray[np.int32],
1257
+ aux_index: Optional[int] = None,
1258
+ state_indices: Optional[List[int]] = None,
1259
+ ):
1137
1260
  for bootstrap_info in self.bootstrap_infos:
1138
1261
  sock, lock = self._connect_to_bootstrap_server(bootstrap_info)
1139
1262
  is_dummy = bootstrap_info["is_dummy"]
@@ -1147,6 +1270,14 @@ class MooncakeKVReceiver(CommonKVReceiver):
1147
1270
  self.session_id.encode("ascii"),
1148
1271
  kv_indices.tobytes() if not is_dummy else b"",
1149
1272
  str(aux_index).encode("ascii") if not is_dummy else b"",
1273
+ (
1274
+ np.array(
1275
+ state_indices,
1276
+ dtype=np.int32,
1277
+ ).tobytes()
1278
+ if not is_dummy and state_indices is not None
1279
+ else b""
1280
+ ),
1150
1281
  str(self.required_dst_info_num).encode("ascii"),
1151
1282
  ]
1152
1283
  )
@@ -704,6 +704,7 @@ class NixlKVSender(CommonKVSender):
704
704
  def send(
705
705
  self,
706
706
  kv_indices: npt.NDArray[np.int32],
707
+ state_indices: Optional[List[int]] = None,
707
708
  ):
708
709
  index_slice = slice(self.curr_idx, self.curr_idx + len(kv_indices))
709
710
  self.curr_idx += len(kv_indices)
@@ -755,7 +756,12 @@ class NixlKVReceiver(CommonKVReceiver):
755
756
  self.bootstrap_room
756
757
  )
757
758
 
758
- def init(self, kv_indices: npt.NDArray[np.int32], aux_index: Optional[int] = None):
759
+ def init(
760
+ self,
761
+ kv_indices: npt.NDArray[np.int32],
762
+ aux_index: Optional[int] = None,
763
+ state_indices: Optional[List[int]] = None,
764
+ ):
759
765
  for bootstrap_info in self.bootstrap_infos:
760
766
  logger.debug(
761
767
  f"Fetched bootstrap info: {bootstrap_info} for engine rank: {self.kv_mgr.kv_args.engine_rank}"