sglang-0.5.4-py3-none-any.whl → sglang-0.5.4.post2-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sglang/bench_one_batch.py +149 -34
 - sglang/bench_serving.py +73 -14
 - sglang/compile_deep_gemm.py +13 -7
 - sglang/launch_server.py +2 -0
 - sglang/srt/batch_invariant_ops/__init__.py +2 -0
 - sglang/srt/batch_invariant_ops/batch_invariant_ops.py +221 -4
 - sglang/srt/checkpoint_engine/__init__.py +9 -0
 - sglang/srt/checkpoint_engine/update.py +317 -0
 - sglang/srt/compilation/backend.py +1 -1
 - sglang/srt/configs/__init__.py +2 -0
 - sglang/srt/configs/deepseek_ocr.py +542 -10
 - sglang/srt/configs/deepseekvl2.py +95 -194
 - sglang/srt/configs/kimi_linear.py +160 -0
 - sglang/srt/configs/mamba_utils.py +66 -0
 - sglang/srt/configs/model_config.py +30 -7
 - sglang/srt/constants.py +7 -0
 - sglang/srt/debug_utils/tensor_dump_forward_hook.py +149 -0
 - sglang/srt/disaggregation/decode.py +34 -6
 - sglang/srt/disaggregation/nixl/conn.py +2 -2
 - sglang/srt/disaggregation/prefill.py +25 -3
 - sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -1
 - sglang/srt/distributed/parallel_state.py +9 -12
 - sglang/srt/entrypoints/engine.py +31 -20
 - sglang/srt/entrypoints/grpc_server.py +0 -1
 - sglang/srt/entrypoints/http_server.py +94 -94
 - sglang/srt/entrypoints/openai/protocol.py +7 -1
 - sglang/srt/entrypoints/openai/serving_chat.py +42 -0
 - sglang/srt/entrypoints/openai/serving_completions.py +10 -0
 - sglang/srt/entrypoints/openai/serving_embedding.py +1 -0
 - sglang/srt/environ.py +23 -2
 - sglang/srt/eplb/expert_distribution.py +64 -1
 - sglang/srt/eplb/expert_location.py +106 -36
 - sglang/srt/function_call/function_call_parser.py +2 -0
 - sglang/srt/function_call/minimax_m2.py +367 -0
 - sglang/srt/grpc/compile_proto.py +3 -0
 - sglang/srt/layers/activation.py +6 -0
 - sglang/srt/layers/attention/ascend_backend.py +233 -5
 - sglang/srt/layers/attention/attention_registry.py +3 -0
 - sglang/srt/layers/attention/fla/chunk_delta_h.py +61 -32
 - sglang/srt/layers/attention/fla/fused_recurrent.py +17 -4
 - sglang/srt/layers/attention/fla/kda.py +1359 -0
 - sglang/srt/layers/attention/fla/layernorm_gated.py +7 -1
 - sglang/srt/layers/attention/flashattention_backend.py +19 -8
 - sglang/srt/layers/attention/flashinfer_backend.py +10 -1
 - sglang/srt/layers/attention/flashinfer_mla_backend.py +21 -11
 - sglang/srt/layers/attention/flashmla_backend.py +1 -1
 - sglang/srt/layers/attention/hybrid_linear_attn_backend.py +223 -0
 - sglang/srt/layers/attention/mamba/mamba.py +20 -11
 - sglang/srt/layers/attention/nsa/dequant_k_cache.py +138 -6
 - sglang/srt/layers/attention/nsa/nsa_indexer.py +45 -22
 - sglang/srt/layers/attention/nsa/quant_k_cache.py +44 -12
 - sglang/srt/layers/attention/nsa/transform_index.py +1 -1
 - sglang/srt/layers/attention/nsa_backend.py +157 -23
 - sglang/srt/layers/attention/triton_backend.py +4 -1
 - sglang/srt/layers/attention/trtllm_mha_backend.py +10 -4
 - sglang/srt/layers/attention/trtllm_mla_backend.py +11 -15
 - sglang/srt/layers/attention/utils.py +78 -0
 - sglang/srt/layers/communicator.py +24 -1
 - sglang/srt/layers/deep_gemm_wrapper/compile_utils.py +1 -1
 - sglang/srt/layers/layernorm.py +35 -6
 - sglang/srt/layers/logits_processor.py +9 -20
 - sglang/srt/layers/moe/cutlass_w4a8_moe.py +138 -0
 - sglang/srt/layers/moe/ep_moe/kernels.py +194 -0
 - sglang/srt/layers/moe/ep_moe/layer.py +78 -289
 - sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
 - sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128]_down.json +164 -0
 - sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +68 -22
 - sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +43 -3
 - sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +106 -26
 - sglang/srt/layers/moe/fused_moe_triton/layer.py +3 -3
 - sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +7 -4
 - sglang/srt/layers/moe/moe_runner/deep_gemm.py +340 -55
 - sglang/srt/layers/moe/moe_runner/runner.py +3 -0
 - sglang/srt/layers/moe/moe_runner/triton_kernels.py +194 -0
 - sglang/srt/layers/moe/token_dispatcher/__init__.py +4 -4
 - sglang/srt/layers/moe/token_dispatcher/base.py +11 -5
 - sglang/srt/layers/moe/token_dispatcher/deepep.py +25 -18
 - sglang/srt/layers/moe/token_dispatcher/standard.py +1 -1
 - sglang/srt/layers/moe/topk.py +35 -10
 - sglang/srt/layers/moe/utils.py +3 -4
 - sglang/srt/layers/pooler.py +21 -2
 - sglang/srt/layers/quantization/__init__.py +13 -84
 - sglang/srt/layers/quantization/auto_round.py +394 -0
 - sglang/srt/layers/quantization/awq.py +0 -3
 - sglang/srt/layers/quantization/base_config.py +7 -0
 - sglang/srt/layers/quantization/fp8.py +68 -63
 - sglang/srt/layers/quantization/fp8_kernel.py +1 -1
 - sglang/srt/layers/quantization/fp8_utils.py +2 -2
 - sglang/srt/layers/quantization/gguf.py +566 -0
 - sglang/srt/layers/quantization/modelopt_quant.py +168 -11
 - sglang/srt/layers/quantization/mxfp4.py +30 -38
 - sglang/srt/layers/quantization/unquant.py +23 -45
 - sglang/srt/layers/quantization/w4afp8.py +38 -2
 - sglang/srt/layers/radix_attention.py +5 -2
 - sglang/srt/layers/rotary_embedding.py +130 -46
 - sglang/srt/layers/sampler.py +12 -1
 - sglang/srt/lora/lora_registry.py +9 -0
 - sglang/srt/managers/async_mm_data_processor.py +122 -0
 - sglang/srt/managers/data_parallel_controller.py +30 -3
 - sglang/srt/managers/detokenizer_manager.py +3 -0
 - sglang/srt/managers/io_struct.py +29 -4
 - sglang/srt/managers/multi_tokenizer_mixin.py +22 -1
 - sglang/srt/managers/schedule_batch.py +74 -15
 - sglang/srt/managers/scheduler.py +185 -144
 - sglang/srt/managers/scheduler_metrics_mixin.py +22 -14
 - sglang/srt/managers/scheduler_output_processor_mixin.py +40 -3
 - sglang/srt/managers/scheduler_pp_mixin.py +7 -2
 - sglang/srt/managers/scheduler_profiler_mixin.py +3 -4
 - sglang/srt/managers/scheduler_runtime_checker_mixin.py +45 -0
 - sglang/srt/managers/scheduler_update_weights_mixin.py +18 -3
 - sglang/srt/managers/session_controller.py +6 -5
 - sglang/srt/managers/tokenizer_manager.py +165 -78
 - sglang/srt/managers/tp_worker.py +24 -1
 - sglang/srt/mem_cache/base_prefix_cache.py +23 -4
 - sglang/srt/mem_cache/common.py +1 -0
 - sglang/srt/mem_cache/hicache_storage.py +7 -1
 - sglang/srt/mem_cache/memory_pool.py +253 -57
 - sglang/srt/mem_cache/memory_pool_host.py +12 -5
 - sglang/srt/mem_cache/radix_cache.py +4 -0
 - sglang/srt/mem_cache/storage/hf3fs/mini_3fs_metadata_server.py +3 -2
 - sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +1 -1
 - sglang/srt/metrics/collector.py +46 -3
 - sglang/srt/model_executor/cuda_graph_runner.py +15 -3
 - sglang/srt/model_executor/forward_batch_info.py +55 -14
 - sglang/srt/model_executor/model_runner.py +77 -170
 - sglang/srt/model_executor/npu_graph_runner.py +7 -3
 - sglang/srt/model_executor/piecewise_cuda_graph_runner.py +22 -12
 - sglang/srt/model_loader/weight_utils.py +1 -1
 - sglang/srt/models/bailing_moe.py +9 -2
 - sglang/srt/models/deepseek_nextn.py +11 -2
 - sglang/srt/models/deepseek_v2.py +296 -78
 - sglang/srt/models/glm4.py +391 -77
 - sglang/srt/models/glm4_moe.py +322 -354
 - sglang/srt/models/glm4_moe_nextn.py +4 -14
 - sglang/srt/models/glm4v.py +196 -55
 - sglang/srt/models/glm4v_moe.py +29 -197
 - sglang/srt/models/gpt_oss.py +1 -10
 - sglang/srt/models/kimi_linear.py +678 -0
 - sglang/srt/models/llama4.py +1 -1
 - sglang/srt/models/llama_eagle3.py +11 -1
 - sglang/srt/models/longcat_flash.py +2 -2
 - sglang/srt/models/minimax_m2.py +922 -0
 - sglang/srt/models/nvila.py +355 -0
 - sglang/srt/models/nvila_lite.py +184 -0
 - sglang/srt/models/qwen2.py +23 -2
 - sglang/srt/models/qwen2_moe.py +30 -15
 - sglang/srt/models/qwen3.py +35 -5
 - sglang/srt/models/qwen3_moe.py +18 -12
 - sglang/srt/models/qwen3_next.py +7 -0
 - sglang/srt/multimodal/customized_mm_processor_utils.py +35 -0
 - sglang/srt/multimodal/processors/base_processor.py +1 -0
 - sglang/srt/multimodal/processors/glm4v.py +1 -1
 - sglang/srt/multimodal/processors/{vila.py → nvila.py} +32 -24
 - sglang/srt/multimodal/processors/points_v15_chat.py +2 -2
 - sglang/srt/multiplex/multiplexing_mixin.py +209 -0
 - sglang/srt/multiplex/pdmux_context.py +164 -0
 - sglang/srt/parser/conversation.py +7 -1
 - sglang/srt/parser/reasoning_parser.py +28 -1
 - sglang/srt/sampling/custom_logit_processor.py +67 -1
 - sglang/srt/sampling/penaltylib/frequency_penalty.py +6 -8
 - sglang/srt/sampling/penaltylib/min_new_tokens.py +7 -8
 - sglang/srt/sampling/penaltylib/orchestrator.py +43 -3
 - sglang/srt/sampling/penaltylib/presence_penalty.py +6 -8
 - sglang/srt/server_args.py +459 -199
 - sglang/srt/single_batch_overlap.py +2 -4
 - sglang/srt/speculative/draft_utils.py +16 -0
 - sglang/srt/speculative/eagle_info.py +42 -36
 - sglang/srt/speculative/eagle_info_v2.py +68 -25
 - sglang/srt/speculative/eagle_utils.py +261 -16
 - sglang/srt/speculative/eagle_worker.py +11 -3
 - sglang/srt/speculative/eagle_worker_v2.py +15 -9
 - sglang/srt/speculative/spec_info.py +305 -31
 - sglang/srt/speculative/spec_utils.py +44 -8
 - sglang/srt/tracing/trace.py +121 -12
 - sglang/srt/utils/common.py +142 -74
 - sglang/srt/utils/hf_transformers_utils.py +38 -12
 - sglang/srt/utils/torch_memory_saver_adapter.py +20 -0
 - sglang/test/kits/radix_cache_server_kit.py +50 -0
 - sglang/test/runners.py +31 -7
 - sglang/test/simple_eval_common.py +5 -3
 - sglang/test/simple_eval_humaneval.py +1 -0
 - sglang/test/simple_eval_math.py +1 -0
 - sglang/test/simple_eval_mmlu.py +1 -0
 - sglang/test/simple_eval_mmmu_vlm.py +1 -0
 - sglang/test/test_deterministic.py +235 -12
 - sglang/test/test_deterministic_utils.py +2 -1
 - sglang/test/test_utils.py +7 -1
 - sglang/version.py +1 -1
 - {sglang-0.5.4.dist-info → sglang-0.5.4.post2.dist-info}/METADATA +15 -28
 - {sglang-0.5.4.dist-info → sglang-0.5.4.post2.dist-info}/RECORD +194 -175
 - sglang/srt/models/vila.py +0 -306
 - sglang/test/{kit_matched_stop.py → kits/matched_stop_kit.py} +0 -0
 - {sglang-0.5.4.dist-info → sglang-0.5.4.post2.dist-info}/WHEEL +0 -0
 - {sglang-0.5.4.dist-info → sglang-0.5.4.post2.dist-info}/licenses/LICENSE +0 -0
 - {sglang-0.5.4.dist-info → sglang-0.5.4.post2.dist-info}/top_level.txt +0 -0
 
| 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Metadata-Version: 2.4
         
     | 
| 
       2 
2 
     | 
    
         
             
            Name: sglang
         
     | 
| 
       3 
     | 
    
         
            -
            Version: 0.5.4
         
     | 
| 
      
 3 
     | 
    
         
            +
            Version: 0.5.4.post2
         
     | 
| 
       4 
4 
     | 
    
         
             
            Summary: SGLang is a fast serving framework for large language models and vision language models.
         
     | 
| 
       5 
5 
     | 
    
         
             
            License:                                  Apache License
         
     | 
| 
       6 
6 
     | 
    
         
             
                                               Version 2.0, January 2004
         
     | 
| 
         @@ -223,6 +223,7 @@ Requires-Dist: datasets 
     | 
|
| 
       223 
223 
     | 
    
         
             
            Requires-Dist: einops
         
     | 
| 
       224 
224 
     | 
    
         
             
            Requires-Dist: fastapi
         
     | 
| 
       225 
225 
     | 
    
         
             
            Requires-Dist: flashinfer_python==0.4.1
         
     | 
| 
      
 226 
     | 
    
         
            +
            Requires-Dist: gguf
         
     | 
| 
       226 
227 
     | 
    
         
             
            Requires-Dist: hf_transfer
         
     | 
| 
       227 
228 
     | 
    
         
             
            Requires-Dist: huggingface_hub
         
     | 
| 
       228 
229 
     | 
    
         
             
            Requires-Dist: interegular
         
     | 
| 
         @@ -233,7 +234,7 @@ Requires-Dist: ninja 
     | 
|
| 
       233 
234 
     | 
    
         
             
            Requires-Dist: numpy
         
     | 
| 
       234 
235 
     | 
    
         
             
            Requires-Dist: nvidia-cutlass-dsl==4.2.1
         
     | 
| 
       235 
236 
     | 
    
         
             
            Requires-Dist: openai-harmony==0.0.4
         
     | 
| 
       236 
     | 
    
         
            -
            Requires-Dist: openai== 
     | 
| 
      
 237 
     | 
    
         
            +
            Requires-Dist: openai==2.6.1
         
     | 
| 
       237 
238 
     | 
    
         
             
            Requires-Dist: orjson
         
     | 
| 
       238 
239 
     | 
    
         
             
            Requires-Dist: outlines==0.1.11
         
     | 
| 
       239 
240 
     | 
    
         
             
            Requires-Dist: packaging
         
     | 
| 
         @@ -251,15 +252,15 @@ Requires-Dist: requests 
     | 
|
| 
       251 
252 
     | 
    
         
             
            Requires-Dist: scipy
         
     | 
| 
       252 
253 
     | 
    
         
             
            Requires-Dist: sentencepiece
         
     | 
| 
       253 
254 
     | 
    
         
             
            Requires-Dist: setproctitle
         
     | 
| 
       254 
     | 
    
         
            -
            Requires-Dist: sgl-kernel==0.3.16. 
     | 
| 
      
 255 
     | 
    
         
            +
            Requires-Dist: sgl-kernel==0.3.16.post4
         
     | 
| 
       255 
256 
     | 
    
         
             
            Requires-Dist: soundfile==0.13.1
         
     | 
| 
       256 
257 
     | 
    
         
             
            Requires-Dist: tiktoken
         
     | 
| 
       257 
258 
     | 
    
         
             
            Requires-Dist: timm==1.0.16
         
     | 
| 
       258 
     | 
    
         
            -
            Requires-Dist: torch==2.8.0
         
     | 
| 
       259 
259 
     | 
    
         
             
            Requires-Dist: torch_memory_saver==0.0.9
         
     | 
| 
       260 
     | 
    
         
            -
            Requires-Dist:  
     | 
| 
      
 260 
     | 
    
         
            +
            Requires-Dist: torch==2.8.0
         
     | 
| 
       261 
261 
     | 
    
         
             
            Requires-Dist: torchaudio==2.8.0
         
     | 
| 
       262 
262 
     | 
    
         
             
            Requires-Dist: torchvision
         
     | 
| 
      
 263 
     | 
    
         
            +
            Requires-Dist: torchao==0.9.0
         
     | 
| 
       263 
264 
     | 
    
         
             
            Requires-Dist: tqdm
         
     | 
| 
       264 
265 
     | 
    
         
             
            Requires-Dist: transformers==4.57.1
         
     | 
| 
       265 
266 
     | 
    
         
             
            Requires-Dist: uvicorn
         
     | 
| 
         @@ -269,12 +270,11 @@ Requires-Dist: grpcio==1.75.1 
     | 
|
| 
       269 
270 
     | 
    
         
             
            Requires-Dist: grpcio-tools==1.75.1
         
     | 
| 
       270 
271 
     | 
    
         
             
            Requires-Dist: grpcio-reflection==1.75.1
         
     | 
| 
       271 
272 
     | 
    
         
             
            Requires-Dist: grpcio-health-checking==1.75.1
         
     | 
| 
       272 
     | 
    
         
            -
            Provides-Extra:  
     | 
| 
       273 
     | 
    
         
            -
            Requires-Dist:  
     | 
| 
      
 273 
     | 
    
         
            +
            Provides-Extra: checkpoint-engine
         
     | 
| 
      
 274 
     | 
    
         
            +
            Requires-Dist: checkpoint-engine==0.1.2; extra == "checkpoint-engine"
         
     | 
| 
       274 
275 
     | 
    
         
             
            Provides-Extra: test
         
     | 
| 
       275 
276 
     | 
    
         
             
            Requires-Dist: accelerate; extra == "test"
         
     | 
| 
       276 
277 
     | 
    
         
             
            Requires-Dist: expecttest; extra == "test"
         
     | 
| 
       277 
     | 
    
         
            -
            Requires-Dist: gguf; extra == "test"
         
     | 
| 
       278 
278 
     | 
    
         
             
            Requires-Dist: jsonlines; extra == "test"
         
     | 
| 
       279 
279 
     | 
    
         
             
            Requires-Dist: matplotlib; extra == "test"
         
     | 
| 
       280 
280 
     | 
    
         
             
            Requires-Dist: pandas; extra == "test"
         
     | 
| 
         @@ -282,28 +282,13 @@ Requires-Dist: peft; extra == "test" 
     | 
|
| 
       282 
282 
     | 
    
         
             
            Requires-Dist: pytest; extra == "test"
         
     | 
| 
       283 
283 
     | 
    
         
             
            Requires-Dist: sentence_transformers; extra == "test"
         
     | 
| 
       284 
284 
     | 
    
         
             
            Requires-Dist: tabulate; extra == "test"
         
     | 
| 
       285 
     | 
    
         
            -
            Provides-Extra: checkpoint-engine
         
     | 
| 
       286 
     | 
    
         
            -
            Requires-Dist: checkpoint-engine==0.1.2; extra == "checkpoint-engine"
         
     | 
| 
       287 
     | 
    
         
            -
            Provides-Extra: all
         
     | 
| 
       288 
285 
     | 
    
         
             
            Provides-Extra: dev
         
     | 
| 
       289 
286 
     | 
    
         
             
            Requires-Dist: sglang[test]; extra == "dev"
         
     | 
| 
       290 
     | 
    
         
            -
            Provides-Extra: cu130
         
     | 
| 
       291 
     | 
    
         
            -
            Requires-Dist: torch==2.9.0; extra == "cu130"
         
     | 
| 
       292 
     | 
    
         
            -
            Requires-Dist: torchaudio==2.9.0; extra == "cu130"
         
     | 
| 
       293 
     | 
    
         
            -
            Requires-Dist: torchvision==0.24.0; extra == "cu130"
         
     | 
| 
       294 
     | 
    
         
            -
            Provides-Extra: cu130-all
         
     | 
| 
       295 
     | 
    
         
            -
            Requires-Dist: sglang[test]; extra == "cu130-all"
         
     | 
| 
       296 
     | 
    
         
            -
            Requires-Dist: sglang[decord]; extra == "cu130-all"
         
     | 
| 
       297 
     | 
    
         
            -
            Requires-Dist: sglang[cu130]; extra == "cu130-all"
         
     | 
| 
       298 
287 
     | 
    
         
             
            Provides-Extra: tracing
         
     | 
| 
       299 
288 
     | 
    
         
             
            Requires-Dist: opentelemetry-api; extra == "tracing"
         
     | 
| 
       300 
289 
     | 
    
         
             
            Requires-Dist: opentelemetry-exporter-otlp; extra == "tracing"
         
     | 
| 
       301 
290 
     | 
    
         
             
            Requires-Dist: opentelemetry-exporter-otlp-proto-grpc; extra == "tracing"
         
     | 
| 
       302 
291 
     | 
    
         
             
            Requires-Dist: opentelemetry-sdk; extra == "tracing"
         
     | 
| 
       303 
     | 
    
         
            -
            Provides-Extra: blackwell
         
     | 
| 
       304 
     | 
    
         
            -
            Requires-Dist: sglang[dev]; extra == "blackwell"
         
     | 
| 
       305 
     | 
    
         
            -
            Provides-Extra: blackwell-aarch64
         
     | 
| 
       306 
     | 
    
         
            -
            Requires-Dist: sglang[dev]; extra == "blackwell-aarch64"
         
     | 
| 
       307 
292 
     | 
    
         
             
            Dynamic: license-file
         
     | 
| 
       308 
293 
     | 
    
         | 
| 
       309 
294 
     | 
    
         
             
            <div align="center" id="sglangtop">
         
     | 
| 
         @@ -320,7 +305,7 @@ Dynamic: license-file 
     | 
|
| 
       320 
305 
     | 
    
         | 
| 
       321 
306 
     | 
    
         
             
            --------------------------------------------------------------------------------
         
     | 
| 
       322 
307 
     | 
    
         | 
| 
       323 
     | 
    
         
            -
            | [**Blog**](https://lmsys.org/blog/ 
     | 
| 
      
 308 
     | 
    
         
            +
            | [**Blog**](https://lmsys.org/blog/)
         
     | 
| 
       324 
309 
     | 
    
         
             
            | [**Documentation**](https://docs.sglang.ai/)
         
     | 
| 
       325 
310 
     | 
    
         
             
            | [**Join Slack**](https://slack.sglang.ai/)
         
     | 
| 
       326 
311 
     | 
    
         
             
            | [**Join Bi-Weekly Development Meeting**](https://meeting.sglang.ai/)
         
     | 
| 
         @@ -328,13 +313,14 @@ Dynamic: license-file 
     | 
|
| 
       328 
313 
     | 
    
         
             
            | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
         
     | 
| 
       329 
314 
     | 
    
         | 
| 
       330 
315 
     | 
    
         
             
            ## News
         
     | 
| 
      
 316 
     | 
    
         
            +
            - [2025/10] 🔥 SGLang now runs natively on TPU with the SGLang-Jax backend ([blog](https://lmsys.org/blog/2025-10-29-sglang-jax/)).
         
     | 
| 
      
 317 
     | 
    
         
            +
            - [2025/10] AMD AI Dev Day 2025 SGLang ([slide](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/sglang_amd_ai_devday_2025.pdf)), PyTorch Conference 2025 SGLang ([slide](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/sglang_pytorch_2025.pdf)).
         
     | 
| 
       331 
318 
     | 
    
         
             
            - [2025/09] 🔥 Deploying DeepSeek on GB200 NVL72 with PD and Large Scale EP (Part II): 3.8x Prefill, 4.8x Decode Throughput ([blog](https://lmsys.org/blog/2025-09-25-gb200-part-2/)).
         
     | 
| 
       332 
     | 
    
         
            -
            - [2025/09]  
     | 
| 
       333 
     | 
    
         
            -
            - [2025/08]  
     | 
| 
      
 319 
     | 
    
         
            +
            - [2025/09] SGLang Day 0 Support for DeepSeek-V3.2 with Sparse Attention ([blog](https://lmsys.org/blog/2025-09-29-deepseek-V32/)).
         
     | 
| 
      
 320 
     | 
    
         
            +
            - [2025/08] SGLang x AMD SF Meetup on 8/22: Hands-on GPU workshop, tech talks by AMD/xAI/SGLang, and networking ([Roadmap](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_roadmap.pdf), [Large-scale EP](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_sglang_ep.pdf), [Highlights](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_highlights.pdf), [AITER/MoRI](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_aiter_mori.pdf), [Wave](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_meetup_wave.pdf)).
         
     | 
| 
       334 
321 
     | 
    
         
             
            - [2025/08] SGLang provides day-0 support for OpenAI gpt-oss model ([instructions](https://github.com/sgl-project/sglang/issues/8833))
         
     | 
| 
       335 
322 
     | 
    
         
             
            - [2025/05] Deploying DeepSeek with PD Disaggregation and Large-scale Expert Parallelism on 96 H100 GPUs ([blog](https://lmsys.org/blog/2025-05-05-large-scale-ep/)).
         
     | 
| 
       336 
323 
     | 
    
         
             
            - [2025/03] SGLang Joins PyTorch Ecosystem: Efficient LLM Serving Engine ([PyTorch blog](https://pytorch.org/blog/sglang-joins-pytorch/))
         
     | 
| 
       337 
     | 
    
         
            -
            - [2024/12] v0.4 Release: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
         
     | 
| 
       338 
324 
     | 
    
         | 
| 
       339 
325 
     | 
    
         
             
            <details>
         
     | 
| 
       340 
326 
     | 
    
         
             
            <summary>More</summary>
         
     | 
| 
         @@ -344,6 +330,7 @@ Dynamic: license-file 
     | 
|
| 
       344 
330 
     | 
    
         
             
            - [2025/03] Supercharge DeepSeek-R1 Inference on AMD Instinct MI300X ([AMD blog](https://rocm.blogs.amd.com/artificial-intelligence/DeepSeekR1-Part2/README.html))
         
     | 
| 
       345 
331 
     | 
    
         
             
            - [2025/02] Unlock DeepSeek-R1 Inference Performance on AMD Instinct™ MI300X GPU ([AMD blog](https://rocm.blogs.amd.com/artificial-intelligence/DeepSeekR1_Perf/README.html))
         
     | 
| 
       346 
332 
     | 
    
         
             
            - [2025/01] SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with DeepSeek-specific optimizations. ([instructions](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3), [AMD blog](https://www.amd.com/en/developer/resources/technical-articles/amd-instinct-gpus-power-deepseek-v3-revolutionizing-ai-development-with-sglang.html), [10+ other companies](https://x.com/lmsysorg/status/1887262321636221412))
         
     | 
| 
      
 333 
     | 
    
         
            +
            - [2024/12] v0.4 Release: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
         
     | 
| 
       347 
334 
     | 
    
         
             
            - [2024/10] The First SGLang Online Meetup ([slides](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#the-first-sglang-online-meetup)).
         
     | 
| 
       348 
335 
     | 
    
         
             
            - [2024/09] v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
         
     | 
| 
       349 
336 
     | 
    
         
             
            - [2024/07] v0.2 Release: Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
         
     | 
| 
         @@ -384,7 +371,7 @@ SGLang is currently hosted under the non-profit open-source organization [LMSYS] 
     | 
|
| 
       384 
371 
     | 
    
         
             
            <img src="https://raw.githubusercontent.com/sgl-project/sgl-learning-materials/refs/heads/main/slides/adoption.png" alt="logo" width="800" margin="10px"></img>
         
     | 
| 
       385 
372 
     | 
    
         | 
| 
       386 
373 
     | 
    
         
             
            ## Contact Us
         
     | 
| 
       387 
     | 
    
         
            -
            For enterprises interested in adopting or deploying SGLang at scale, including technical consulting, sponsorship opportunities, or partnership inquiries, please contact us at  
     | 
| 
      
 374 
     | 
    
         
            +
            For enterprises interested in adopting or deploying SGLang at scale, including technical consulting, sponsorship opportunities, or partnership inquiries, please contact us at sglang@lmsys.org
         
     | 
| 
       388 
375 
     | 
    
         | 
| 
       389 
376 
     | 
    
         
             
            ## Acknowledgment
         
     | 
| 
       390 
377 
     | 
    
         
             
            We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
         
     |