sglang 0.5.2rc2__py3-none-any.whl → 0.5.3rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_one_batch.py +7 -9
- sglang/bench_one_batch_server.py +330 -31
- sglang/bench_serving.py +267 -32
- sglang/global_config.py +2 -2
- sglang/lang/backend/runtime_endpoint.py +1 -1
- sglang/launch_server.py +14 -0
- sglang/profiler.py +2 -2
- sglang/srt/batch_invariant_ops/__init__.py +27 -0
- sglang/srt/batch_invariant_ops/batch_invariant_ops.py +549 -0
- sglang/srt/configs/__init__.py +8 -0
- sglang/srt/configs/device_config.py +3 -1
- sglang/srt/configs/dots_ocr.py +64 -0
- sglang/srt/configs/dots_vlm.py +139 -0
- sglang/srt/configs/falcon_h1.py +360 -0
- sglang/srt/configs/load_config.py +9 -0
- sglang/srt/configs/model_config.py +181 -82
- sglang/srt/configs/qwen3_next.py +326 -0
- sglang/srt/configs/qwen3_vl.py +586 -0
- sglang/srt/connector/__init__.py +8 -1
- sglang/srt/connector/remote_instance.py +82 -0
- sglang/srt/constrained/base_grammar_backend.py +49 -12
- sglang/srt/constrained/llguidance_backend.py +0 -1
- sglang/srt/constrained/outlines_backend.py +0 -1
- sglang/srt/constrained/outlines_jump_forward.py +1 -1
- sglang/srt/constrained/xgrammar_backend.py +30 -9
- sglang/srt/custom_op.py +11 -1
- sglang/srt/debug_utils/dump_comparator.py +81 -44
- sglang/srt/debug_utils/dump_loader.py +97 -0
- sglang/srt/debug_utils/dumper.py +21 -6
- sglang/srt/debug_utils/text_comparator.py +73 -11
- sglang/srt/disaggregation/ascend/conn.py +2 -2
- sglang/srt/disaggregation/ascend/transfer_engine.py +47 -9
- sglang/srt/disaggregation/base/conn.py +1 -1
- sglang/srt/disaggregation/common/conn.py +279 -108
- sglang/srt/disaggregation/decode.py +71 -19
- sglang/srt/disaggregation/decode_kvcache_offload_manager.py +185 -0
- sglang/srt/disaggregation/decode_schedule_batch_mixin.py +29 -17
- sglang/srt/disaggregation/fake/conn.py +1 -1
- sglang/srt/disaggregation/mini_lb.py +6 -445
- sglang/srt/disaggregation/mooncake/conn.py +55 -537
- sglang/srt/disaggregation/nixl/conn.py +326 -53
- sglang/srt/disaggregation/prefill.py +36 -17
- sglang/srt/disaggregation/utils.py +40 -54
- sglang/srt/distributed/device_communicators/all_reduce_utils.py +16 -0
- sglang/srt/distributed/device_communicators/shm_broadcast.py +4 -2
- sglang/srt/distributed/device_communicators/symm_mem.py +164 -0
- sglang/srt/distributed/parallel_state.py +156 -80
- sglang/srt/entrypoints/engine.py +59 -18
- sglang/srt/entrypoints/grpc_request_manager.py +855 -0
- sglang/srt/entrypoints/grpc_server.py +810 -0
- sglang/srt/entrypoints/http_server.py +130 -59
- sglang/srt/entrypoints/openai/protocol.py +112 -4
- sglang/srt/entrypoints/openai/serving_base.py +65 -3
- sglang/srt/entrypoints/openai/serving_chat.py +204 -55
- sglang/srt/entrypoints/openai/serving_completions.py +14 -3
- sglang/srt/entrypoints/openai/serving_embedding.py +9 -3
- sglang/srt/entrypoints/openai/serving_rerank.py +3 -1
- sglang/srt/entrypoints/openai/serving_responses.py +48 -3
- sglang/srt/entrypoints/openai/serving_score.py +1 -0
- sglang/srt/environ.py +285 -0
- sglang/srt/eplb/eplb_manager.py +2 -2
- sglang/srt/eplb/expert_distribution.py +26 -13
- sglang/srt/eplb/expert_location.py +38 -8
- sglang/srt/eplb/expert_location_updater.py +1 -1
- sglang/srt/function_call/base_format_detector.py +3 -6
- sglang/srt/function_call/ebnf_composer.py +11 -9
- sglang/srt/function_call/function_call_parser.py +9 -2
- sglang/srt/function_call/glm4_moe_detector.py +4 -4
- sglang/srt/function_call/gpt_oss_detector.py +23 -0
- sglang/srt/function_call/json_array_parser.py +63 -0
- sglang/srt/function_call/kimik2_detector.py +17 -4
- sglang/srt/function_call/qwen3_coder_detector.py +1 -1
- sglang/srt/function_call/utils.py +96 -5
- sglang/srt/grpc/__init__.py +1 -0
- sglang/srt/grpc/compile_proto.py +245 -0
- sglang/srt/grpc/sglang_scheduler_pb2.py +111 -0
- sglang/srt/grpc/sglang_scheduler_pb2.pyi +434 -0
- sglang/srt/grpc/sglang_scheduler_pb2_grpc.py +239 -0
- sglang/srt/layers/activation.py +143 -9
- sglang/srt/layers/attention/aiter_backend.py +14 -15
- sglang/srt/layers/attention/ascend_backend.py +115 -9
- sglang/srt/layers/attention/attention_registry.py +206 -0
- sglang/srt/layers/attention/base_attn_backend.py +12 -3
- sglang/srt/layers/attention/cutlass_mla_backend.py +3 -3
- sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +1 -1
- sglang/srt/layers/attention/fla/chunk.py +242 -0
- sglang/srt/layers/attention/fla/chunk_delta_h.py +314 -0
- sglang/srt/layers/attention/fla/chunk_o.py +178 -0
- sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py +151 -0
- sglang/srt/layers/attention/fla/cumsum.py +300 -0
- sglang/srt/layers/attention/fla/fused_recurrent.py +640 -0
- sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py +232 -0
- sglang/srt/layers/attention/fla/index.py +37 -0
- sglang/srt/layers/attention/fla/l2norm.py +150 -0
- sglang/srt/layers/attention/fla/layernorm_gated.py +326 -0
- sglang/srt/layers/attention/fla/op.py +66 -0
- sglang/srt/layers/attention/fla/solve_tril.py +465 -0
- sglang/srt/layers/attention/fla/utils.py +331 -0
- sglang/srt/layers/attention/fla/wy_fast.py +158 -0
- sglang/srt/layers/attention/flashattention_backend.py +41 -8
- sglang/srt/layers/attention/flashinfer_backend.py +118 -198
- sglang/srt/layers/attention/flashinfer_mla_backend.py +27 -27
- sglang/srt/layers/attention/flashmla_backend.py +7 -5
- sglang/srt/layers/attention/hybrid_attn_backend.py +68 -53
- sglang/srt/layers/attention/hybrid_linear_attn_backend.py +602 -0
- sglang/srt/layers/attention/intel_amx_backend.py +3 -0
- sglang/srt/layers/attention/mamba/causal_conv1d.py +129 -0
- sglang/srt/layers/attention/mamba/causal_conv1d_triton.py +969 -0
- sglang/srt/layers/attention/mamba/mamba.py +629 -0
- sglang/srt/layers/attention/mamba/mamba_utils.py +81 -0
- sglang/srt/layers/attention/mamba/ops/__init__.py +2 -0
- sglang/srt/layers/attention/mamba/ops/layernorm_gated.py +172 -0
- sglang/srt/layers/attention/mamba/ops/mamba_ssm.py +442 -0
- sglang/srt/layers/attention/mamba/ops/ssd_bmm.py +264 -0
- sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py +622 -0
- sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py +757 -0
- sglang/srt/layers/attention/mamba/ops/ssd_combined.py +262 -0
- sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py +275 -0
- sglang/srt/layers/attention/npu_ops/mla_preprocess.py +393 -0
- sglang/srt/layers/attention/nsa/dequant_k_cache.py +163 -0
- sglang/srt/layers/attention/nsa/index_buf_accessor.py +354 -0
- sglang/srt/layers/attention/nsa/nsa_indexer.py +761 -0
- sglang/srt/layers/attention/nsa/quant_k_cache.py +255 -0
- sglang/srt/layers/attention/nsa/tilelang_kernel.py +785 -0
- sglang/srt/layers/attention/nsa/transform_index.py +144 -0
- sglang/srt/layers/attention/nsa/utils.py +24 -0
- sglang/srt/layers/attention/nsa_backend.py +887 -0
- sglang/srt/layers/attention/tbo_backend.py +6 -6
- sglang/srt/layers/attention/torch_flex_backend.py +325 -0
- sglang/srt/layers/attention/torch_native_backend.py +12 -6
- sglang/srt/layers/attention/triton_backend.py +57 -7
- sglang/srt/layers/attention/trtllm_mha_backend.py +5 -7
- sglang/srt/layers/attention/trtllm_mla_backend.py +276 -39
- sglang/srt/layers/attention/vision.py +58 -0
- sglang/srt/layers/attention/wave_backend.py +4 -4
- sglang/srt/layers/attention/wave_ops/decode_attention.py +2 -4
- sglang/srt/layers/attention/wave_ops/extend_attention.py +1 -3
- sglang/srt/layers/communicator.py +8 -0
- sglang/srt/layers/dp_attention.py +41 -2
- sglang/srt/layers/elementwise.py +3 -1
- sglang/srt/layers/layernorm.py +34 -15
- sglang/srt/layers/linear.py +55 -7
- sglang/srt/layers/logits_processor.py +44 -12
- sglang/srt/layers/moe/__init__.py +2 -1
- sglang/srt/layers/moe/cutlass_w4a8_moe.py +3 -3
- sglang/srt/layers/moe/ep_moe/kernels.py +2 -2
- sglang/srt/layers/moe/ep_moe/layer.py +256 -63
- sglang/srt/layers/moe/flashinfer_cutedsl_moe.py +183 -0
- sglang/srt/layers/moe/fused_moe_native.py +5 -3
- sglang/srt/layers/moe/fused_moe_triton/configs/{triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json } +35 -35
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=352,device_name=NVIDIA_RTX_5880_Ada_Generation,dtype=fp8_w8a8.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=512,device_name=NVIDIA_H20.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_B200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +5 -2
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +7 -3
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +23 -20
- sglang/srt/layers/moe/fused_moe_triton/layer.py +71 -70
- sglang/srt/layers/moe/moe_runner/__init__.py +2 -1
- sglang/srt/layers/moe/moe_runner/base.py +274 -1
- sglang/srt/layers/moe/moe_runner/runner.py +80 -0
- sglang/srt/layers/moe/moe_runner/triton.py +448 -0
- sglang/srt/layers/moe/token_dispatcher/__init__.py +16 -4
- sglang/srt/layers/moe/token_dispatcher/{base_dispatcher.py → base.py} +67 -17
- sglang/srt/layers/moe/token_dispatcher/deepep.py +118 -56
- sglang/srt/layers/moe/token_dispatcher/standard.py +44 -2
- sglang/srt/layers/moe/topk.py +30 -9
- sglang/srt/layers/moe/utils.py +22 -6
- sglang/srt/layers/parameter.py +23 -6
- sglang/srt/layers/quantization/awq.py +19 -7
- sglang/srt/layers/quantization/base_config.py +11 -6
- sglang/srt/layers/quantization/blockwise_int8.py +38 -27
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +1 -0
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +50 -30
- sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +2 -0
- sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +13 -1
- sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +173 -0
- sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +2 -10
- sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +27 -0
- sglang/srt/layers/quantization/fp8.py +78 -49
- sglang/srt/layers/quantization/fp8_utils.py +51 -32
- sglang/srt/layers/quantization/gptq.py +25 -17
- sglang/srt/layers/quantization/modelopt_quant.py +190 -55
- sglang/srt/layers/quantization/moe_wna16.py +21 -18
- sglang/srt/layers/quantization/mxfp4.py +74 -42
- sglang/srt/layers/quantization/quark/quark_moe.py +48 -30
- sglang/srt/layers/quantization/unquant.py +135 -47
- sglang/srt/layers/quantization/w4afp8.py +26 -17
- sglang/srt/layers/quantization/w8a8_fp8.py +35 -20
- sglang/srt/layers/quantization/w8a8_int8.py +91 -41
- sglang/srt/layers/rotary_embedding.py +78 -31
- sglang/srt/layers/sampler.py +213 -21
- sglang/srt/layers/utils.py +23 -0
- sglang/srt/lora/backend/base_backend.py +50 -8
- sglang/srt/lora/backend/chunked_backend.py +348 -0
- sglang/srt/lora/backend/triton_backend.py +99 -5
- sglang/srt/lora/layers.py +32 -0
- sglang/srt/lora/lora.py +8 -3
- sglang/srt/lora/lora_manager.py +52 -118
- sglang/srt/lora/mem_pool.py +25 -11
- sglang/srt/lora/triton_ops/__init__.py +4 -0
- sglang/srt/lora/triton_ops/chunked_sgmv_expand.py +214 -0
- sglang/srt/lora/triton_ops/chunked_sgmv_shrink.py +174 -0
- sglang/srt/lora/utils.py +22 -11
- sglang/srt/managers/async_dynamic_batch_tokenizer.py +170 -0
- sglang/srt/managers/cache_controller.py +199 -301
- sglang/srt/managers/data_parallel_controller.py +115 -80
- sglang/srt/managers/detokenizer_manager.py +19 -15
- sglang/srt/managers/disagg_service.py +46 -0
- sglang/srt/managers/io_struct.py +340 -109
- sglang/srt/managers/mm_utils.py +44 -6
- sglang/srt/managers/multi_tokenizer_mixin.py +357 -407
- sglang/srt/managers/multimodal_processor.py +1 -2
- sglang/srt/managers/overlap_utils.py +53 -0
- sglang/srt/managers/schedule_batch.py +240 -138
- sglang/srt/managers/schedule_policy.py +144 -17
- sglang/srt/managers/scheduler.py +502 -209
- sglang/srt/managers/scheduler_input_blocker.py +1 -1
- sglang/srt/managers/scheduler_metrics_mixin.py +99 -126
- sglang/srt/managers/scheduler_output_processor_mixin.py +75 -22
- sglang/srt/managers/scheduler_profiler_mixin.py +6 -6
- sglang/srt/managers/scheduler_update_weights_mixin.py +7 -0
- sglang/srt/managers/tokenizer_communicator_mixin.py +675 -0
- sglang/srt/managers/tokenizer_manager.py +320 -632
- sglang/srt/managers/tp_worker.py +81 -22
- sglang/srt/managers/tp_worker_overlap_thread.py +71 -56
- sglang/srt/managers/utils.py +1 -45
- sglang/srt/mem_cache/allocator.py +14 -20
- sglang/srt/mem_cache/allocator_ascend.py +41 -27
- sglang/srt/mem_cache/base_prefix_cache.py +1 -1
- sglang/srt/mem_cache/chunk_cache.py +8 -1
- sglang/srt/mem_cache/evict_policy.py +23 -0
- sglang/srt/mem_cache/hicache_storage.py +43 -24
- sglang/srt/mem_cache/hiradix_cache.py +222 -75
- sglang/srt/mem_cache/memory_pool.py +535 -58
- sglang/srt/mem_cache/memory_pool_host.py +239 -228
- sglang/srt/mem_cache/radix_cache.py +222 -73
- sglang/srt/mem_cache/radix_cache_cpp.py +11 -8
- sglang/srt/mem_cache/storage/__init__.py +10 -0
- sglang/srt/mem_cache/storage/aibrix_kvcache/aibrix_kvcache_storage.py +151 -0
- sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py +109 -0
- sglang/srt/mem_cache/storage/backend_factory.py +223 -0
- sglang/srt/mem_cache/storage/eic/eic_storage.py +778 -0
- sglang/srt/mem_cache/storage/eic/test_unit.py +115 -0
- sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py +164 -0
- sglang/srt/mem_cache/storage/hf3fs/{client_hf3fs.py → hf3fs_usrbio_client.py} +5 -1
- sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +259 -62
- sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py +284 -0
- sglang/srt/mem_cache/storage/lmcache/unit_test.py +121 -0
- sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +166 -17
- sglang/srt/mem_cache/swa_radix_cache.py +25 -36
- sglang/srt/metrics/collector.py +511 -132
- sglang/srt/metrics/func_timer.py +2 -7
- sglang/srt/metrics/startup_func_log_and_timer.py +150 -0
- sglang/srt/metrics/utils.py +8 -1
- sglang/srt/model_executor/cpu_graph_runner.py +640 -0
- sglang/srt/model_executor/cuda_graph_runner.py +52 -37
- sglang/srt/model_executor/forward_batch_info.py +82 -40
- sglang/srt/model_executor/model_runner.py +432 -157
- sglang/srt/model_executor/npu_graph_runner.py +12 -5
- sglang/srt/model_loader/__init__.py +9 -3
- sglang/srt/model_loader/loader.py +133 -5
- sglang/srt/model_loader/remote_instance_weight_loader_utils.py +69 -0
- sglang/srt/model_loader/weight_utils.py +158 -3
- sglang/srt/models/apertus.py +686 -0
- sglang/srt/models/bailing_moe.py +820 -217
- sglang/srt/models/bailing_moe_nextn.py +168 -0
- sglang/srt/models/deepseek_nextn.py +6 -1
- sglang/srt/models/deepseek_v2.py +607 -130
- sglang/srt/models/dots_ocr.py +173 -0
- sglang/srt/models/dots_vlm.py +174 -0
- sglang/srt/models/dots_vlm_vit.py +337 -0
- sglang/srt/models/ernie4.py +1 -1
- sglang/srt/models/falcon_h1.py +576 -0
- sglang/srt/models/gemma3_causal.py +0 -2
- sglang/srt/models/gemma3_mm.py +1 -1
- sglang/srt/models/gemma3n_mm.py +2 -2
- sglang/srt/models/glm4_moe.py +4 -4
- sglang/srt/models/glm4_moe_nextn.py +2 -2
- sglang/srt/models/glm4v.py +5 -3
- sglang/srt/models/glm4v_moe.py +4 -1
- sglang/srt/models/gpt_oss.py +8 -31
- sglang/srt/models/kimi_vl_moonvit.py +2 -2
- sglang/srt/models/llama.py +4 -0
- sglang/srt/models/llama4.py +9 -0
- sglang/srt/models/llama_eagle3.py +13 -0
- sglang/srt/models/longcat_flash.py +3 -3
- sglang/srt/models/longcat_flash_nextn.py +1 -1
- sglang/srt/models/mllama4.py +40 -4
- sglang/srt/models/opt.py +637 -0
- sglang/srt/models/qwen2_5_vl.py +29 -5
- sglang/srt/models/qwen2_audio.py +1 -1
- sglang/srt/models/qwen2_moe.py +120 -13
- sglang/srt/models/qwen2_vl.py +1 -1
- sglang/srt/models/qwen3.py +18 -3
- sglang/srt/models/qwen3_moe.py +32 -4
- sglang/srt/models/qwen3_next.py +1069 -0
- sglang/srt/models/qwen3_next_mtp.py +112 -0
- sglang/srt/models/qwen3_vl.py +787 -0
- sglang/srt/models/qwen3_vl_moe.py +471 -0
- sglang/srt/models/registry.py +15 -3
- sglang/srt/models/sarashina2_vision.py +269 -0
- sglang/srt/models/solar.py +505 -0
- sglang/srt/models/starcoder2.py +357 -0
- sglang/srt/models/step3_vl.py +1 -1
- sglang/srt/models/torch_native_llama.py +9 -2
- sglang/srt/models/utils.py +51 -0
- sglang/srt/multimodal/processors/base_processor.py +15 -7
- sglang/srt/multimodal/processors/dots_vlm.py +98 -0
- sglang/srt/multimodal/processors/glm4v.py +9 -9
- sglang/srt/multimodal/processors/internvl.py +153 -129
- sglang/srt/multimodal/processors/qwen_vl.py +23 -6
- sglang/srt/multimodal/processors/sarashina2_vision.py +81 -0
- sglang/srt/offloader.py +27 -3
- sglang/srt/parser/jinja_template_utils.py +6 -0
- sglang/srt/sampling/sampling_batch_info.py +38 -17
- sglang/srt/sampling/sampling_params.py +7 -0
- sglang/srt/server_args.py +966 -267
- sglang/srt/server_args_config_parser.py +146 -0
- sglang/srt/single_batch_overlap.py +151 -0
- sglang/srt/speculative/cpp_ngram/ngram.cpp +374 -0
- sglang/srt/speculative/cpp_ngram/ngram.h +110 -0
- sglang/srt/speculative/cpp_ngram/ngram_cache.py +138 -0
- sglang/srt/speculative/cpp_ngram/ngram_cache_binding.cpp +43 -0
- sglang/srt/speculative/cpp_ngram/param.h +125 -0
- sglang/srt/speculative/cpp_ngram/queue.h +71 -0
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +7 -1
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +13 -2
- sglang/srt/speculative/{eagle_utils.py → eagle_info.py} +207 -757
- sglang/srt/speculative/eagle_worker.py +99 -28
- sglang/srt/speculative/ngram_utils.py +428 -0
- sglang/srt/speculative/ngram_worker.py +245 -0
- sglang/srt/speculative/spec_info.py +52 -0
- sglang/srt/speculative/spec_utils.py +606 -0
- sglang/srt/speculative/standalone_worker.py +109 -0
- sglang/srt/torch_memory_saver_adapter.py +5 -7
- sglang/srt/tracing/trace.py +578 -0
- sglang/srt/two_batch_overlap.py +8 -5
- sglang/srt/utils/__init__.py +2 -0
- sglang/srt/{utils.py → utils/common.py} +433 -77
- sglang/srt/{hf_transformers_utils.py → utils/hf_transformers_utils.py} +53 -5
- sglang/srt/{patch_torch.py → utils/patch_torch.py} +8 -0
- sglang/srt/utils/rpd_utils.py +452 -0
- sglang/srt/utils/slow_rank_detector.py +71 -0
- sglang/srt/warmup.py +8 -4
- sglang/srt/weight_sync/utils.py +2 -2
- sglang/test/attention/test_trtllm_mla_backend.py +169 -5
- sglang/test/get_logits_ut.py +57 -0
- sglang/test/run_eval.py +79 -11
- sglang/test/runners.py +5 -1
- sglang/test/simple_eval_common.py +5 -2
- sglang/test/simple_eval_mmmu_vlm.py +441 -0
- sglang/test/test_block_fp8.py +2 -2
- sglang/test/test_cutlass_moe.py +24 -6
- sglang/test/test_deterministic.py +297 -0
- sglang/test/test_disaggregation_utils.py +77 -0
- sglang/test/test_fp4_moe.py +370 -1
- sglang/test/test_programs.py +1 -1
- sglang/test/test_utils.py +383 -5
- sglang/utils.py +21 -1
- sglang/version.py +1 -1
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/METADATA +69 -124
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/RECORD +375 -245
- sglang/srt/disaggregation/launch_lb.py +0 -118
- sglang/srt/mem_cache/lora_radix_cache.py +0 -421
- /sglang/srt/{poll_based_barrier.py → utils/poll_based_barrier.py} +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/WHEEL +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/top_level.txt +0 -0
@@ -5,14 +5,21 @@ import logging
|
|
5
5
|
import os
|
6
6
|
import signal
|
7
7
|
import threading
|
8
|
+
import time
|
8
9
|
from abc import ABC, abstractmethod
|
9
10
|
from functools import wraps
|
10
11
|
from typing import Any, List, Optional, Tuple
|
11
12
|
|
12
13
|
import torch
|
13
14
|
|
14
|
-
from sglang.srt.mem_cache.hicache_storage import
|
15
|
-
|
15
|
+
from sglang.srt.mem_cache.hicache_storage import (
|
16
|
+
HiCacheStorage,
|
17
|
+
HiCacheStorageConfig,
|
18
|
+
HiCacheStorageExtraInfo,
|
19
|
+
)
|
20
|
+
from sglang.srt.mem_cache.memory_pool_host import HostKVCache
|
21
|
+
from sglang.srt.mem_cache.storage.hf3fs.hf3fs_client import Hf3fsClient
|
22
|
+
from sglang.srt.metrics.collector import StorageMetrics
|
16
23
|
|
17
24
|
logger = logging.getLogger(__name__)
|
18
25
|
|
@@ -112,6 +119,33 @@ def synchronized():
|
|
112
119
|
return _decorator
|
113
120
|
|
114
121
|
|
122
|
+
def create_hf3fs_client(
|
123
|
+
path: str, size: int, bytes_per_page: int, entries: int, use_mock: bool = False
|
124
|
+
) -> Hf3fsClient:
|
125
|
+
"""Factory function to create appropriate HF3FS client.
|
126
|
+
|
127
|
+
Args:
|
128
|
+
path: File path for storage
|
129
|
+
size: Total size of storage file
|
130
|
+
bytes_per_page: Bytes per page
|
131
|
+
entries: Number of entries for batch operations
|
132
|
+
use_mock: Whether to use mock client instead of real usrbio client
|
133
|
+
|
134
|
+
Returns:
|
135
|
+
"""
|
136
|
+
if use_mock:
|
137
|
+
from sglang.srt.mem_cache.storage.hf3fs.hf3fs_client import Hf3fsMockClient
|
138
|
+
|
139
|
+
logger.info(f"[Rank Using Hf3fsMockClient for testing")
|
140
|
+
return Hf3fsMockClient(path, size, bytes_per_page, entries)
|
141
|
+
else:
|
142
|
+
from sglang.srt.mem_cache.storage.hf3fs.hf3fs_usrbio_client import (
|
143
|
+
Hf3fsUsrBioClient,
|
144
|
+
)
|
145
|
+
|
146
|
+
return Hf3fsUsrBioClient(path, size, bytes_per_page, entries)
|
147
|
+
|
148
|
+
|
115
149
|
class HiCacheHF3FS(HiCacheStorage):
|
116
150
|
"""HiCache backend that stores KV cache pages in HF3FS files."""
|
117
151
|
|
@@ -129,12 +163,14 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
129
163
|
metadata_client: Hf3fsMetadataInterface,
|
130
164
|
is_mla_model: bool = False,
|
131
165
|
is_page_first_layout: bool = False,
|
166
|
+
use_mock_client: bool = False,
|
132
167
|
):
|
133
168
|
self.rank = rank
|
134
169
|
self.file_path = file_path
|
135
170
|
self.file_size = file_size
|
136
171
|
self.numjobs = numjobs
|
137
172
|
self.bytes_per_page = bytes_per_page
|
173
|
+
self.gb_per_page = bytes_per_page / (1 << 30)
|
138
174
|
self.entries = entries
|
139
175
|
self.dtype = dtype
|
140
176
|
self.metadata_client = metadata_client
|
@@ -147,17 +183,24 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
147
183
|
self.skip_backup = True
|
148
184
|
self.rank = 0
|
149
185
|
|
186
|
+
self.is_zero_copy = False
|
187
|
+
|
150
188
|
logger.info(
|
151
189
|
f"[Rank {self.rank}] HiCacheHF3FS Client Initializing: "
|
152
190
|
f"file_path={self.file_path}, "
|
153
191
|
f"file_size={self.file_size / (2 ** 30):.2f} GB, "
|
154
|
-
f"num_pages={self.num_pages}"
|
192
|
+
f"num_pages={self.num_pages}, "
|
193
|
+
f"is_mla_model={self.is_mla_model}"
|
155
194
|
)
|
156
195
|
|
157
196
|
self.ac = AtomicCounter(self.numjobs)
|
158
197
|
self.clients = [
|
159
|
-
|
160
|
-
self.file_path,
|
198
|
+
create_hf3fs_client(
|
199
|
+
self.file_path,
|
200
|
+
self.file_size,
|
201
|
+
self.bytes_per_page,
|
202
|
+
self.entries,
|
203
|
+
use_mock_client,
|
161
204
|
)
|
162
205
|
for _ in range(numjobs)
|
163
206
|
]
|
@@ -174,6 +217,11 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
174
217
|
signal.signal(signal.SIGTERM, lambda sig, frame: self.close())
|
175
218
|
signal.signal(signal.SIGQUIT, lambda sig, frame: self.close())
|
176
219
|
|
220
|
+
self.prefetch_pgs = []
|
221
|
+
self.backup_pgs = []
|
222
|
+
self.prefetch_bandwidth = []
|
223
|
+
self.backup_bandwidth = []
|
224
|
+
|
177
225
|
@staticmethod
|
178
226
|
def from_env_config(
|
179
227
|
bytes_per_page: int,
|
@@ -194,14 +242,24 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
194
242
|
Hf3fsLocalMetadataClient,
|
195
243
|
)
|
196
244
|
|
245
|
+
use_mock_client = False
|
197
246
|
if storage_config is not None:
|
198
247
|
rank, is_mla_model, is_page_first_layout = (
|
199
248
|
storage_config.tp_rank,
|
200
249
|
storage_config.is_mla_model,
|
201
250
|
storage_config.is_page_first_layout,
|
202
251
|
)
|
252
|
+
|
253
|
+
if storage_config.extra_config is not None:
|
254
|
+
use_mock_client = storage_config.extra_config.get(
|
255
|
+
"use_mock_hf3fs_client", False
|
256
|
+
)
|
203
257
|
else:
|
204
|
-
rank, is_mla_model, is_page_first_layout =
|
258
|
+
rank, is_mla_model, is_page_first_layout = (
|
259
|
+
0,
|
260
|
+
False,
|
261
|
+
False,
|
262
|
+
)
|
205
263
|
|
206
264
|
mla_unsupported_msg = f"MLA model is not supported without global metadata server, please refer to https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/mem_cache/storage/hf3fs/docs/deploy_sglang_3fs_multinode.md"
|
207
265
|
|
@@ -220,6 +278,7 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
220
278
|
dtype=dtype,
|
221
279
|
metadata_client=Hf3fsLocalMetadataClient(),
|
222
280
|
is_page_first_layout=is_page_first_layout,
|
281
|
+
use_mock_client=use_mock_client,
|
223
282
|
)
|
224
283
|
|
225
284
|
try:
|
@@ -269,27 +328,15 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
269
328
|
metadata_client=metadata_client,
|
270
329
|
is_mla_model=is_mla_model,
|
271
330
|
is_page_first_layout=is_page_first_layout,
|
331
|
+
use_mock_client=use_mock_client,
|
272
332
|
)
|
273
333
|
|
274
|
-
def get(
|
275
|
-
self,
|
276
|
-
key: str,
|
277
|
-
target_location: Optional[Any] = None,
|
278
|
-
target_sizes: Optional[Any] = None,
|
279
|
-
) -> torch.Tensor | None:
|
280
|
-
return self.batch_get(
|
281
|
-
[key],
|
282
|
-
[target_location] if target_location is not None else None,
|
283
|
-
[target_sizes] if target_sizes is not None else None,
|
284
|
-
)[0]
|
285
|
-
|
286
334
|
@synchronized()
|
287
|
-
def
|
335
|
+
def _batch_get(
|
288
336
|
self,
|
289
337
|
keys: List[str],
|
290
|
-
|
291
|
-
|
292
|
-
) -> List[torch.Tensor | None]:
|
338
|
+
values: List[torch.Tensor],
|
339
|
+
) -> List[bool]:
|
293
340
|
page_indices = self.metadata_client.get_page_indices(self.rank, keys)
|
294
341
|
|
295
342
|
batch_indices, file_offsets = [], []
|
@@ -298,15 +345,11 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
298
345
|
batch_indices.append(i)
|
299
346
|
file_offsets.append(page_index * self.bytes_per_page)
|
300
347
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
file_results = [
|
307
|
-
torch.empty(self.numel, dtype=self.dtype)
|
308
|
-
for _ in range(len(batch_indices))
|
309
|
-
]
|
348
|
+
for target_location in values:
|
349
|
+
assert target_location.is_contiguous()
|
350
|
+
file_results = values
|
351
|
+
|
352
|
+
start_time = time.perf_counter()
|
310
353
|
|
311
354
|
futures = [
|
312
355
|
self.executor.submit(
|
@@ -318,12 +361,17 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
318
361
|
]
|
319
362
|
read_results = [result for future in futures for result in future.result()]
|
320
363
|
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
364
|
+
end_time = time.perf_counter()
|
365
|
+
ionum = len(batch_indices)
|
366
|
+
self.prefetch_pgs.append(ionum)
|
367
|
+
self.prefetch_bandwidth.append(
|
368
|
+
ionum / (end_time - start_time) * self.gb_per_page
|
369
|
+
)
|
370
|
+
|
371
|
+
results = [False] * len(keys)
|
372
|
+
for batch_index, read_result in zip(batch_indices, read_results):
|
325
373
|
if read_result == self.bytes_per_page:
|
326
|
-
results[batch_index] =
|
374
|
+
results[batch_index] = True
|
327
375
|
else:
|
328
376
|
logger.error(
|
329
377
|
f"[Rank {self.rank}] HiCacheHF3FS get {keys[batch_index]} failed"
|
@@ -331,27 +379,12 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
331
379
|
|
332
380
|
return results
|
333
381
|
|
334
|
-
|
335
|
-
|
336
|
-
key: str,
|
337
|
-
value: Optional[Any] = None,
|
338
|
-
target_location: Optional[Any] = None,
|
339
|
-
target_sizes: Optional[Any] = None,
|
340
|
-
) -> bool:
|
341
|
-
return self.batch_set(
|
342
|
-
[key],
|
343
|
-
[value] if value is not None else None,
|
344
|
-
[target_location] if target_location is not None else None,
|
345
|
-
[target_sizes] if target_sizes is not None else None,
|
346
|
-
)
|
347
|
-
|
348
|
-
def batch_set(
|
382
|
+
@synchronized()
|
383
|
+
def _batch_set(
|
349
384
|
self,
|
350
385
|
keys: List[str],
|
351
386
|
values: Optional[Any] = None,
|
352
|
-
|
353
|
-
target_sizes: Optional[Any] = None,
|
354
|
-
) -> bool:
|
387
|
+
) -> List[bool]:
|
355
388
|
# In MLA backend, only one rank needs to backup the KV cache
|
356
389
|
if self.skip_backup:
|
357
390
|
return True
|
@@ -374,6 +407,8 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
374
407
|
assert value.is_contiguous()
|
375
408
|
file_values.append(value)
|
376
409
|
|
410
|
+
start_time = time.perf_counter()
|
411
|
+
|
377
412
|
futures = [
|
378
413
|
self.executor.submit(
|
379
414
|
self.clients[self.ac.next()].batch_write,
|
@@ -388,6 +423,11 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
388
423
|
for result in future.result()
|
389
424
|
]
|
390
425
|
|
426
|
+
end_time = time.perf_counter()
|
427
|
+
ionum = len(batch_indices)
|
428
|
+
self.backup_pgs.append(ionum)
|
429
|
+
self.backup_bandwidth.append(ionum / (end_time - start_time) * self.gb_per_page)
|
430
|
+
|
391
431
|
written_keys_to_confirm = []
|
392
432
|
results = [index[0] for index in indices]
|
393
433
|
for batch_index, write_result in zip(batch_indices, write_results):
|
@@ -405,7 +445,7 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
405
445
|
self.rank, written_keys_to_confirm, pages_to_release
|
406
446
|
)
|
407
447
|
|
408
|
-
return
|
448
|
+
return results
|
409
449
|
|
410
450
|
def delete(self, key: str) -> None:
|
411
451
|
self.metadata_client.delete_keys(self.rank, [key])
|
@@ -415,21 +455,25 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
415
455
|
return result[0] if result else False
|
416
456
|
|
417
457
|
def batch_exists(self, keys: List[str]) -> int:
|
458
|
+
factor = 1
|
459
|
+
if self.is_zero_copy and not self.is_mla_model:
|
460
|
+
keys = self._get_mha_zero_copy_keys(keys)
|
461
|
+
factor = 2
|
462
|
+
|
418
463
|
results = self.metadata_client.exists(self.rank, keys)
|
419
|
-
for i in range(len(keys)):
|
420
|
-
if not results[i]:
|
421
|
-
return i
|
422
464
|
|
423
|
-
|
465
|
+
i = 0
|
466
|
+
while i < len(keys) and results[i]:
|
467
|
+
i += 1
|
424
468
|
|
425
|
-
|
469
|
+
return i // factor
|
470
|
+
|
471
|
+
def clear(self) -> None:
|
426
472
|
try:
|
427
473
|
self.metadata_client.clear(self.rank)
|
428
474
|
logger.info(f"Cleared HiCacheHF3FS for rank {self.rank}")
|
429
|
-
return True
|
430
475
|
except Exception as e:
|
431
476
|
logger.error(f"Failed to clear HiCacheHF3FS: {e}")
|
432
|
-
return False
|
433
477
|
|
434
478
|
def close(self) -> None:
|
435
479
|
try:
|
@@ -439,3 +483,156 @@ class HiCacheHF3FS(HiCacheStorage):
|
|
439
483
|
except Exception as e:
|
440
484
|
logger.error(f"close HiCacheHF3FS: {e}")
|
441
485
|
logger.info("close HiCacheHF3FS")
|
486
|
+
|
487
|
+
@synchronized()
|
488
|
+
def get_stats(self):
|
489
|
+
storage_metrics = StorageMetrics()
|
490
|
+
storage_metrics.prefetch_pgs.extend(self.prefetch_pgs)
|
491
|
+
storage_metrics.backup_pgs.extend(self.backup_pgs)
|
492
|
+
storage_metrics.prefetch_bandwidth.extend(self.prefetch_bandwidth)
|
493
|
+
storage_metrics.backup_bandwidth.extend(self.backup_bandwidth)
|
494
|
+
self.prefetch_pgs.clear()
|
495
|
+
self.backup_pgs.clear()
|
496
|
+
self.prefetch_bandwidth.clear()
|
497
|
+
self.backup_bandwidth.clear()
|
498
|
+
return storage_metrics
|
499
|
+
|
500
|
+
def register_mem_pool_host(self, mem_pool_host: HostKVCache):
|
501
|
+
super().register_mem_pool_host(mem_pool_host)
|
502
|
+
self.is_zero_copy = self.mem_pool_host.layout == "page_first"
|
503
|
+
logger.info(f"{self.is_zero_copy=}")
|
504
|
+
|
505
|
+
def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]:
|
506
|
+
_keys = []
|
507
|
+
for k in keys:
|
508
|
+
_keys.append(f"{k}-k")
|
509
|
+
_keys.append(f"{k}-v")
|
510
|
+
return _keys
|
511
|
+
|
512
|
+
def _get_mha_zero_copy_values(
|
513
|
+
self, values: List[torch.Tensor]
|
514
|
+
) -> List[torch.Tensor]:
|
515
|
+
_values = []
|
516
|
+
for value in values:
|
517
|
+
_values.append(value[0])
|
518
|
+
_values.append(value[1])
|
519
|
+
return _values
|
520
|
+
|
521
|
+
def _batch_get_preprocess(self, keys, host_indices):
|
522
|
+
page_num = len(host_indices) // self.mem_pool_host.page_size
|
523
|
+
# host_indices to kv_buffer
|
524
|
+
flat = not self.is_zero_copy
|
525
|
+
values = (
|
526
|
+
[
|
527
|
+
self.mem_pool_host.get_data_page(
|
528
|
+
host_indices[i * self.mem_pool_host.page_size], flat=flat
|
529
|
+
)
|
530
|
+
for i in range(page_num)
|
531
|
+
]
|
532
|
+
if self.is_zero_copy
|
533
|
+
else [
|
534
|
+
self.mem_pool_host.get_dummy_flat_data_page() for _ in range(page_num)
|
535
|
+
]
|
536
|
+
)
|
537
|
+
|
538
|
+
if self.is_zero_copy and not self.is_mla_model:
|
539
|
+
keys = self._get_mha_zero_copy_keys(keys)
|
540
|
+
values = self._get_mha_zero_copy_values(values)
|
541
|
+
|
542
|
+
return keys, values
|
543
|
+
|
544
|
+
def _batch_get_postprocess(self, host_indices, values, results):
|
545
|
+
page_num = len(host_indices) // self.mem_pool_host.page_size
|
546
|
+
|
547
|
+
if self.is_zero_copy:
|
548
|
+
if not self.is_mla_model:
|
549
|
+
results = [
|
550
|
+
(results[2 * i] and results[2 * i + 1]) for i in range(page_num)
|
551
|
+
]
|
552
|
+
results = results[:page_num]
|
553
|
+
return results
|
554
|
+
|
555
|
+
for i in range(page_num):
|
556
|
+
if not results[i]:
|
557
|
+
break
|
558
|
+
self.mem_pool_host.set_from_flat_data_page(
|
559
|
+
host_indices[i * self.mem_pool_host.page_size], values[i]
|
560
|
+
)
|
561
|
+
|
562
|
+
return results
|
563
|
+
|
564
|
+
def batch_get_v1(
|
565
|
+
self,
|
566
|
+
keys: List[str],
|
567
|
+
host_indices: torch.Tensor,
|
568
|
+
extra_info: Optional[HiCacheStorageExtraInfo] = None,
|
569
|
+
) -> List[bool]:
|
570
|
+
keys, values = self._batch_get_preprocess(keys, host_indices)
|
571
|
+
results = self._batch_get(keys, values)
|
572
|
+
return self._batch_get_postprocess(host_indices, values, results)
|
573
|
+
|
574
|
+
def _batch_set_preprocess(self, keys, host_indices):
|
575
|
+
page_num = len(host_indices) // self.mem_pool_host.page_size
|
576
|
+
# host_indices to kv_buffer
|
577
|
+
flat = not self.is_zero_copy
|
578
|
+
values = [
|
579
|
+
self.mem_pool_host.get_data_page(
|
580
|
+
host_indices[i * self.mem_pool_host.page_size], flat=flat
|
581
|
+
)
|
582
|
+
for i in range(page_num)
|
583
|
+
]
|
584
|
+
|
585
|
+
if self.is_zero_copy and not self.is_mla_model:
|
586
|
+
keys = self._get_mha_zero_copy_keys(keys)
|
587
|
+
values = self._get_mha_zero_copy_values(values)
|
588
|
+
|
589
|
+
return keys, values
|
590
|
+
|
591
|
+
def batch_set_v1(
|
592
|
+
self,
|
593
|
+
keys: List[str],
|
594
|
+
host_indices: torch.Tensor,
|
595
|
+
extra_info: Optional[HiCacheStorageExtraInfo] = None,
|
596
|
+
) -> List[bool]:
|
597
|
+
len_keys = len(keys)
|
598
|
+
keys, values = self._batch_set_preprocess(keys, host_indices)
|
599
|
+
results = self._batch_set(keys, values)
|
600
|
+
return results
|
601
|
+
|
602
|
+
# Deprecated
|
603
|
+
def get(
|
604
|
+
self,
|
605
|
+
key: str,
|
606
|
+
target_location: Optional[Any] = None,
|
607
|
+
target_sizes: Optional[Any] = None,
|
608
|
+
) -> torch.Tensor | None:
|
609
|
+
pass
|
610
|
+
|
611
|
+
# Deprecated
|
612
|
+
def batch_get(
|
613
|
+
self,
|
614
|
+
keys: List[str],
|
615
|
+
target_locations: Optional[Any] = None,
|
616
|
+
target_sizes: Optional[Any] = None,
|
617
|
+
) -> List[torch.Tensor | None] | int:
|
618
|
+
pass
|
619
|
+
|
620
|
+
# Deprecated
|
621
|
+
def set(
|
622
|
+
self,
|
623
|
+
key: str,
|
624
|
+
value: Optional[Any] = None,
|
625
|
+
target_location: Optional[Any] = None,
|
626
|
+
target_sizes: Optional[Any] = None,
|
627
|
+
) -> bool:
|
628
|
+
pass
|
629
|
+
|
630
|
+
# Deprecated
|
631
|
+
def batch_set(
|
632
|
+
self,
|
633
|
+
keys: List[str],
|
634
|
+
values: Optional[Any] = None,
|
635
|
+
target_locations: Optional[Any] = None,
|
636
|
+
target_sizes: Optional[Any] = None,
|
637
|
+
) -> bool:
|
638
|
+
pass
|