sglang 0.5.2rc2__py3-none-any.whl → 0.5.3rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_one_batch.py +7 -9
- sglang/bench_one_batch_server.py +330 -31
- sglang/bench_serving.py +267 -32
- sglang/global_config.py +2 -2
- sglang/lang/backend/runtime_endpoint.py +1 -1
- sglang/launch_server.py +14 -0
- sglang/profiler.py +2 -2
- sglang/srt/batch_invariant_ops/__init__.py +27 -0
- sglang/srt/batch_invariant_ops/batch_invariant_ops.py +549 -0
- sglang/srt/configs/__init__.py +8 -0
- sglang/srt/configs/device_config.py +3 -1
- sglang/srt/configs/dots_ocr.py +64 -0
- sglang/srt/configs/dots_vlm.py +139 -0
- sglang/srt/configs/falcon_h1.py +360 -0
- sglang/srt/configs/load_config.py +9 -0
- sglang/srt/configs/model_config.py +181 -82
- sglang/srt/configs/qwen3_next.py +326 -0
- sglang/srt/configs/qwen3_vl.py +586 -0
- sglang/srt/connector/__init__.py +8 -1
- sglang/srt/connector/remote_instance.py +82 -0
- sglang/srt/constrained/base_grammar_backend.py +49 -12
- sglang/srt/constrained/llguidance_backend.py +0 -1
- sglang/srt/constrained/outlines_backend.py +0 -1
- sglang/srt/constrained/outlines_jump_forward.py +1 -1
- sglang/srt/constrained/xgrammar_backend.py +30 -9
- sglang/srt/custom_op.py +11 -1
- sglang/srt/debug_utils/dump_comparator.py +81 -44
- sglang/srt/debug_utils/dump_loader.py +97 -0
- sglang/srt/debug_utils/dumper.py +21 -6
- sglang/srt/debug_utils/text_comparator.py +73 -11
- sglang/srt/disaggregation/ascend/conn.py +2 -2
- sglang/srt/disaggregation/ascend/transfer_engine.py +47 -9
- sglang/srt/disaggregation/base/conn.py +1 -1
- sglang/srt/disaggregation/common/conn.py +279 -108
- sglang/srt/disaggregation/decode.py +71 -19
- sglang/srt/disaggregation/decode_kvcache_offload_manager.py +185 -0
- sglang/srt/disaggregation/decode_schedule_batch_mixin.py +29 -17
- sglang/srt/disaggregation/fake/conn.py +1 -1
- sglang/srt/disaggregation/mini_lb.py +6 -445
- sglang/srt/disaggregation/mooncake/conn.py +55 -537
- sglang/srt/disaggregation/nixl/conn.py +326 -53
- sglang/srt/disaggregation/prefill.py +36 -17
- sglang/srt/disaggregation/utils.py +40 -54
- sglang/srt/distributed/device_communicators/all_reduce_utils.py +16 -0
- sglang/srt/distributed/device_communicators/shm_broadcast.py +4 -2
- sglang/srt/distributed/device_communicators/symm_mem.py +164 -0
- sglang/srt/distributed/parallel_state.py +156 -80
- sglang/srt/entrypoints/engine.py +59 -18
- sglang/srt/entrypoints/grpc_request_manager.py +855 -0
- sglang/srt/entrypoints/grpc_server.py +810 -0
- sglang/srt/entrypoints/http_server.py +130 -59
- sglang/srt/entrypoints/openai/protocol.py +112 -4
- sglang/srt/entrypoints/openai/serving_base.py +65 -3
- sglang/srt/entrypoints/openai/serving_chat.py +204 -55
- sglang/srt/entrypoints/openai/serving_completions.py +14 -3
- sglang/srt/entrypoints/openai/serving_embedding.py +9 -3
- sglang/srt/entrypoints/openai/serving_rerank.py +3 -1
- sglang/srt/entrypoints/openai/serving_responses.py +48 -3
- sglang/srt/entrypoints/openai/serving_score.py +1 -0
- sglang/srt/environ.py +285 -0
- sglang/srt/eplb/eplb_manager.py +2 -2
- sglang/srt/eplb/expert_distribution.py +26 -13
- sglang/srt/eplb/expert_location.py +38 -8
- sglang/srt/eplb/expert_location_updater.py +1 -1
- sglang/srt/function_call/base_format_detector.py +3 -6
- sglang/srt/function_call/ebnf_composer.py +11 -9
- sglang/srt/function_call/function_call_parser.py +9 -2
- sglang/srt/function_call/glm4_moe_detector.py +4 -4
- sglang/srt/function_call/gpt_oss_detector.py +23 -0
- sglang/srt/function_call/json_array_parser.py +63 -0
- sglang/srt/function_call/kimik2_detector.py +17 -4
- sglang/srt/function_call/qwen3_coder_detector.py +1 -1
- sglang/srt/function_call/utils.py +96 -5
- sglang/srt/grpc/__init__.py +1 -0
- sglang/srt/grpc/compile_proto.py +245 -0
- sglang/srt/grpc/sglang_scheduler_pb2.py +111 -0
- sglang/srt/grpc/sglang_scheduler_pb2.pyi +434 -0
- sglang/srt/grpc/sglang_scheduler_pb2_grpc.py +239 -0
- sglang/srt/layers/activation.py +143 -9
- sglang/srt/layers/attention/aiter_backend.py +14 -15
- sglang/srt/layers/attention/ascend_backend.py +115 -9
- sglang/srt/layers/attention/attention_registry.py +206 -0
- sglang/srt/layers/attention/base_attn_backend.py +12 -3
- sglang/srt/layers/attention/cutlass_mla_backend.py +3 -3
- sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +1 -1
- sglang/srt/layers/attention/fla/chunk.py +242 -0
- sglang/srt/layers/attention/fla/chunk_delta_h.py +314 -0
- sglang/srt/layers/attention/fla/chunk_o.py +178 -0
- sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py +151 -0
- sglang/srt/layers/attention/fla/cumsum.py +300 -0
- sglang/srt/layers/attention/fla/fused_recurrent.py +640 -0
- sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py +232 -0
- sglang/srt/layers/attention/fla/index.py +37 -0
- sglang/srt/layers/attention/fla/l2norm.py +150 -0
- sglang/srt/layers/attention/fla/layernorm_gated.py +326 -0
- sglang/srt/layers/attention/fla/op.py +66 -0
- sglang/srt/layers/attention/fla/solve_tril.py +465 -0
- sglang/srt/layers/attention/fla/utils.py +331 -0
- sglang/srt/layers/attention/fla/wy_fast.py +158 -0
- sglang/srt/layers/attention/flashattention_backend.py +41 -8
- sglang/srt/layers/attention/flashinfer_backend.py +118 -198
- sglang/srt/layers/attention/flashinfer_mla_backend.py +27 -27
- sglang/srt/layers/attention/flashmla_backend.py +7 -5
- sglang/srt/layers/attention/hybrid_attn_backend.py +68 -53
- sglang/srt/layers/attention/hybrid_linear_attn_backend.py +602 -0
- sglang/srt/layers/attention/intel_amx_backend.py +3 -0
- sglang/srt/layers/attention/mamba/causal_conv1d.py +129 -0
- sglang/srt/layers/attention/mamba/causal_conv1d_triton.py +969 -0
- sglang/srt/layers/attention/mamba/mamba.py +629 -0
- sglang/srt/layers/attention/mamba/mamba_utils.py +81 -0
- sglang/srt/layers/attention/mamba/ops/__init__.py +2 -0
- sglang/srt/layers/attention/mamba/ops/layernorm_gated.py +172 -0
- sglang/srt/layers/attention/mamba/ops/mamba_ssm.py +442 -0
- sglang/srt/layers/attention/mamba/ops/ssd_bmm.py +264 -0
- sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py +622 -0
- sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py +757 -0
- sglang/srt/layers/attention/mamba/ops/ssd_combined.py +262 -0
- sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py +275 -0
- sglang/srt/layers/attention/npu_ops/mla_preprocess.py +393 -0
- sglang/srt/layers/attention/nsa/dequant_k_cache.py +163 -0
- sglang/srt/layers/attention/nsa/index_buf_accessor.py +354 -0
- sglang/srt/layers/attention/nsa/nsa_indexer.py +761 -0
- sglang/srt/layers/attention/nsa/quant_k_cache.py +255 -0
- sglang/srt/layers/attention/nsa/tilelang_kernel.py +785 -0
- sglang/srt/layers/attention/nsa/transform_index.py +144 -0
- sglang/srt/layers/attention/nsa/utils.py +24 -0
- sglang/srt/layers/attention/nsa_backend.py +887 -0
- sglang/srt/layers/attention/tbo_backend.py +6 -6
- sglang/srt/layers/attention/torch_flex_backend.py +325 -0
- sglang/srt/layers/attention/torch_native_backend.py +12 -6
- sglang/srt/layers/attention/triton_backend.py +57 -7
- sglang/srt/layers/attention/trtllm_mha_backend.py +5 -7
- sglang/srt/layers/attention/trtllm_mla_backend.py +276 -39
- sglang/srt/layers/attention/vision.py +58 -0
- sglang/srt/layers/attention/wave_backend.py +4 -4
- sglang/srt/layers/attention/wave_ops/decode_attention.py +2 -4
- sglang/srt/layers/attention/wave_ops/extend_attention.py +1 -3
- sglang/srt/layers/communicator.py +8 -0
- sglang/srt/layers/dp_attention.py +41 -2
- sglang/srt/layers/elementwise.py +3 -1
- sglang/srt/layers/layernorm.py +34 -15
- sglang/srt/layers/linear.py +55 -7
- sglang/srt/layers/logits_processor.py +44 -12
- sglang/srt/layers/moe/__init__.py +2 -1
- sglang/srt/layers/moe/cutlass_w4a8_moe.py +3 -3
- sglang/srt/layers/moe/ep_moe/kernels.py +2 -2
- sglang/srt/layers/moe/ep_moe/layer.py +256 -63
- sglang/srt/layers/moe/flashinfer_cutedsl_moe.py +183 -0
- sglang/srt/layers/moe/fused_moe_native.py +5 -3
- sglang/srt/layers/moe/fused_moe_triton/configs/{triton_3_4_0/E=128,N=768,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_3_1/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json } +35 -35
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=128,N=352,device_name=NVIDIA_RTX_5880_Ada_Generation,dtype=fp8_w8a8.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=512,device_name=NVIDIA_H20.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_B200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=64,device_name=NVIDIA_H200.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +5 -2
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +7 -3
- sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_kernels.py +23 -20
- sglang/srt/layers/moe/fused_moe_triton/layer.py +71 -70
- sglang/srt/layers/moe/moe_runner/__init__.py +2 -1
- sglang/srt/layers/moe/moe_runner/base.py +274 -1
- sglang/srt/layers/moe/moe_runner/runner.py +80 -0
- sglang/srt/layers/moe/moe_runner/triton.py +448 -0
- sglang/srt/layers/moe/token_dispatcher/__init__.py +16 -4
- sglang/srt/layers/moe/token_dispatcher/{base_dispatcher.py → base.py} +67 -17
- sglang/srt/layers/moe/token_dispatcher/deepep.py +118 -56
- sglang/srt/layers/moe/token_dispatcher/standard.py +44 -2
- sglang/srt/layers/moe/topk.py +30 -9
- sglang/srt/layers/moe/utils.py +22 -6
- sglang/srt/layers/parameter.py +23 -6
- sglang/srt/layers/quantization/awq.py +19 -7
- sglang/srt/layers/quantization/base_config.py +11 -6
- sglang/srt/layers/quantization/blockwise_int8.py +38 -27
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +1 -0
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +50 -30
- sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +2 -0
- sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +13 -1
- sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +173 -0
- sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +2 -10
- sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +27 -0
- sglang/srt/layers/quantization/fp8.py +78 -49
- sglang/srt/layers/quantization/fp8_utils.py +51 -32
- sglang/srt/layers/quantization/gptq.py +25 -17
- sglang/srt/layers/quantization/modelopt_quant.py +190 -55
- sglang/srt/layers/quantization/moe_wna16.py +21 -18
- sglang/srt/layers/quantization/mxfp4.py +74 -42
- sglang/srt/layers/quantization/quark/quark_moe.py +48 -30
- sglang/srt/layers/quantization/unquant.py +135 -47
- sglang/srt/layers/quantization/w4afp8.py +26 -17
- sglang/srt/layers/quantization/w8a8_fp8.py +35 -20
- sglang/srt/layers/quantization/w8a8_int8.py +91 -41
- sglang/srt/layers/rotary_embedding.py +78 -31
- sglang/srt/layers/sampler.py +213 -21
- sglang/srt/layers/utils.py +23 -0
- sglang/srt/lora/backend/base_backend.py +50 -8
- sglang/srt/lora/backend/chunked_backend.py +348 -0
- sglang/srt/lora/backend/triton_backend.py +99 -5
- sglang/srt/lora/layers.py +32 -0
- sglang/srt/lora/lora.py +8 -3
- sglang/srt/lora/lora_manager.py +52 -118
- sglang/srt/lora/mem_pool.py +25 -11
- sglang/srt/lora/triton_ops/__init__.py +4 -0
- sglang/srt/lora/triton_ops/chunked_sgmv_expand.py +214 -0
- sglang/srt/lora/triton_ops/chunked_sgmv_shrink.py +174 -0
- sglang/srt/lora/utils.py +22 -11
- sglang/srt/managers/async_dynamic_batch_tokenizer.py +170 -0
- sglang/srt/managers/cache_controller.py +199 -301
- sglang/srt/managers/data_parallel_controller.py +115 -80
- sglang/srt/managers/detokenizer_manager.py +19 -15
- sglang/srt/managers/disagg_service.py +46 -0
- sglang/srt/managers/io_struct.py +340 -109
- sglang/srt/managers/mm_utils.py +44 -6
- sglang/srt/managers/multi_tokenizer_mixin.py +357 -407
- sglang/srt/managers/multimodal_processor.py +1 -2
- sglang/srt/managers/overlap_utils.py +53 -0
- sglang/srt/managers/schedule_batch.py +240 -138
- sglang/srt/managers/schedule_policy.py +144 -17
- sglang/srt/managers/scheduler.py +502 -209
- sglang/srt/managers/scheduler_input_blocker.py +1 -1
- sglang/srt/managers/scheduler_metrics_mixin.py +99 -126
- sglang/srt/managers/scheduler_output_processor_mixin.py +75 -22
- sglang/srt/managers/scheduler_profiler_mixin.py +6 -6
- sglang/srt/managers/scheduler_update_weights_mixin.py +7 -0
- sglang/srt/managers/tokenizer_communicator_mixin.py +675 -0
- sglang/srt/managers/tokenizer_manager.py +320 -632
- sglang/srt/managers/tp_worker.py +81 -22
- sglang/srt/managers/tp_worker_overlap_thread.py +71 -56
- sglang/srt/managers/utils.py +1 -45
- sglang/srt/mem_cache/allocator.py +14 -20
- sglang/srt/mem_cache/allocator_ascend.py +41 -27
- sglang/srt/mem_cache/base_prefix_cache.py +1 -1
- sglang/srt/mem_cache/chunk_cache.py +8 -1
- sglang/srt/mem_cache/evict_policy.py +23 -0
- sglang/srt/mem_cache/hicache_storage.py +43 -24
- sglang/srt/mem_cache/hiradix_cache.py +222 -75
- sglang/srt/mem_cache/memory_pool.py +535 -58
- sglang/srt/mem_cache/memory_pool_host.py +239 -228
- sglang/srt/mem_cache/radix_cache.py +222 -73
- sglang/srt/mem_cache/radix_cache_cpp.py +11 -8
- sglang/srt/mem_cache/storage/__init__.py +10 -0
- sglang/srt/mem_cache/storage/aibrix_kvcache/aibrix_kvcache_storage.py +151 -0
- sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py +109 -0
- sglang/srt/mem_cache/storage/backend_factory.py +223 -0
- sglang/srt/mem_cache/storage/eic/eic_storage.py +778 -0
- sglang/srt/mem_cache/storage/eic/test_unit.py +115 -0
- sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py +164 -0
- sglang/srt/mem_cache/storage/hf3fs/{client_hf3fs.py → hf3fs_usrbio_client.py} +5 -1
- sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +259 -62
- sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py +284 -0
- sglang/srt/mem_cache/storage/lmcache/unit_test.py +121 -0
- sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +166 -17
- sglang/srt/mem_cache/swa_radix_cache.py +25 -36
- sglang/srt/metrics/collector.py +511 -132
- sglang/srt/metrics/func_timer.py +2 -7
- sglang/srt/metrics/startup_func_log_and_timer.py +150 -0
- sglang/srt/metrics/utils.py +8 -1
- sglang/srt/model_executor/cpu_graph_runner.py +640 -0
- sglang/srt/model_executor/cuda_graph_runner.py +52 -37
- sglang/srt/model_executor/forward_batch_info.py +82 -40
- sglang/srt/model_executor/model_runner.py +432 -157
- sglang/srt/model_executor/npu_graph_runner.py +12 -5
- sglang/srt/model_loader/__init__.py +9 -3
- sglang/srt/model_loader/loader.py +133 -5
- sglang/srt/model_loader/remote_instance_weight_loader_utils.py +69 -0
- sglang/srt/model_loader/weight_utils.py +158 -3
- sglang/srt/models/apertus.py +686 -0
- sglang/srt/models/bailing_moe.py +820 -217
- sglang/srt/models/bailing_moe_nextn.py +168 -0
- sglang/srt/models/deepseek_nextn.py +6 -1
- sglang/srt/models/deepseek_v2.py +607 -130
- sglang/srt/models/dots_ocr.py +173 -0
- sglang/srt/models/dots_vlm.py +174 -0
- sglang/srt/models/dots_vlm_vit.py +337 -0
- sglang/srt/models/ernie4.py +1 -1
- sglang/srt/models/falcon_h1.py +576 -0
- sglang/srt/models/gemma3_causal.py +0 -2
- sglang/srt/models/gemma3_mm.py +1 -1
- sglang/srt/models/gemma3n_mm.py +2 -2
- sglang/srt/models/glm4_moe.py +4 -4
- sglang/srt/models/glm4_moe_nextn.py +2 -2
- sglang/srt/models/glm4v.py +5 -3
- sglang/srt/models/glm4v_moe.py +4 -1
- sglang/srt/models/gpt_oss.py +8 -31
- sglang/srt/models/kimi_vl_moonvit.py +2 -2
- sglang/srt/models/llama.py +4 -0
- sglang/srt/models/llama4.py +9 -0
- sglang/srt/models/llama_eagle3.py +13 -0
- sglang/srt/models/longcat_flash.py +3 -3
- sglang/srt/models/longcat_flash_nextn.py +1 -1
- sglang/srt/models/mllama4.py +40 -4
- sglang/srt/models/opt.py +637 -0
- sglang/srt/models/qwen2_5_vl.py +29 -5
- sglang/srt/models/qwen2_audio.py +1 -1
- sglang/srt/models/qwen2_moe.py +120 -13
- sglang/srt/models/qwen2_vl.py +1 -1
- sglang/srt/models/qwen3.py +18 -3
- sglang/srt/models/qwen3_moe.py +32 -4
- sglang/srt/models/qwen3_next.py +1069 -0
- sglang/srt/models/qwen3_next_mtp.py +112 -0
- sglang/srt/models/qwen3_vl.py +787 -0
- sglang/srt/models/qwen3_vl_moe.py +471 -0
- sglang/srt/models/registry.py +15 -3
- sglang/srt/models/sarashina2_vision.py +269 -0
- sglang/srt/models/solar.py +505 -0
- sglang/srt/models/starcoder2.py +357 -0
- sglang/srt/models/step3_vl.py +1 -1
- sglang/srt/models/torch_native_llama.py +9 -2
- sglang/srt/models/utils.py +51 -0
- sglang/srt/multimodal/processors/base_processor.py +15 -7
- sglang/srt/multimodal/processors/dots_vlm.py +98 -0
- sglang/srt/multimodal/processors/glm4v.py +9 -9
- sglang/srt/multimodal/processors/internvl.py +153 -129
- sglang/srt/multimodal/processors/qwen_vl.py +23 -6
- sglang/srt/multimodal/processors/sarashina2_vision.py +81 -0
- sglang/srt/offloader.py +27 -3
- sglang/srt/parser/jinja_template_utils.py +6 -0
- sglang/srt/sampling/sampling_batch_info.py +38 -17
- sglang/srt/sampling/sampling_params.py +7 -0
- sglang/srt/server_args.py +966 -267
- sglang/srt/server_args_config_parser.py +146 -0
- sglang/srt/single_batch_overlap.py +151 -0
- sglang/srt/speculative/cpp_ngram/ngram.cpp +374 -0
- sglang/srt/speculative/cpp_ngram/ngram.h +110 -0
- sglang/srt/speculative/cpp_ngram/ngram_cache.py +138 -0
- sglang/srt/speculative/cpp_ngram/ngram_cache_binding.cpp +43 -0
- sglang/srt/speculative/cpp_ngram/param.h +125 -0
- sglang/srt/speculative/cpp_ngram/queue.h +71 -0
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +7 -1
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +13 -2
- sglang/srt/speculative/{eagle_utils.py → eagle_info.py} +207 -757
- sglang/srt/speculative/eagle_worker.py +99 -28
- sglang/srt/speculative/ngram_utils.py +428 -0
- sglang/srt/speculative/ngram_worker.py +245 -0
- sglang/srt/speculative/spec_info.py +52 -0
- sglang/srt/speculative/spec_utils.py +606 -0
- sglang/srt/speculative/standalone_worker.py +109 -0
- sglang/srt/torch_memory_saver_adapter.py +5 -7
- sglang/srt/tracing/trace.py +578 -0
- sglang/srt/two_batch_overlap.py +8 -5
- sglang/srt/utils/__init__.py +2 -0
- sglang/srt/{utils.py → utils/common.py} +433 -77
- sglang/srt/{hf_transformers_utils.py → utils/hf_transformers_utils.py} +53 -5
- sglang/srt/{patch_torch.py → utils/patch_torch.py} +8 -0
- sglang/srt/utils/rpd_utils.py +452 -0
- sglang/srt/utils/slow_rank_detector.py +71 -0
- sglang/srt/warmup.py +8 -4
- sglang/srt/weight_sync/utils.py +2 -2
- sglang/test/attention/test_trtllm_mla_backend.py +169 -5
- sglang/test/get_logits_ut.py +57 -0
- sglang/test/run_eval.py +79 -11
- sglang/test/runners.py +5 -1
- sglang/test/simple_eval_common.py +5 -2
- sglang/test/simple_eval_mmmu_vlm.py +441 -0
- sglang/test/test_block_fp8.py +2 -2
- sglang/test/test_cutlass_moe.py +24 -6
- sglang/test/test_deterministic.py +297 -0
- sglang/test/test_disaggregation_utils.py +77 -0
- sglang/test/test_fp4_moe.py +370 -1
- sglang/test/test_programs.py +1 -1
- sglang/test/test_utils.py +383 -5
- sglang/utils.py +21 -1
- sglang/version.py +1 -1
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/METADATA +69 -124
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/RECORD +375 -245
- sglang/srt/disaggregation/launch_lb.py +0 -118
- sglang/srt/mem_cache/lora_radix_cache.py +0 -421
- /sglang/srt/{poll_based_barrier.py → utils/poll_based_barrier.py} +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/WHEEL +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.5.2rc2.dist-info → sglang-0.5.3rc2.dist-info}/top_level.txt +0 -0
@@ -39,7 +39,7 @@ def parse_arguments(json_value):
|
|
39
39
|
|
40
40
|
class Glm4MoeDetector(BaseFormatDetector):
|
41
41
|
"""
|
42
|
-
Detector for GLM-4.5 models.
|
42
|
+
Detector for GLM-4.5 and GLM-4.6 models.
|
43
43
|
Assumes function call format:
|
44
44
|
<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>
|
45
45
|
"""
|
@@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector):
|
|
53
53
|
self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
|
54
54
|
|
55
55
|
def has_tool_call(self, text: str) -> bool:
|
56
|
-
"""Check if the text contains a glm-4.5 format tool call."""
|
56
|
+
"""Check if the text contains a glm-4.5 / glm-4.6 format tool call."""
|
57
57
|
return self.bot_token in text
|
58
58
|
|
59
59
|
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
|
@@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector):
|
|
102
102
|
self, new_text: str, tools: List[Tool]
|
103
103
|
) -> StreamingParseResult:
|
104
104
|
"""
|
105
|
-
Streaming incremental parsing tool calls for GLM-4.5 format.
|
105
|
+
Streaming incremental parsing tool calls for GLM-4.5 and GLM-4.6 format.
|
106
106
|
"""
|
107
107
|
self._buffer += new_text
|
108
108
|
current_text = self._buffer
|
@@ -160,5 +160,5 @@ class Glm4MoeDetector(BaseFormatDetector):
|
|
160
160
|
function_format="xml",
|
161
161
|
call_rule_fmt='"{name}" "\\n" ( {arguments_rule} "\\n" )?',
|
162
162
|
key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"',
|
163
|
-
key_value_separator="\\n",
|
163
|
+
key_value_separator='"\\n"',
|
164
164
|
)
|
@@ -81,6 +81,29 @@ class GptOssDetector(BaseFormatDetector):
|
|
81
81
|
# Always use HarmonyParser for parsing to ensure proper filtering
|
82
82
|
events = self.harmony_parser.parse(new_text)
|
83
83
|
|
84
|
+
# If there are no parsed events and the chunk contains no Harmony structural
|
85
|
+
# markers, treat it as plain text and pass it through. This fixes a bug where
|
86
|
+
# normal content was held in the buffer when tools were provided but not used.
|
87
|
+
if not events:
|
88
|
+
has_harmony_markers = any(
|
89
|
+
marker in self._buffer
|
90
|
+
for marker in (
|
91
|
+
"<|start|>",
|
92
|
+
"<|channel|>",
|
93
|
+
"<|message|>",
|
94
|
+
"<|constrain|>",
|
95
|
+
"<|end|>",
|
96
|
+
"<|call|>",
|
97
|
+
"<|return|>",
|
98
|
+
"assistantfinal",
|
99
|
+
)
|
100
|
+
)
|
101
|
+
if not has_harmony_markers:
|
102
|
+
# Plain text with no tool markers — emit as normal content
|
103
|
+
out = self._buffer
|
104
|
+
self._buffer = ""
|
105
|
+
return StreamingParseResult(normal_text=out, calls=[])
|
106
|
+
|
84
107
|
# Quick check if we might have tool calls
|
85
108
|
if (
|
86
109
|
"<|channel|>commentary to=" not in self._buffer
|
@@ -0,0 +1,63 @@
|
|
1
|
+
import json
|
2
|
+
import re
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
from sglang.srt.entrypoints.openai.protocol import Tool
|
6
|
+
from sglang.srt.function_call.base_format_detector import BaseFormatDetector
|
7
|
+
from sglang.srt.function_call.core_types import StreamingParseResult
|
8
|
+
|
9
|
+
|
10
|
+
class JsonArrayParser(BaseFormatDetector):
|
11
|
+
"""
|
12
|
+
Parser for JSON array tool calls when JSON schema constraints are active.
|
13
|
+
|
14
|
+
This parser is used when tool_choice="required" or a specific tool is named,
|
15
|
+
bypassing model-specific parsers in favor of direct JSON array parsing.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(self):
|
19
|
+
super().__init__()
|
20
|
+
# Configure for JSON array parsing
|
21
|
+
self.bot_token = "["
|
22
|
+
self.eot_token = "]"
|
23
|
+
self.tool_call_separator = ","
|
24
|
+
|
25
|
+
def has_tool_call(self, text: str) -> bool:
|
26
|
+
"""
|
27
|
+
Check if the given text contains a JSON tool call (array or single object).
|
28
|
+
"""
|
29
|
+
return "[" in text or "{" in text
|
30
|
+
|
31
|
+
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
|
32
|
+
"""
|
33
|
+
Parse JSON tool calls using the base class implementation.
|
34
|
+
"""
|
35
|
+
raise NotImplementedError(
|
36
|
+
"Detect and parse not supported for JSON schema constraints."
|
37
|
+
)
|
38
|
+
|
39
|
+
def build_ebnf(self, tools: List[Tool]) -> str:
|
40
|
+
"""
|
41
|
+
Build an EBNF grammar for constrained generation.
|
42
|
+
This is not used for JSON schema constraints as they are handled
|
43
|
+
by the constraint backends directly.
|
44
|
+
"""
|
45
|
+
raise NotImplementedError(
|
46
|
+
"EBNF generation is not supported for JSON schema constraints."
|
47
|
+
)
|
48
|
+
|
49
|
+
def parse_streaming_increment(
|
50
|
+
self, new_text: str, tools: List[Tool]
|
51
|
+
) -> StreamingParseResult:
|
52
|
+
"""
|
53
|
+
Streaming incremental parsing with tool validation.
|
54
|
+
"""
|
55
|
+
return super().parse_streaming_increment(new_text, tools)
|
56
|
+
|
57
|
+
def structure_info(self) -> callable:
|
58
|
+
"""
|
59
|
+
Return a function that creates StructureInfo for constrained generation.
|
60
|
+
This is not used for JSON schema constraints as they are handled
|
61
|
+
by the constraint backends directly.
|
62
|
+
"""
|
63
|
+
raise NotImplementedError("structure_info not used for JSON schema constraints")
|
@@ -50,6 +50,11 @@ class KimiK2Detector(BaseFormatDetector):
|
|
50
50
|
|
51
51
|
self._last_arguments = ""
|
52
52
|
|
53
|
+
# Robust parser for ids like "functions.search:0" or fallback "search:0"
|
54
|
+
self.tool_call_id_regex = re.compile(
|
55
|
+
r"^(?:functions\.)?(?P<name>[\w\.]+):(?P<index>\d+)$"
|
56
|
+
)
|
57
|
+
|
53
58
|
def has_tool_call(self, text: str) -> bool:
|
54
59
|
"""Check if the text contains a KimiK2 format tool call."""
|
55
60
|
return self.bot_token in text
|
@@ -76,14 +81,18 @@ class KimiK2Detector(BaseFormatDetector):
|
|
76
81
|
tool_calls = []
|
77
82
|
for match in function_call_tuples:
|
78
83
|
function_id, function_args = match
|
79
|
-
|
80
|
-
|
84
|
+
m = self.tool_call_id_regex.match(function_id)
|
85
|
+
if not m:
|
86
|
+
logger.warning("Unexpected tool_call_id format: %s", function_id)
|
87
|
+
continue
|
88
|
+
function_name = m.group("name")
|
89
|
+
function_idx = int(m.group("index"))
|
81
90
|
|
82
91
|
logger.info(f"function_name {function_name}")
|
83
92
|
|
84
93
|
tool_calls.append(
|
85
94
|
ToolCallItem(
|
86
|
-
tool_index=function_idx,
|
95
|
+
tool_index=function_idx,
|
87
96
|
name=function_name,
|
88
97
|
parameters=function_args,
|
89
98
|
)
|
@@ -128,7 +137,11 @@ class KimiK2Detector(BaseFormatDetector):
|
|
128
137
|
function_id = match.group("tool_call_id")
|
129
138
|
function_args = match.group("function_arguments")
|
130
139
|
|
131
|
-
|
140
|
+
m = self.tool_call_id_regex.match(function_id)
|
141
|
+
if not m:
|
142
|
+
logger.warning("Unexpected tool_call_id format: %s", function_id)
|
143
|
+
return StreamingParseResult(normal_text="", calls=calls)
|
144
|
+
function_name = m.group("name")
|
132
145
|
|
133
146
|
# Initialize state if this is the first tool call
|
134
147
|
if self.current_tool_id == -1:
|
@@ -358,5 +358,5 @@ class Qwen3CoderDetector(BaseFormatDetector):
|
|
358
358
|
function_format="xml",
|
359
359
|
call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"',
|
360
360
|
key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
|
361
|
-
key_value_separator="\\n",
|
361
|
+
key_value_separator='"\\n"',
|
362
362
|
)
|
@@ -1,10 +1,13 @@
|
|
1
1
|
import json
|
2
2
|
from json import JSONDecodeError, JSONDecoder
|
3
|
-
from
|
3
|
+
from json.decoder import WHITESPACE
|
4
|
+
from typing import Any, List, Literal, Optional, Tuple, Union
|
4
5
|
|
5
6
|
import partial_json_parser
|
6
7
|
from partial_json_parser.core.options import Allow
|
7
8
|
|
9
|
+
from sglang.srt.entrypoints.openai.protocol import Tool, ToolChoice
|
10
|
+
|
8
11
|
|
9
12
|
def _find_common_prefix(s1: str, s2: str) -> str:
|
10
13
|
prefix = ""
|
@@ -37,10 +40,12 @@ def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
|
|
37
40
|
"""
|
38
41
|
try:
|
39
42
|
return (partial_json_parser.loads(input_str, flags), len(input_str))
|
40
|
-
except JSONDecodeError as e:
|
41
|
-
|
42
|
-
|
43
|
-
|
43
|
+
except (JSONDecodeError, IndexError) as e:
|
44
|
+
msg = getattr(e, "msg", str(e))
|
45
|
+
if "Extra data" in msg or "pop from empty list" in msg:
|
46
|
+
start = WHITESPACE.match(input_str, 0).end()
|
47
|
+
obj, end = JSONDecoder().raw_decode(input_str, start)
|
48
|
+
return obj, end
|
44
49
|
raise
|
45
50
|
|
46
51
|
|
@@ -50,3 +55,89 @@ def _is_complete_json(input_str: str) -> bool:
|
|
50
55
|
return True
|
51
56
|
except JSONDecodeError:
|
52
57
|
return False
|
58
|
+
|
59
|
+
|
60
|
+
def _get_tool_schema_defs(tools: List[Tool]) -> dict:
|
61
|
+
"""
|
62
|
+
Get consolidated $defs from all tools, validating for conflicts.
|
63
|
+
|
64
|
+
Args:
|
65
|
+
tools: List of tools to process
|
66
|
+
|
67
|
+
Returns:
|
68
|
+
Dictionary of consolidated $defs from all tools
|
69
|
+
|
70
|
+
Raises:
|
71
|
+
ValueError: If conflicting $defs are found
|
72
|
+
"""
|
73
|
+
all_defs = {}
|
74
|
+
for tool in tools:
|
75
|
+
if tool.function.parameters is None:
|
76
|
+
continue
|
77
|
+
defs = tool.function.parameters.get("$defs", {})
|
78
|
+
for def_name, def_schema in defs.items():
|
79
|
+
if def_name in all_defs and all_defs[def_name] != def_schema:
|
80
|
+
raise ValueError(
|
81
|
+
f"Tool definition '{def_name}' has "
|
82
|
+
"multiple schemas, which is not "
|
83
|
+
"supported."
|
84
|
+
)
|
85
|
+
else:
|
86
|
+
all_defs[def_name] = def_schema
|
87
|
+
return all_defs
|
88
|
+
|
89
|
+
|
90
|
+
def _get_tool_schema(tool: Tool) -> dict:
|
91
|
+
return {
|
92
|
+
"properties": {
|
93
|
+
"name": {"type": "string", "enum": [tool.function.name]},
|
94
|
+
"parameters": (
|
95
|
+
tool.function.parameters
|
96
|
+
if tool.function.parameters
|
97
|
+
else {"type": "object", "properties": {}}
|
98
|
+
),
|
99
|
+
},
|
100
|
+
"required": ["name", "parameters"],
|
101
|
+
}
|
102
|
+
|
103
|
+
|
104
|
+
def get_json_schema_constraint(
|
105
|
+
tools: List[Tool], tool_choice: Union[ToolChoice, Literal["required"]]
|
106
|
+
) -> Optional[dict]:
|
107
|
+
"""
|
108
|
+
Get the JSON schema constraint for the specified tool choice.
|
109
|
+
|
110
|
+
Args:
|
111
|
+
tool_choice: The tool choice specification
|
112
|
+
|
113
|
+
Returns:
|
114
|
+
JSON schema dict, or None if no valid tools found
|
115
|
+
"""
|
116
|
+
|
117
|
+
if isinstance(tool_choice, ToolChoice):
|
118
|
+
# For specific function choice, return the user's parameters schema directly
|
119
|
+
fn_name = tool_choice.function.name
|
120
|
+
for tool in tools:
|
121
|
+
if tool.function.name == fn_name:
|
122
|
+
return {
|
123
|
+
"type": "array",
|
124
|
+
"minItems": 1,
|
125
|
+
"maxItems": 1,
|
126
|
+
"items": _get_tool_schema(tool),
|
127
|
+
}
|
128
|
+
return None
|
129
|
+
elif tool_choice == "required":
|
130
|
+
json_schema = {
|
131
|
+
"type": "array",
|
132
|
+
"minItems": 1,
|
133
|
+
"items": {
|
134
|
+
"type": "object",
|
135
|
+
"anyOf": [_get_tool_schema(tool) for tool in tools],
|
136
|
+
},
|
137
|
+
}
|
138
|
+
json_schema_defs = _get_tool_schema_defs(tools)
|
139
|
+
if json_schema_defs:
|
140
|
+
json_schema["$defs"] = json_schema_defs
|
141
|
+
return json_schema
|
142
|
+
|
143
|
+
return None
|
@@ -0,0 +1 @@
|
|
1
|
+
# SGLang gRPC module
|
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Compile protobuf files for SGLang gRPC server.
|
4
|
+
|
5
|
+
This script compiles .proto files to Python code using grpc_tools.protoc.
|
6
|
+
It generates:
|
7
|
+
- *_pb2.py (protobuf message classes)
|
8
|
+
- *_pb2_grpc.py (gRPC service classes)
|
9
|
+
- *_pb2.pyi (type hints for mypy/IDEs)
|
10
|
+
|
11
|
+
Usage:
|
12
|
+
python compile_proto.py [--check] [--proto-file PROTO_FILE]
|
13
|
+
|
14
|
+
Options:
|
15
|
+
--check Check if regeneration is needed (exit 1 if needed)
|
16
|
+
--proto-file Specify proto file (default: sglang_scheduler.proto)
|
17
|
+
|
18
|
+
### Install Dependencies
|
19
|
+
pip install "grpcio==1.74.0" "grpcio-tools==1.74.0"
|
20
|
+
|
21
|
+
### Run Script
|
22
|
+
cd python/sglang/srt/grpc
|
23
|
+
python compile_proto.py
|
24
|
+
"""
|
25
|
+
|
26
|
+
|
27
|
+
import argparse
|
28
|
+
import subprocess
|
29
|
+
import sys
|
30
|
+
from importlib.metadata import version
|
31
|
+
from pathlib import Path
|
32
|
+
|
33
|
+
# Exact grpcio / grpcio-tools release that compile_proto() requires; it raises
# RuntimeError when either installed package reports a different version.
GRPC_VERSION = "1.74.0"
|
34
|
+
|
35
|
+
|
36
|
+
def get_file_mtime(path: Path) -> float:
    """Return the modification timestamp of ``path``, or 0.0 if it is missing."""
    try:
        stat_result = path.stat()
    except FileNotFoundError:
        # A missing file compares as older than any existing file.
        return 0.0
    return stat_result.st_mtime
|
42
|
+
|
43
|
+
|
44
|
+
def check_regeneration_needed(proto_file: Path, output_dir: Path) -> bool:
    """Report whether any generated file is older than the proto source.

    The three expected outputs (``*_pb2.py``, ``*_pb2_grpc.py``, ``*_pb2.pyi``)
    are compared against the proto file's modification time. Timestamps come
    from get_file_mtime, so a file that does not exist counts as 0.0.
    """
    source_mtime = get_file_mtime(proto_file)
    stem = proto_file.stem
    return any(
        get_file_mtime(output_dir / f"{stem}{suffix}") < source_mtime
        for suffix in ("_pb2.py", "_pb2_grpc.py", "_pb2.pyi")
    )
|
59
|
+
|
60
|
+
|
61
|
+
def compile_proto(proto_file: Path, output_dir: Path, verbose: bool = True) -> bool:
    """Compile the protobuf file to Python.

    Runs ``python -m grpc_tools.protoc`` to produce ``*_pb2.py``,
    ``*_pb2_grpc.py`` and ``*_pb2.pyi`` in ``output_dir``, then calls
    ``fix_imports`` on the generated gRPC module.

    Args:
        proto_file: Path to the ``.proto`` source file (absolute or relative
            to the current working directory).
        output_dir: Directory that receives the generated files (absolute or
            relative to the current working directory).
        verbose: Print progress messages when True.

    Returns:
        True when all three files were generated. False when the proto file
        is missing, grpcio-tools cannot be imported, protoc exits non-zero,
        or an expected output file is absent.

    Raises:
        RuntimeError: If the installed grpcio or grpcio-tools version differs
            from ``GRPC_VERSION``.
    """
    if not proto_file.exists():
        print(f"Error: Proto file not found: {proto_file}")
        return False

    if verbose:
        print(f"Found proto file: {proto_file}")

    # Check if grpc_tools is available
    try:
        import grpc_tools.protoc  # noqa: F401  (availability probe only)
    except ImportError:
        print("Error: grpcio-tools not installed")
        print(
            f'Install with: pip install "grpcio-tools=={GRPC_VERSION}" "grpcio=={GRPC_VERSION}"'
        )
        return False

    grpc_tools_version = version("grpcio-tools")
    grpc_version = version("grpcio")
    if grpc_tools_version != GRPC_VERSION or grpc_version != GRPC_VERSION:
        raise RuntimeError(
            f"Error: grpcio-tools version {grpc_tools_version} and grpcio version {grpc_version} detected, but {GRPC_VERSION} is required."
        )

    # protoc is launched with cwd set to the proto directory, so every path
    # handed to it must be absolute; a relative path would otherwise be
    # resolved against that directory a second time.
    proto_dir = proto_file.parent.resolve()
    resolved_output_dir = output_dir.resolve()

    # Compile command
    cmd = [
        sys.executable,
        "-m",
        "grpc_tools.protoc",
        f"-I{proto_dir}",
        f"--python_out={resolved_output_dir}",
        f"--grpc_python_out={resolved_output_dir}",
        f"--pyi_out={resolved_output_dir}",  # Generate type stubs
        str(proto_file.name),
    ]

    if verbose:
        print(f"Running: {' '.join(cmd)}")

    # Run protoc
    result = subprocess.run(cmd, capture_output=True, text=True, cwd=proto_dir)

    if result.returncode != 0:
        print("Error compiling proto:")
        print(result.stderr)
        if result.stdout:
            print(result.stdout)
        return False

    # Verify generated files exist
    generated_files = [
        f"{proto_file.stem}_pb2.py",
        f"{proto_file.stem}_pb2_grpc.py",
        f"{proto_file.stem}_pb2.pyi",
    ]

    missing_files = [
        gen_file
        for gen_file in generated_files
        if not (output_dir / gen_file).exists()
    ]

    if missing_files:
        print(f"Error: Expected generated files not found: {missing_files}")
        return False

    if verbose:
        print("Successfully compiled protobuf files:")
        for gen_file in generated_files:
            print(f" - {output_dir}/{gen_file}")

    # Fix imports in generated files
    fix_imports(output_dir, proto_file.stem, verbose)

    return True
|
138
|
+
|
139
|
+
|
140
|
+
def fix_imports(output_dir: Path, proto_stem: str, verbose: bool = True) -> None:
    """Fix imports in generated files to use relative imports.

    Rewrites ``import <proto_stem>_pb2 ...`` statements in
    ``<proto_stem>_pb2_grpc.py`` to ``from . import <proto_stem>_pb2 ...``.
    Only whole import statements at the start of a line are rewritten, so
    calling this again on an already-fixed file leaves it unchanged.

    Args:
        output_dir: Directory containing the generated gRPC module.
        proto_stem: Proto file name without its extension.
        verbose: Print a message when the file was rewritten.
    """
    import re  # local import: only this helper needs it

    grpc_file = output_dir / f"{proto_stem}_pb2_grpc.py"
    if not grpc_file.exists():
        return

    content = grpc_file.read_text(encoding="utf-8")

    # Anchor on the start of a line and stop at a word boundary so that
    # "from . import X_pb2" and "import X_pb2_grpc" are never touched.
    absolute_import = re.compile(
        rf"^([ \t]*)import ({re.escape(proto_stem)}_pb2)\b", flags=re.MULTILINE
    )
    # A callable replacement keeps any backslashes in the stem literal.
    fixed, replacements = absolute_import.subn(
        lambda match: f"{match.group(1)}from . import {match.group(2)}", content
    )
    if not replacements:
        return

    grpc_file.write_text(fixed, encoding="utf-8")
    if verbose:
        print("Fixed imports in generated files")
|
155
|
+
|
156
|
+
|
157
|
+
def add_generation_header(output_dir: Path, proto_stem: str) -> None:
    """Prepend an "auto-generated" banner to the generated ``.py`` modules.

    Only ``<proto_stem>_pb2.py`` and ``<proto_stem>_pb2_grpc.py`` are touched.
    A file that is missing, or that already starts with the banner's opening
    words, is left as it is.
    """
    banner = (
        "# This file is auto-generated. Do not edit manually.\n"
        "# Regenerate with: python compile_proto.py\n"
        "\n"
    )

    for suffix in ("_pb2.py", "_pb2_grpc.py"):
        target = output_dir / f"{proto_stem}{suffix}"
        if not target.exists():
            continue
        existing = target.read_text()
        if existing.startswith("# This file is auto-generated"):
            continue
        target.write_text(banner + existing)
|
172
|
+
|
173
|
+
|
174
|
+
def main():
    """Command-line entry point: check freshness or compile the proto file.

    With ``--check`` the process exits with status 1 when regeneration is
    needed and 0 otherwise. Without it the proto file is compiled; on failure
    the process exits with status 1, on success the generation banner is added
    to the generated modules.
    """
    cli = argparse.ArgumentParser(
        description="Compile protobuf files for SGLang gRPC server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    cli.add_argument(
        "--check",
        action="store_true",
        help="Check if regeneration is needed (exit 1 if needed)",
    )
    cli.add_argument(
        "--proto-file",
        type=str,
        default="sglang_scheduler.proto",
        help="Proto file to compile (default: sglang_scheduler.proto)",
    )
    cli.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=True,
        help="Verbose output (default: True)",
    )
    cli.add_argument(
        "-q", "--quiet", action="store_true", help="Quiet mode (overrides verbose)"
    )
    options = cli.parse_args()

    # --quiet wins over --verbose.
    chatty = options.verbose and not options.quiet

    # The proto file is looked up next to this script, and the generated
    # files are written to the same directory.
    base_dir = Path(__file__).parent
    proto_path = base_dir / options.proto_file

    if options.check:
        stale = check_regeneration_needed(proto_path, base_dir)
        if chatty:
            print(
                "Proto files need regeneration"
                if stale
                else "Generated files are up to date"
            )
        sys.exit(1 if stale else 0)

    if not compile_proto(proto_path, base_dir, chatty):
        if chatty:
            print("\n❌ Protobuf compilation failed!")
        sys.exit(1)

    add_generation_header(base_dir, proto_path.stem)
    if chatty:
        print("\n✅ Protobuf compilation successful!")
        print("Generated files are ready for use")
|
242
|
+
|
243
|
+
|
244
|
+
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# This file is auto-generated. Do not edit manually.
|
2
|
+
# Regenerate with: python compile_proto.py
|
3
|
+
|
4
|
+
# -*- coding: utf-8 -*-
|
5
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
6
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
7
|
+
# source: sglang_scheduler.proto
|
8
|
+
# Protobuf Python Version: 6.31.1
|
9
|
+
"""Generated protocol buffer code."""
|
10
|
+
from google.protobuf import descriptor as _descriptor
|
11
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
12
|
+
from google.protobuf import runtime_version as _runtime_version
|
13
|
+
from google.protobuf import symbol_database as _symbol_database
|
14
|
+
from google.protobuf.internal import builder as _builder
|
15
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
16
|
+
_runtime_version.Domain.PUBLIC,
|
17
|
+
6,
|
18
|
+
31,
|
19
|
+
1,
|
20
|
+
'',
|
21
|
+
'sglang_scheduler.proto'
|
22
|
+
)
|
23
|
+
# @@protoc_insertion_point(imports)
|
24
|
+
|
25
|
+
_sym_db = _symbol_database.Default()
|
26
|
+
|
27
|
+
|
28
|
+
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
29
|
+
from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
|
30
|
+
|
31
|
+
|
32
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16sglang_scheduler.proto\x12\x15sglang.grpc.scheduler\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\"\xe1\x05\n\x0eSamplingParams\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_p\x18\x02 \x01(\x02\x12\r\n\x05top_k\x18\x03 \x01(\x05\x12\r\n\x05min_p\x18\x04 \x01(\x02\x12\x19\n\x11\x66requency_penalty\x18\x05 \x01(\x02\x12\x18\n\x10presence_penalty\x18\x06 \x01(\x02\x12\x1a\n\x12repetition_penalty\x18\x07 \x01(\x02\x12\x1b\n\x0emax_new_tokens\x18\x08 \x01(\x05H\x01\x88\x01\x01\x12\x0c\n\x04stop\x18\t \x03(\t\x12\x16\n\x0estop_token_ids\x18\n \x03(\r\x12\x1b\n\x13skip_special_tokens\x18\x0b \x01(\x08\x12%\n\x1dspaces_between_special_tokens\x18\x0c \x01(\x08\x12\x0f\n\x05regex\x18\r \x01(\tH\x00\x12\x15\n\x0bjson_schema\x18\x0e \x01(\tH\x00\x12\x16\n\x0c\x65\x62nf_grammar\x18\x0f \x01(\tH\x00\x12\x18\n\x0estructural_tag\x18\x10 \x01(\tH\x00\x12\x11\n\tlora_path\x18\x11 \x01(\t\x12\t\n\x01n\x18\x12 \x01(\x05\x12\x15\n\rtoken_healing\x18\x13 \x01(\x08\x12\x16\n\x0emin_new_tokens\x18\x14 \x01(\x05\x12\x12\n\nignore_eos\x18\x15 \x01(\x08\x12\x14\n\x0cno_stop_trim\x18\x16 \x01(\x08\x12\x17\n\x0fstream_interval\x18\x17 \x01(\x05\x12H\n\nlogit_bias\x18\x18 \x03(\x0b\x32\x34.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry\x12.\n\rcustom_params\x18\x19 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x30\n\x0eLogitBiasEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x42\x0c\n\nconstraintB\x11\n\x0f_max_new_tokens\"]\n\x13\x44isaggregatedParams\x12\x16\n\x0e\x62ootstrap_host\x18\x01 \x01(\t\x12\x16\n\x0e\x62ootstrap_port\x18\x02 \x01(\x05\x12\x16\n\x0e\x62ootstrap_room\x18\x03 \x01(\x05\"\xe2\x04\n\x0fGenerateRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x03 
\x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x04 \x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x16\n\x0ereturn_logprob\x18\x05 \x01(\x08\x12\x19\n\x11logprob_start_len\x18\x06 \x01(\x05\x12\x18\n\x10top_logprobs_num\x18\x07 \x01(\x05\x12\x19\n\x11token_ids_logprob\x18\x08 \x03(\r\x12\x1c\n\x14return_hidden_states\x18\t \x01(\x08\x12H\n\x14\x64isaggregated_params\x18\n \x01(\x0b\x32*.sglang.grpc.scheduler.DisaggregatedParams\x12\x1e\n\x16\x63ustom_logit_processor\x18\x0b \x01(\t\x12-\n\ttimestamp\x18\x0c \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x13\n\x0blog_metrics\x18\r \x01(\x08\x12\x14\n\x0cinput_embeds\x18\x0e \x03(\x02\x12\x0f\n\x07lora_id\x18\x0f \x01(\t\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x10 \x01(\x05\x12\x0e\n\x06stream\x18\x11 \x01(\x08\":\n\x0eTokenizedInput\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x11\n\tinput_ids\x18\x02 \x03(\r\"\xd3\x01\n\x10MultimodalInputs\x12\x12\n\nimage_urls\x18\x01 \x03(\t\x12\x12\n\nvideo_urls\x18\x02 \x03(\t\x12\x12\n\naudio_urls\x18\x03 \x03(\t\x12\x33\n\x12processed_features\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x12\n\nimage_data\x18\x05 \x03(\x0c\x12\x12\n\nvideo_data\x18\x06 \x03(\x0c\x12\x12\n\naudio_data\x18\x07 \x03(\x0c\x12\x12\n\nmodalities\x18\x08 \x03(\t\"\xe3\x01\n\x10GenerateResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12;\n\x05\x63hunk\x18\x02 \x01(\x0b\x32*.sglang.grpc.scheduler.GenerateStreamChunkH\x00\x12;\n\x08\x63omplete\x18\x03 \x01(\x0b\x32\'.sglang.grpc.scheduler.GenerateCompleteH\x00\x12\x35\n\x05\x65rror\x18\x04 \x01(\x0b\x32$.sglang.grpc.scheduler.GenerateErrorH\x00\x42\n\n\x08response\"\x95\x02\n\x13GenerateStreamChunk\x12\x11\n\ttoken_ids\x18\x01 \x03(\r\x12\x15\n\rprompt_tokens\x18\x02 \x01(\x05\x12\x19\n\x11\x63ompletion_tokens\x18\x03 \x01(\x05\x12\x15\n\rcached_tokens\x18\x04 \x01(\x05\x12>\n\x0foutput_logprobs\x18\x05 \x01(\x0b\x32%.sglang.grpc.scheduler.OutputLogProbs\x12\x15\n\rhidden_states\x18\x06 
\x03(\x02\x12<\n\x0einput_logprobs\x18\x07 \x01(\x0b\x32$.sglang.grpc.scheduler.InputLogProbs\x12\r\n\x05index\x18\x08 \x01(\r\"\x9b\x03\n\x10GenerateComplete\x12\x12\n\noutput_ids\x18\x01 \x03(\r\x12\x15\n\rfinish_reason\x18\x02 \x01(\t\x12\x15\n\rprompt_tokens\x18\x03 \x01(\x05\x12\x19\n\x11\x63ompletion_tokens\x18\x04 \x01(\x05\x12\x15\n\rcached_tokens\x18\x05 \x01(\x05\x12>\n\x0foutput_logprobs\x18\x06 \x01(\x0b\x32%.sglang.grpc.scheduler.OutputLogProbs\x12>\n\x11\x61ll_hidden_states\x18\x07 \x03(\x0b\x32#.sglang.grpc.scheduler.HiddenStates\x12\x1a\n\x10matched_token_id\x18\x08 \x01(\rH\x00\x12\x1a\n\x10matched_stop_str\x18\t \x01(\tH\x00\x12<\n\x0einput_logprobs\x18\n \x01(\x0b\x32$.sglang.grpc.scheduler.InputLogProbs\x12\r\n\x05index\x18\x0b \x01(\rB\x0e\n\x0cmatched_stop\"K\n\rGenerateError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x18\n\x10http_status_code\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"u\n\x0eOutputLogProbs\x12\x16\n\x0etoken_logprobs\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x38\n\x0ctop_logprobs\x18\x03 \x03(\x0b\x32\".sglang.grpc.scheduler.TopLogProbs\"\x9e\x01\n\rInputLogProbs\x12@\n\x0etoken_logprobs\x18\x01 \x03(\x0b\x32(.sglang.grpc.scheduler.InputTokenLogProb\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x38\n\x0ctop_logprobs\x18\x03 \x03(\x0b\x32\".sglang.grpc.scheduler.TopLogProbs\"1\n\x11InputTokenLogProb\x12\x12\n\x05value\x18\x01 \x01(\x02H\x00\x88\x01\x01\x42\x08\n\x06_value\"0\n\x0bTopLogProbs\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\"?\n\x0cHiddenStates\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05layer\x18\x02 \x01(\x05\x12\x10\n\x08position\x18\x03 \x01(\x05\"\xca\x02\n\x0c\x45mbedRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x04 \x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x05 
\x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x13\n\x0blog_metrics\x18\x06 \x01(\x08\x12\x16\n\x0etoken_type_ids\x18\x07 \x03(\x05\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x08 \x01(\x05\x12\x18\n\x10is_cross_encoder\x18\t \x01(\x08\x12\r\n\x05texts\x18\n \x03(\t\"\x9d\x01\n\rEmbedResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\x08\x63omplete\x18\x02 \x01(\x0b\x32$.sglang.grpc.scheduler.EmbedCompleteH\x00\x12\x32\n\x05\x65rror\x18\x03 \x01(\x0b\x32!.sglang.grpc.scheduler.EmbedErrorH\x00\x42\n\n\x08response\"\xa3\x01\n\rEmbedComplete\x12\x11\n\tembedding\x18\x01 \x03(\x02\x12\x15\n\rprompt_tokens\x18\x02 \x01(\x05\x12\x15\n\rcached_tokens\x18\x03 \x01(\x05\x12\x15\n\rembedding_dim\x18\x04 \x01(\x05\x12:\n\x10\x62\x61tch_embeddings\x18\x05 \x03(\x0b\x32 .sglang.grpc.scheduler.Embedding\"*\n\tEmbedding\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05index\x18\x02 \x01(\x05\"<\n\nEmbedError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"N\n\x12HealthCheckRequest\x12\x38\n\ttokenized\x18\x01 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\"7\n\x13HealthCheckResponse\x12\x0f\n\x07healthy\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"2\n\x0c\x41\x62ortRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06reason\x18\x02 \x01(\t\"1\n\rAbortResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"I\n\x0fLoadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\x12\x14\n\x0c\x61\x64\x61pter_path\x18\x02 \x01(\t\x12\x0c\n\x04rank\x18\x03 \x01(\x05\"H\n\x10LoadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x12\n\nadapter_id\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\"\'\n\x11UnloadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\"6\n\x12UnloadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"w\n\x14UpdateWeightsRequest\x12\x13\n\tdisk_path\x18\x01 
\x01(\tH\x00\x12\x15\n\x0btensor_data\x18\x02 \x01(\x0cH\x00\x12\x14\n\nremote_url\x18\x03 \x01(\tH\x00\x12\x13\n\x0bweight_name\x18\x04 \x01(\tB\x08\n\x06source\"9\n\x15UpdateWeightsResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"-\n\x17GetInternalStateRequest\x12\x12\n\nstate_keys\x18\x01 \x03(\t\"B\n\x18GetInternalStateResponse\x12&\n\x05state\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"A\n\x17SetInternalStateRequest\x12&\n\x05state\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"<\n\x18SetInternalStateResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xfe\x02\n\x0fSglangScheduler\x12]\n\x08Generate\x12&.sglang.grpc.scheduler.GenerateRequest\x1a\'.sglang.grpc.scheduler.GenerateResponse0\x01\x12R\n\x05\x45mbed\x12#.sglang.grpc.scheduler.EmbedRequest\x1a$.sglang.grpc.scheduler.EmbedResponse\x12\x64\n\x0bHealthCheck\x12).sglang.grpc.scheduler.HealthCheckRequest\x1a*.sglang.grpc.scheduler.HealthCheckResponse\x12R\n\x05\x41\x62ort\x12#.sglang.grpc.scheduler.AbortRequest\x1a$.sglang.grpc.scheduler.AbortResponseb\x06proto3')
|
33
|
+
|
34
|
+
_globals = globals()
|
35
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
36
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sglang_scheduler_pb2', _globals)
|
37
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
38
|
+
DESCRIPTOR._loaded_options = None
|
39
|
+
_globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._loaded_options = None
|
40
|
+
_globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_options = b'8\001'
|
41
|
+
_globals['_SAMPLINGPARAMS']._serialized_start=113
|
42
|
+
_globals['_SAMPLINGPARAMS']._serialized_end=850
|
43
|
+
_globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_start=769
|
44
|
+
_globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_end=817
|
45
|
+
_globals['_DISAGGREGATEDPARAMS']._serialized_start=852
|
46
|
+
_globals['_DISAGGREGATEDPARAMS']._serialized_end=945
|
47
|
+
_globals['_GENERATEREQUEST']._serialized_start=948
|
48
|
+
_globals['_GENERATEREQUEST']._serialized_end=1558
|
49
|
+
_globals['_TOKENIZEDINPUT']._serialized_start=1560
|
50
|
+
_globals['_TOKENIZEDINPUT']._serialized_end=1618
|
51
|
+
_globals['_MULTIMODALINPUTS']._serialized_start=1621
|
52
|
+
_globals['_MULTIMODALINPUTS']._serialized_end=1832
|
53
|
+
_globals['_GENERATERESPONSE']._serialized_start=1835
|
54
|
+
_globals['_GENERATERESPONSE']._serialized_end=2062
|
55
|
+
_globals['_GENERATESTREAMCHUNK']._serialized_start=2065
|
56
|
+
_globals['_GENERATESTREAMCHUNK']._serialized_end=2342
|
57
|
+
_globals['_GENERATECOMPLETE']._serialized_start=2345
|
58
|
+
_globals['_GENERATECOMPLETE']._serialized_end=2756
|
59
|
+
_globals['_GENERATEERROR']._serialized_start=2758
|
60
|
+
_globals['_GENERATEERROR']._serialized_end=2833
|
61
|
+
_globals['_OUTPUTLOGPROBS']._serialized_start=2835
|
62
|
+
_globals['_OUTPUTLOGPROBS']._serialized_end=2952
|
63
|
+
_globals['_INPUTLOGPROBS']._serialized_start=2955
|
64
|
+
_globals['_INPUTLOGPROBS']._serialized_end=3113
|
65
|
+
_globals['_INPUTTOKENLOGPROB']._serialized_start=3115
|
66
|
+
_globals['_INPUTTOKENLOGPROB']._serialized_end=3164
|
67
|
+
_globals['_TOPLOGPROBS']._serialized_start=3166
|
68
|
+
_globals['_TOPLOGPROBS']._serialized_end=3214
|
69
|
+
_globals['_HIDDENSTATES']._serialized_start=3216
|
70
|
+
_globals['_HIDDENSTATES']._serialized_end=3279
|
71
|
+
_globals['_EMBEDREQUEST']._serialized_start=3282
|
72
|
+
_globals['_EMBEDREQUEST']._serialized_end=3612
|
73
|
+
_globals['_EMBEDRESPONSE']._serialized_start=3615
|
74
|
+
_globals['_EMBEDRESPONSE']._serialized_end=3772
|
75
|
+
_globals['_EMBEDCOMPLETE']._serialized_start=3775
|
76
|
+
_globals['_EMBEDCOMPLETE']._serialized_end=3938
|
77
|
+
_globals['_EMBEDDING']._serialized_start=3940
|
78
|
+
_globals['_EMBEDDING']._serialized_end=3982
|
79
|
+
_globals['_EMBEDERROR']._serialized_start=3984
|
80
|
+
_globals['_EMBEDERROR']._serialized_end=4044
|
81
|
+
_globals['_HEALTHCHECKREQUEST']._serialized_start=4046
|
82
|
+
_globals['_HEALTHCHECKREQUEST']._serialized_end=4124
|
83
|
+
_globals['_HEALTHCHECKRESPONSE']._serialized_start=4126
|
84
|
+
_globals['_HEALTHCHECKRESPONSE']._serialized_end=4181
|
85
|
+
_globals['_ABORTREQUEST']._serialized_start=4183
|
86
|
+
_globals['_ABORTREQUEST']._serialized_end=4233
|
87
|
+
_globals['_ABORTRESPONSE']._serialized_start=4235
|
88
|
+
_globals['_ABORTRESPONSE']._serialized_end=4284
|
89
|
+
_globals['_LOADLORAREQUEST']._serialized_start=4286
|
90
|
+
_globals['_LOADLORAREQUEST']._serialized_end=4359
|
91
|
+
_globals['_LOADLORARESPONSE']._serialized_start=4361
|
92
|
+
_globals['_LOADLORARESPONSE']._serialized_end=4433
|
93
|
+
_globals['_UNLOADLORAREQUEST']._serialized_start=4435
|
94
|
+
_globals['_UNLOADLORAREQUEST']._serialized_end=4474
|
95
|
+
_globals['_UNLOADLORARESPONSE']._serialized_start=4476
|
96
|
+
_globals['_UNLOADLORARESPONSE']._serialized_end=4530
|
97
|
+
_globals['_UPDATEWEIGHTSREQUEST']._serialized_start=4532
|
98
|
+
_globals['_UPDATEWEIGHTSREQUEST']._serialized_end=4651
|
99
|
+
_globals['_UPDATEWEIGHTSRESPONSE']._serialized_start=4653
|
100
|
+
_globals['_UPDATEWEIGHTSRESPONSE']._serialized_end=4710
|
101
|
+
_globals['_GETINTERNALSTATEREQUEST']._serialized_start=4712
|
102
|
+
_globals['_GETINTERNALSTATEREQUEST']._serialized_end=4757
|
103
|
+
_globals['_GETINTERNALSTATERESPONSE']._serialized_start=4759
|
104
|
+
_globals['_GETINTERNALSTATERESPONSE']._serialized_end=4825
|
105
|
+
_globals['_SETINTERNALSTATEREQUEST']._serialized_start=4827
|
106
|
+
_globals['_SETINTERNALSTATEREQUEST']._serialized_end=4892
|
107
|
+
_globals['_SETINTERNALSTATERESPONSE']._serialized_start=4894
|
108
|
+
_globals['_SETINTERNALSTATERESPONSE']._serialized_end=4954
|
109
|
+
_globals['_SGLANGSCHEDULER']._serialized_start=4957
|
110
|
+
_globals['_SGLANGSCHEDULER']._serialized_end=5339
|
111
|
+
# @@protoc_insertion_point(module_scope)
|