sglang 0.5.3rc0__py3-none-any.whl → 0.5.3rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282)
  1. sglang/bench_one_batch.py +7 -9
  2. sglang/bench_one_batch_server.py +321 -31
  3. sglang/bench_serving.py +10 -3
  4. sglang/global_config.py +2 -2
  5. sglang/lang/backend/runtime_endpoint.py +1 -1
  6. sglang/launch_server.py +14 -0
  7. sglang/profiler.py +2 -2
  8. sglang/srt/batch_invariant_ops/__init__.py +27 -0
  9. sglang/srt/batch_invariant_ops/batch_invariant_ops.py +549 -0
  10. sglang/srt/configs/__init__.py +4 -0
  11. sglang/srt/configs/dots_ocr.py +64 -0
  12. sglang/srt/configs/falcon_h1.py +360 -0
  13. sglang/srt/configs/load_config.py +8 -0
  14. sglang/srt/configs/model_config.py +160 -105
  15. sglang/srt/configs/qwen3_vl.py +586 -0
  16. sglang/srt/constrained/base_grammar_backend.py +1 -0
  17. sglang/srt/constrained/outlines_jump_forward.py +1 -1
  18. sglang/srt/constrained/xgrammar_backend.py +6 -4
  19. sglang/srt/debug_utils/dumper.py +10 -3
  20. sglang/srt/disaggregation/ascend/conn.py +2 -2
  21. sglang/srt/disaggregation/ascend/transfer_engine.py +47 -9
  22. sglang/srt/disaggregation/common/conn.py +266 -98
  23. sglang/srt/disaggregation/decode.py +50 -9
  24. sglang/srt/disaggregation/decode_kvcache_offload_manager.py +185 -0
  25. sglang/srt/disaggregation/decode_schedule_batch_mixin.py +25 -16
  26. sglang/srt/disaggregation/mooncake/conn.py +51 -541
  27. sglang/srt/disaggregation/nixl/conn.py +148 -39
  28. sglang/srt/disaggregation/prefill.py +31 -14
  29. sglang/srt/disaggregation/utils.py +36 -5
  30. sglang/srt/distributed/device_communicators/all_reduce_utils.py +16 -0
  31. sglang/srt/distributed/device_communicators/shm_broadcast.py +4 -2
  32. sglang/srt/distributed/device_communicators/symm_mem.py +164 -0
  33. sglang/srt/distributed/parallel_state.py +135 -80
  34. sglang/srt/entrypoints/engine.py +23 -3
  35. sglang/srt/entrypoints/grpc_request_manager.py +330 -55
  36. sglang/srt/entrypoints/grpc_server.py +232 -102
  37. sglang/srt/entrypoints/http_server.py +49 -9
  38. sglang/srt/entrypoints/openai/protocol.py +110 -5
  39. sglang/srt/entrypoints/openai/serving_base.py +25 -6
  40. sglang/srt/entrypoints/openai/serving_chat.py +178 -49
  41. sglang/srt/entrypoints/openai/serving_completions.py +5 -3
  42. sglang/srt/entrypoints/openai/serving_embedding.py +1 -0
  43. sglang/srt/entrypoints/openai/serving_responses.py +42 -0
  44. sglang/srt/environ.py +285 -0
  45. sglang/srt/eplb/expert_location.py +30 -5
  46. sglang/srt/function_call/function_call_parser.py +3 -2
  47. sglang/srt/function_call/glm4_moe_detector.py +3 -3
  48. sglang/srt/function_call/gpt_oss_detector.py +23 -0
  49. sglang/srt/function_call/json_array_parser.py +63 -0
  50. sglang/srt/function_call/kimik2_detector.py +17 -4
  51. sglang/srt/function_call/utils.py +96 -5
  52. sglang/srt/grpc/compile_proto.py +245 -0
  53. sglang/srt/grpc/sglang_scheduler_pb2.py +73 -68
  54. sglang/srt/grpc/sglang_scheduler_pb2.pyi +60 -53
  55. sglang/srt/grpc/sglang_scheduler_pb2_grpc.py +3 -0
  56. sglang/srt/layers/activation.py +7 -6
  57. sglang/srt/layers/attention/aiter_backend.py +14 -15
  58. sglang/srt/layers/attention/ascend_backend.py +108 -9
  59. sglang/srt/layers/attention/attention_registry.py +206 -0
  60. sglang/srt/layers/attention/base_attn_backend.py +12 -3
  61. sglang/srt/layers/attention/cutlass_mla_backend.py +3 -3
  62. sglang/srt/layers/attention/dual_chunk_flashattention_backend.py +1 -1
  63. sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py +2 -2
  64. sglang/srt/layers/attention/fla/fused_recurrent.py +4 -4
  65. sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py +2 -2
  66. sglang/srt/layers/attention/flashattention_backend.py +41 -8
  67. sglang/srt/layers/attention/flashinfer_backend.py +112 -194
  68. sglang/srt/layers/attention/flashinfer_mla_backend.py +11 -15
  69. sglang/srt/layers/attention/flashmla_backend.py +7 -5
  70. sglang/srt/layers/attention/hybrid_attn_backend.py +11 -3
  71. sglang/srt/layers/attention/hybrid_linear_attn_backend.py +72 -72
  72. sglang/srt/layers/attention/mamba/causal_conv1d.py +1 -0
  73. sglang/srt/layers/attention/mamba/causal_conv1d_triton.py +15 -98
  74. sglang/srt/layers/attention/mamba/mamba.py +566 -1
  75. sglang/srt/layers/attention/mamba/mamba_utils.py +81 -0
  76. sglang/srt/layers/attention/mamba/ops/__init__.py +2 -0
  77. sglang/srt/layers/attention/mamba/ops/layernorm_gated.py +172 -0
  78. sglang/srt/layers/attention/mamba/ops/mamba_ssm.py +442 -0
  79. sglang/srt/layers/attention/mamba/ops/ssd_bmm.py +264 -0
  80. sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py +622 -0
  81. sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py +757 -0
  82. sglang/srt/layers/attention/mamba/ops/ssd_combined.py +262 -0
  83. sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py +275 -0
  84. sglang/srt/layers/attention/npu_ops/mla_preprocess.py +393 -0
  85. sglang/srt/layers/attention/nsa/dequant_k_cache.py +163 -0
  86. sglang/srt/layers/attention/nsa/index_buf_accessor.py +354 -0
  87. sglang/srt/layers/attention/nsa/nsa_indexer.py +761 -0
  88. sglang/srt/layers/attention/nsa/quant_k_cache.py +255 -0
  89. sglang/srt/layers/attention/nsa/tilelang_kernel.py +785 -0
  90. sglang/srt/layers/attention/nsa/transform_index.py +144 -0
  91. sglang/srt/layers/attention/nsa/utils.py +24 -0
  92. sglang/srt/layers/attention/nsa_backend.py +887 -0
  93. sglang/srt/layers/attention/tbo_backend.py +6 -6
  94. sglang/srt/layers/attention/torch_flex_backend.py +325 -0
  95. sglang/srt/layers/attention/triton_backend.py +42 -9
  96. sglang/srt/layers/attention/trtllm_mha_backend.py +5 -7
  97. sglang/srt/layers/attention/trtllm_mla_backend.py +178 -34
  98. sglang/srt/layers/attention/vision.py +58 -0
  99. sglang/srt/layers/attention/wave_backend.py +4 -4
  100. sglang/srt/layers/communicator.py +8 -0
  101. sglang/srt/layers/dp_attention.py +11 -1
  102. sglang/srt/layers/elementwise.py +3 -1
  103. sglang/srt/layers/layernorm.py +2 -0
  104. sglang/srt/layers/linear.py +21 -4
  105. sglang/srt/layers/logits_processor.py +15 -2
  106. sglang/srt/layers/moe/ep_moe/kernels.py +1 -1
  107. sglang/srt/layers/moe/ep_moe/layer.py +147 -74
  108. sglang/srt/layers/moe/flashinfer_cutedsl_moe.py +52 -25
  109. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=256,N=256,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  110. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=128,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  111. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=512,N=256,device_name=NVIDIA_B200.json +146 -0
  112. sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +6 -2
  113. sglang/srt/layers/moe/fused_moe_triton/layer.py +11 -12
  114. sglang/srt/layers/moe/token_dispatcher/deepep.py +77 -19
  115. sglang/srt/layers/moe/utils.py +10 -0
  116. sglang/srt/layers/parameter.py +23 -6
  117. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +1 -0
  118. sglang/srt/layers/quantization/compressed_tensors/schemes/__init__.py +2 -0
  119. sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +173 -0
  120. sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +2 -10
  121. sglang/srt/layers/quantization/fp8.py +2 -2
  122. sglang/srt/layers/quantization/fp8_utils.py +1 -1
  123. sglang/srt/layers/quantization/modelopt_quant.py +44 -9
  124. sglang/srt/layers/quantization/mxfp4.py +12 -4
  125. sglang/srt/layers/quantization/quark/quark_moe.py +16 -3
  126. sglang/srt/layers/quantization/w4afp8.py +0 -4
  127. sglang/srt/layers/quantization/w8a8_int8.py +15 -3
  128. sglang/srt/layers/rotary_embedding.py +78 -31
  129. sglang/srt/layers/sampler.py +52 -4
  130. sglang/srt/layers/utils.py +23 -0
  131. sglang/srt/lora/backend/base_backend.py +3 -3
  132. sglang/srt/lora/backend/chunked_backend.py +348 -0
  133. sglang/srt/lora/backend/triton_backend.py +10 -4
  134. sglang/srt/lora/lora.py +7 -5
  135. sglang/srt/lora/lora_manager.py +17 -6
  136. sglang/srt/lora/mem_pool.py +1 -1
  137. sglang/srt/lora/triton_ops/__init__.py +4 -0
  138. sglang/srt/lora/triton_ops/chunked_sgmv_expand.py +214 -0
  139. sglang/srt/lora/triton_ops/chunked_sgmv_shrink.py +174 -0
  140. sglang/srt/lora/utils.py +7 -5
  141. sglang/srt/managers/cache_controller.py +42 -142
  142. sglang/srt/managers/data_parallel_controller.py +11 -46
  143. sglang/srt/managers/detokenizer_manager.py +11 -11
  144. sglang/srt/managers/io_struct.py +162 -118
  145. sglang/srt/managers/mm_utils.py +43 -6
  146. sglang/srt/managers/multi_tokenizer_mixin.py +17 -17
  147. sglang/srt/managers/multimodal_processor.py +1 -2
  148. sglang/srt/managers/overlap_utils.py +53 -0
  149. sglang/srt/managers/schedule_batch.py +167 -86
  150. sglang/srt/managers/schedule_policy.py +143 -16
  151. sglang/srt/managers/scheduler.py +359 -214
  152. sglang/srt/managers/scheduler_input_blocker.py +1 -1
  153. sglang/srt/managers/scheduler_metrics_mixin.py +98 -126
  154. sglang/srt/managers/scheduler_output_processor_mixin.py +21 -12
  155. sglang/srt/managers/scheduler_profiler_mixin.py +5 -5
  156. sglang/srt/managers/scheduler_update_weights_mixin.py +7 -0
  157. sglang/srt/managers/tokenizer_communicator_mixin.py +111 -5
  158. sglang/srt/managers/tokenizer_manager.py +84 -136
  159. sglang/srt/managers/tp_worker.py +39 -29
  160. sglang/srt/managers/tp_worker_overlap_thread.py +33 -41
  161. sglang/srt/managers/utils.py +1 -45
  162. sglang/srt/mem_cache/allocator.py +14 -20
  163. sglang/srt/mem_cache/allocator_ascend.py +41 -27
  164. sglang/srt/mem_cache/base_prefix_cache.py +1 -1
  165. sglang/srt/mem_cache/chunk_cache.py +8 -1
  166. sglang/srt/mem_cache/evict_policy.py +23 -0
  167. sglang/srt/mem_cache/hicache_storage.py +40 -1
  168. sglang/srt/mem_cache/hiradix_cache.py +119 -32
  169. sglang/srt/mem_cache/memory_pool.py +188 -10
  170. sglang/srt/mem_cache/memory_pool_host.py +134 -182
  171. sglang/srt/mem_cache/radix_cache.py +222 -71
  172. sglang/srt/mem_cache/radix_cache_cpp.py +11 -8
  173. sglang/srt/mem_cache/storage/__init__.py +10 -0
  174. sglang/srt/mem_cache/storage/aibrix_kvcache/aibrix_kvcache_storage.py +151 -0
  175. sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py +109 -0
  176. sglang/srt/mem_cache/storage/backend_factory.py +223 -0
  177. sglang/srt/mem_cache/storage/eic/eic_storage.py +778 -0
  178. sglang/srt/mem_cache/storage/eic/test_unit.py +115 -0
  179. sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +173 -58
  180. sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py +10 -6
  181. sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +117 -10
  182. sglang/srt/mem_cache/swa_radix_cache.py +25 -34
  183. sglang/srt/metrics/collector.py +82 -120
  184. sglang/srt/metrics/func_timer.py +2 -7
  185. sglang/srt/metrics/utils.py +8 -1
  186. sglang/srt/model_executor/cpu_graph_runner.py +2 -2
  187. sglang/srt/model_executor/cuda_graph_runner.py +39 -32
  188. sglang/srt/model_executor/forward_batch_info.py +23 -38
  189. sglang/srt/model_executor/model_runner.py +131 -183
  190. sglang/srt/model_executor/npu_graph_runner.py +12 -5
  191. sglang/srt/model_loader/loader.py +14 -10
  192. sglang/srt/model_loader/weight_utils.py +156 -2
  193. sglang/srt/models/bailing_moe.py +27 -4
  194. sglang/srt/models/deepseek_nextn.py +6 -1
  195. sglang/srt/models/deepseek_v2.py +536 -153
  196. sglang/srt/models/dots_ocr.py +173 -0
  197. sglang/srt/models/falcon_h1.py +576 -0
  198. sglang/srt/models/gemma3_causal.py +0 -2
  199. sglang/srt/models/gemma3_mm.py +1 -1
  200. sglang/srt/models/gemma3n_mm.py +1 -1
  201. sglang/srt/models/glm4_moe.py +3 -3
  202. sglang/srt/models/glm4_moe_nextn.py +2 -2
  203. sglang/srt/models/glm4v.py +1 -1
  204. sglang/srt/models/glm4v_moe.py +1 -1
  205. sglang/srt/models/gpt_oss.py +7 -30
  206. sglang/srt/models/kimi_vl_moonvit.py +2 -2
  207. sglang/srt/models/llama.py +4 -0
  208. sglang/srt/models/longcat_flash.py +1 -1
  209. sglang/srt/models/longcat_flash_nextn.py +1 -1
  210. sglang/srt/models/mllama4.py +15 -4
  211. sglang/srt/models/qwen2.py +0 -7
  212. sglang/srt/models/qwen2_5_vl.py +2 -2
  213. sglang/srt/models/qwen2_audio.py +1 -1
  214. sglang/srt/models/qwen2_moe.py +64 -1
  215. sglang/srt/models/qwen2_vl.py +1 -1
  216. sglang/srt/models/qwen3.py +18 -3
  217. sglang/srt/models/qwen3_moe.py +31 -3
  218. sglang/srt/models/qwen3_next.py +36 -9
  219. sglang/srt/models/qwen3_vl.py +787 -0
  220. sglang/srt/models/qwen3_vl_moe.py +471 -0
  221. sglang/srt/models/registry.py +15 -3
  222. sglang/srt/models/sarashina2_vision.py +269 -0
  223. sglang/srt/models/solar.py +505 -0
  224. sglang/srt/models/starcoder2.py +357 -0
  225. sglang/srt/models/torch_native_llama.py +9 -2
  226. sglang/srt/models/utils.py +51 -0
  227. sglang/srt/multimodal/processors/base_processor.py +15 -7
  228. sglang/srt/multimodal/processors/dots_vlm.py +2 -3
  229. sglang/srt/multimodal/processors/internvl.py +20 -8
  230. sglang/srt/multimodal/processors/qwen_vl.py +8 -1
  231. sglang/srt/multimodal/processors/sarashina2_vision.py +81 -0
  232. sglang/srt/parser/jinja_template_utils.py +6 -0
  233. sglang/srt/sampling/sampling_batch_info.py +20 -2
  234. sglang/srt/sampling/sampling_params.py +7 -0
  235. sglang/srt/server_args.py +753 -295
  236. sglang/srt/server_args_config_parser.py +146 -0
  237. sglang/srt/single_batch_overlap.py +151 -0
  238. sglang/srt/speculative/cpp_ngram/ngram.cpp +374 -0
  239. sglang/srt/speculative/cpp_ngram/ngram.h +110 -0
  240. sglang/srt/speculative/cpp_ngram/ngram_cache.py +138 -0
  241. sglang/srt/speculative/cpp_ngram/ngram_cache_binding.cpp +43 -0
  242. sglang/srt/speculative/cpp_ngram/param.h +125 -0
  243. sglang/srt/speculative/cpp_ngram/queue.h +71 -0
  244. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +2 -1
  245. sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +3 -1
  246. sglang/srt/speculative/{eagle_utils.py → eagle_info.py} +207 -755
  247. sglang/srt/speculative/eagle_worker.py +57 -25
  248. sglang/srt/speculative/ngram_utils.py +428 -0
  249. sglang/srt/speculative/ngram_worker.py +245 -0
  250. sglang/srt/speculative/spec_info.py +47 -0
  251. sglang/srt/speculative/spec_utils.py +606 -0
  252. sglang/srt/torch_memory_saver_adapter.py +5 -7
  253. sglang/srt/tracing/trace.py +32 -6
  254. sglang/srt/two_batch_overlap.py +8 -5
  255. sglang/srt/utils/__init__.py +2 -0
  256. sglang/srt/{utils.py → utils/common.py} +399 -74
  257. sglang/srt/{hf_transformers_utils.py → utils/hf_transformers_utils.py} +49 -5
  258. sglang/srt/{patch_torch.py → utils/patch_torch.py} +8 -0
  259. sglang/srt/utils/rpd_utils.py +452 -0
  260. sglang/srt/utils/slow_rank_detector.py +71 -0
  261. sglang/srt/warmup.py +8 -4
  262. sglang/srt/weight_sync/utils.py +1 -1
  263. sglang/test/get_logits_ut.py +57 -0
  264. sglang/test/run_eval.py +79 -11
  265. sglang/test/runners.py +1 -1
  266. sglang/test/simple_eval_common.py +5 -2
  267. sglang/test/simple_eval_mmmu_vlm.py +441 -0
  268. sglang/test/test_block_fp8.py +2 -2
  269. sglang/test/test_deterministic.py +297 -0
  270. sglang/test/test_disaggregation_utils.py +12 -1
  271. sglang/test/test_programs.py +1 -1
  272. sglang/test/test_utils.py +355 -4
  273. sglang/utils.py +10 -1
  274. sglang/version.py +1 -1
  275. {sglang-0.5.3rc0.dist-info → sglang-0.5.3rc2.dist-info}/METADATA +34 -25
  276. {sglang-0.5.3rc0.dist-info → sglang-0.5.3rc2.dist-info}/RECORD +281 -210
  277. sglang/srt/mem_cache/lora_radix_cache.py +0 -421
  278. /sglang/srt/{remote_instance_weight_loader_utils.py → model_loader/remote_instance_weight_loader_utils.py} +0 -0
  279. /sglang/srt/{poll_based_barrier.py → utils/poll_based_barrier.py} +0 -0
  280. {sglang-0.5.3rc0.dist-info → sglang-0.5.3rc2.dist-info}/WHEEL +0 -0
  281. {sglang-0.5.3rc0.dist-info → sglang-0.5.3rc2.dist-info}/licenses/LICENSE +0 -0
  282. {sglang-0.5.3rc0.dist-info → sglang-0.5.3rc2.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,13 @@
1
1
  import json
2
2
  from json import JSONDecodeError, JSONDecoder
3
- from typing import Any, Tuple
3
+ from json.decoder import WHITESPACE
4
+ from typing import Any, List, Literal, Optional, Tuple, Union
4
5
 
5
6
  import partial_json_parser
6
7
  from partial_json_parser.core.options import Allow
7
8
 
9
+ from sglang.srt.entrypoints.openai.protocol import Tool, ToolChoice
10
+
8
11
 
9
12
  def _find_common_prefix(s1: str, s2: str) -> str:
10
13
  prefix = ""
@@ -37,10 +40,12 @@ def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
37
40
  """
38
41
  try:
39
42
  return (partial_json_parser.loads(input_str, flags), len(input_str))
40
- except JSONDecodeError as e:
41
- if "Extra data" in e.msg:
42
- dec = JSONDecoder()
43
- return dec.raw_decode(input_str)
43
+ except (JSONDecodeError, IndexError) as e:
44
+ msg = getattr(e, "msg", str(e))
45
+ if "Extra data" in msg or "pop from empty list" in msg:
46
+ start = WHITESPACE.match(input_str, 0).end()
47
+ obj, end = JSONDecoder().raw_decode(input_str, start)
48
+ return obj, end
44
49
  raise
45
50
 
46
51
 
@@ -50,3 +55,89 @@ def _is_complete_json(input_str: str) -> bool:
50
55
  return True
51
56
  except JSONDecodeError:
52
57
  return False
58
+
59
+
60
def _get_tool_schema_defs(tools: List[Tool]) -> dict:
    """
    Merge the ``$defs`` sections of every tool's parameter schema.

    Tools without parameters are ignored. The same definition name may
    appear in several tools as long as every occurrence is equal.

    Args:
        tools: List of tools to process

    Returns:
        Dictionary mapping each definition name to its schema

    Raises:
        ValueError: If one definition name maps to two different schemas
    """
    merged = {}
    for tool in tools:
        params = tool.function.parameters
        if params is None:
            continue
        for name, schema in params.get("$defs", {}).items():
            # Re-declaring a name is only acceptable with an identical schema.
            if name in merged and merged[name] != schema:
                raise ValueError(
                    f"Tool definition '{name}' has "
                    "multiple schemas, which is not "
                    "supported."
                )
            merged[name] = schema
    return merged
88
+
89
+
90
def _get_tool_schema(tool: Tool) -> dict:
    """
    Build the schema fragment describing one call to ``tool``.

    The fragment requires a ``name`` property pinned to the tool's function
    name and a ``parameters`` property that follows the tool's own parameter
    schema.

    Args:
        tool: The tool to describe

    Returns:
        Dictionary with ``properties`` and ``required`` keys
    """
    function = tool.function
    # Falsy parameters (None or an empty dict) fall back to an object schema
    # that declares no properties.
    parameters_schema = function.parameters or {"type": "object", "properties": {}}
    name_schema = {"type": "string", "enum": [function.name]}
    return {
        "properties": {
            "name": name_schema,
            "parameters": parameters_schema,
        },
        "required": ["name", "parameters"],
    }
102
+
103
+
104
def get_json_schema_constraint(
    tools: List[Tool], tool_choice: Union[ToolChoice, Literal["required"]]
) -> Optional[dict]:
    """
    Get the JSON schema constraint for the specified tool choice.

    The returned schema always describes an array of tool calls, where each
    item has the shape produced by ``_get_tool_schema``.

    Args:
        tools: The tools available to the request
        tool_choice: Either a ``ToolChoice`` naming one function, or the
            literal ``"required"``

    Returns:
        JSON schema dict, or None if the named function is not among
        ``tools`` or ``tool_choice`` is neither of the supported forms

    Raises:
        ValueError: If the tools involved declare conflicting ``$defs``
    """
    if isinstance(tool_choice, ToolChoice):
        # A specific function was requested: constrain the output to exactly
        # one call of that function.
        fn_name = tool_choice.function.name
        for tool in tools:
            if tool.function.name == fn_name:
                json_schema = {
                    "type": "array",
                    "minItems": 1,
                    "maxItems": 1,
                    "items": _get_tool_schema(tool),
                }
                # The tool's parameter schema is nested under
                # items.properties.parameters, so any "#/$defs/..." reference
                # inside it would not find its target there. Hoist the
                # definitions to the root, as the "required" branch does.
                json_schema_defs = _get_tool_schema_defs([tool])
                if json_schema_defs:
                    json_schema["$defs"] = json_schema_defs
                return json_schema
        return None

    if tool_choice == "required":
        # Any non-empty sequence of calls to any of the available tools.
        json_schema = {
            "type": "array",
            "minItems": 1,
            "items": {
                "type": "object",
                "anyOf": [_get_tool_schema(tool) for tool in tools],
            },
        }
        json_schema_defs = _get_tool_schema_defs(tools)
        if json_schema_defs:
            json_schema["$defs"] = json_schema_defs
        return json_schema

    return None
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Compile protobuf files for SGLang gRPC server.
4
+
5
+ This script compiles .proto files to Python code using grpc_tools.protoc.
6
+ It generates:
7
+ - *_pb2.py (protobuf message classes)
8
+ - *_pb2_grpc.py (gRPC service classes)
9
+ - *_pb2.pyi (type hints for mypy/IDEs)
10
+
11
+ Usage:
12
+ python compile_proto.py [--check] [--proto-file PROTO_FILE]
13
+
14
+ Options:
15
+ --check Check if regeneration is needed (exit 1 if needed)
16
+ --proto-file Specify proto file (default: sglang_scheduler.proto)
17
+
18
+ ### Install Dependencies
19
+ pip install "grpcio==1.74.0" "grpcio-tools==1.74.0"
20
+
21
+ ### Run Script
22
+ cd python/sglang/srt/grpc
23
+ python compile_proto.py
24
+ """
25
+
26
+
27
+ import argparse
28
+ import subprocess
29
+ import sys
30
+ from importlib.metadata import version
31
+ from pathlib import Path
32
+
33
+ GRPC_VERSION = "1.74.0"
34
+
35
+
36
def get_file_mtime(path: Path) -> float:
    """Return the modification time of ``path``, or 0.0 when it does not exist."""
    try:
        stat_result = path.stat()
    except FileNotFoundError:
        # A missing file is reported as the oldest possible timestamp.
        return 0.0
    return stat_result.st_mtime
42
+
43
+
44
def check_regeneration_needed(proto_file: Path, output_dir: Path) -> bool:
    """
    Report whether any generated file is older than the proto file.

    A generated file that does not exist has a modification time of 0.0, so
    it counts as stale whenever the proto file exists. If the proto file
    itself is missing its time is also 0.0 and nothing is reported as stale.

    Args:
        proto_file: Path to the .proto source
        output_dir: Directory holding the generated ``*_pb2*`` files

    Returns:
        True if at least one generated file predates the proto file
    """
    source_mtime = get_file_mtime(proto_file)
    stem = proto_file.stem
    outputs = (
        output_dir / f"{stem}_pb2.py",
        output_dir / f"{stem}_pb2_grpc.py",
        output_dir / f"{stem}_pb2.pyi",
    )
    return any(get_file_mtime(output) < source_mtime for output in outputs)
59
+
60
+
61
def compile_proto(proto_file: Path, output_dir: Path, verbose: bool = True) -> bool:
    """
    Compile the protobuf file to Python.

    Runs ``grpc_tools.protoc`` in a subprocess from the proto file's
    directory, checks that the three expected outputs exist, and then
    rewrites the import in the generated gRPC module via ``fix_imports``.

    Args:
        proto_file: Path to the .proto source
        output_dir: Directory that receives the generated files
        verbose: Print progress messages (errors are printed regardless)

    Returns:
        True on success; False if the proto file or grpcio-tools is missing,
        protoc fails, or an expected output is absent

    Raises:
        RuntimeError: If the installed grpcio-tools or grpcio version is not
            exactly ``GRPC_VERSION``
    """
    if not proto_file.exists():
        print(f"Error: Proto file not found: {proto_file}")
        return False

    if verbose:
        print(f"Found proto file: {proto_file}")

    # Imported only to probe availability; the compiler itself is launched as
    # a subprocess further down.
    try:
        import grpc_tools.protoc  # noqa: F401
    except ImportError:
        print("Error: grpcio-tools not installed")
        print(
            f'Install with: pip install "grpcio-tools=={GRPC_VERSION}" "grpcio=={GRPC_VERSION}"'
        )
        return False

    # Both packages must match the pinned version exactly.
    grpc_tools_version = version("grpcio-tools")
    grpc_version = version("grpcio")
    if not (grpc_tools_version == GRPC_VERSION and grpc_version == GRPC_VERSION):
        raise RuntimeError(
            f"Error: grpcio-tools version {grpc_tools_version} and grpcio version {grpc_version} detected, but {GRPC_VERSION} is required."
        )

    stem = proto_file.stem
    proto_dir = proto_file.parent
    protoc_cmd = [
        sys.executable,
        "-m",
        "grpc_tools.protoc",
        f"-I{proto_dir}",
        f"--python_out={output_dir}",
        f"--grpc_python_out={output_dir}",
        f"--pyi_out={output_dir}",  # Generate type stubs
        str(proto_file.name),
    ]

    if verbose:
        print(f"Running: {' '.join(protoc_cmd)}")

    completed = subprocess.run(
        protoc_cmd, capture_output=True, text=True, cwd=proto_dir
    )
    if completed.returncode != 0:
        print("Error compiling proto:")
        print(completed.stderr)
        if completed.stdout:
            print(completed.stdout)
        return False

    # protoc exiting cleanly is not trusted on its own: confirm every output.
    expected_names = [
        f"{stem}_pb2.py",
        f"{stem}_pb2_grpc.py",
        f"{stem}_pb2.pyi",
    ]
    missing_files = [
        name for name in expected_names if not (output_dir / name).exists()
    ]
    if missing_files:
        print(f"Error: Expected generated files not found: {missing_files}")
        return False

    if verbose:
        print("Successfully compiled protobuf files:")
        for name in expected_names:
            print(f" - {output_dir}/{name}")

    fix_imports(output_dir, stem, verbose)
    return True
138
+
139
+
140
def fix_imports(output_dir: Path, proto_stem: str, verbose: bool = True) -> None:
    """
    Rewrite the generated gRPC module to import its ``_pb2`` sibling relatively.

    Only ``<proto_stem>_pb2_grpc.py`` is touched. Each line that starts with
    ``import <proto_stem>_pb2`` becomes ``from . import <proto_stem>_pb2``.
    The match is anchored to the start of the line, so a file that was
    already rewritten is left unchanged instead of gaining a second
    ``from .`` prefix.

    Args:
        output_dir: Directory containing the generated files
        proto_stem: Proto file name without its extension
        verbose: Print a message when the file was modified
    """
    import re  # local import so the block does not depend on file-level imports

    grpc_file = output_dir / f"{proto_stem}_pb2_grpc.py"
    if not grpc_file.exists():
        return

    # Read and write as UTF-8 explicitly rather than relying on the locale's
    # default encoding.
    content = grpc_file.read_text(encoding="utf-8")

    absolute_import = re.compile(
        rf"^(?P<indent>[ \t]*)import {re.escape(proto_stem)}_pb2\b",
        re.MULTILINE,
    )
    # A callable replacement keeps proto_stem from being read as a regex
    # replacement template.
    fixed, replaced = absolute_import.subn(
        lambda match: f"{match.group('indent')}from . import {proto_stem}_pb2",
        content,
    )

    if replaced:
        grpc_file.write_text(fixed, encoding="utf-8")
        if verbose:
            print("Fixed imports in generated files")
155
+
156
+
157
def add_generation_header(output_dir: Path, proto_stem: str) -> None:
    """
    Add header to generated files indicating they are auto-generated.

    The header is prepended to ``<proto_stem>_pb2.py`` and
    ``<proto_stem>_pb2_grpc.py`` when they exist. A file that already starts
    with the header is left untouched, so repeated calls are safe.

    Args:
        output_dir: Directory containing the generated files
        proto_stem: Proto file name without its extension
    """
    header = (
        "# This file is auto-generated. Do not edit manually.\n"
        "# Regenerate with: python compile_proto.py\n"
        "\n"
    )

    for filename in (f"{proto_stem}_pb2.py", f"{proto_stem}_pb2_grpc.py"):
        file_path = output_dir / filename
        if not file_path.exists():
            continue
        # Read and write as UTF-8 explicitly so the result does not depend on
        # the platform's locale encoding.
        content = file_path.read_text(encoding="utf-8")
        if not content.startswith("# This file is auto-generated"):
            file_path.write_text(header + content, encoding="utf-8")
172
+
173
+
174
def main():
    """
    Command-line entry point.

    With ``--check`` the process exits with status 1 when the generated files
    are older than the proto file and 0 otherwise. Without it, the proto file
    is compiled and the process exits with status 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description="Compile protobuf files for SGLang gRPC server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check if regeneration is needed (exit 1 if needed)",
    )
    parser.add_argument(
        "--proto-file",
        type=str,
        default="sglang_scheduler.proto",
        help="Proto file to compile (default: sglang_scheduler.proto)",
    )
    # This flag already defaults to True, so passing it changes nothing;
    # only --quiet can turn the output off.
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=True,
        help="Verbose output (default: True)",
    )
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="Quiet mode (overrides verbose)"
    )
    args = parser.parse_args()

    verbose = args.verbose and not args.quiet

    # The proto file is looked up next to this script, and the generated
    # files are written to the same directory.
    script_dir = Path(__file__).parent
    proto_file = script_dir / args.proto_file
    output_dir = script_dir

    if args.check:
        stale = check_regeneration_needed(proto_file, output_dir)
        if verbose:
            print(
                "Proto files need regeneration"
                if stale
                else "Generated files are up to date"
            )
        sys.exit(1 if stale else 0)

    if not compile_proto(proto_file, output_dir, verbose):
        if verbose:
            print("\n❌ Protobuf compilation failed!")
        sys.exit(1)

    add_generation_header(output_dir, proto_file.stem)
    if verbose:
        print("\n✅ Protobuf compilation successful!")
        print("Generated files are ready for use")


if __name__ == "__main__":
    main()
@@ -1,3 +1,6 @@
1
+ # This file is auto-generated. Do not edit manually.
2
+ # Regenerate with: python compile_proto.py
3
+
1
4
  # -*- coding: utf-8 -*-
2
5
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
6
  # NO CHECKED-IN PROTOBUF GENCODE
@@ -26,7 +29,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__
26
29
  from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
27
30
 
28
31
 
29
- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16sglang_scheduler.proto\x12\x15sglang.grpc.scheduler\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\"\xc7\x05\n\x0eSamplingParams\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_p\x18\x02 \x01(\x02\x12\r\n\x05top_k\x18\x03 \x01(\x05\x12\r\n\x05min_p\x18\x04 \x01(\x02\x12\x19\n\x11\x66requency_penalty\x18\x05 \x01(\x02\x12\x18\n\x10presence_penalty\x18\x06 \x01(\x02\x12\x1a\n\x12repetition_penalty\x18\x07 \x01(\x02\x12\x16\n\x0emax_new_tokens\x18\x08 \x01(\x05\x12\x0c\n\x04stop\x18\t \x03(\t\x12\x16\n\x0estop_token_ids\x18\n \x03(\x05\x12\x1b\n\x13skip_special_tokens\x18\x0b \x01(\x08\x12%\n\x1dspaces_between_special_tokens\x18\x0c \x01(\x08\x12\x0f\n\x05regex\x18\r \x01(\tH\x00\x12\x15\n\x0bjson_schema\x18\x0e \x01(\tH\x00\x12\x16\n\x0c\x65\x62nf_grammar\x18\x0f \x01(\tH\x00\x12\x11\n\tlora_path\x18\x10 \x01(\t\x12\t\n\x01n\x18\x11 \x01(\x05\x12\x15\n\rtoken_healing\x18\x12 \x01(\x08\x12\x16\n\x0emin_new_tokens\x18\x13 \x01(\x05\x12\x12\n\nignore_eos\x18\x14 \x01(\x08\x12\x14\n\x0cno_stop_trim\x18\x15 \x01(\x08\x12\x17\n\x0fstream_interval\x18\x16 \x01(\x05\x12H\n\nlogit_bias\x18\x17 \x03(\x0b\x32\x34.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry\x12\x16\n\x0estructural_tag\x18\x18 \x01(\t\x12.\n\rcustom_params\x18\x19 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x30\n\x0eLogitBiasEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x42\x0c\n\nconstraint\"]\n\x13\x44isaggregatedParams\x12\x16\n\x0e\x62ootstrap_host\x18\x01 \x01(\t\x12\x16\n\x0e\x62ootstrap_port\x18\x02 \x01(\x05\x12\x16\n\x0e\x62ootstrap_room\x18\x03 \x01(\x05\"\xe9\x04\n\x0fGenerateRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x03 \x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x04 
\x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x16\n\x0ereturn_logprob\x18\x05 \x01(\x08\x12\x19\n\x11logprob_start_len\x18\x06 \x01(\x05\x12\x18\n\x10top_logprobs_num\x18\x07 \x01(\x05\x12\x19\n\x11token_ids_logprob\x18\x08 \x03(\x05\x12\x1c\n\x14return_hidden_states\x18\t \x01(\x08\x12H\n\x14\x64isaggregated_params\x18\n \x01(\x0b\x32*.sglang.grpc.scheduler.DisaggregatedParams\x12\x1e\n\x16\x63ustom_logit_processor\x18\x0b \x01(\t\x12-\n\ttimestamp\x18\x0c \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x13\n\x0blog_metrics\x18\r \x01(\x08\x12\x14\n\x0cinput_embeds\x18\x0e \x03(\x02\x12\x0f\n\x07lora_id\x18\x0f \x01(\t\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x10 \x01(\x05\x12\x15\n\rdp_balance_id\x18\x11 \x01(\x05\":\n\x0eTokenizedInput\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x11\n\tinput_ids\x18\x02 \x03(\x05\"\xd3\x01\n\x10MultimodalInputs\x12\x12\n\nimage_urls\x18\x01 \x03(\t\x12\x12\n\nvideo_urls\x18\x02 \x03(\t\x12\x12\n\naudio_urls\x18\x03 \x03(\t\x12\x33\n\x12processed_features\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x12\n\nimage_data\x18\x05 \x03(\x0c\x12\x12\n\nvideo_data\x18\x06 \x03(\x0c\x12\x12\n\naudio_data\x18\x07 \x03(\x0c\x12\x12\n\nmodalities\x18\x08 \x03(\t\"\xe3\x01\n\x10GenerateResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12;\n\x05\x63hunk\x18\x02 \x01(\x0b\x32*.sglang.grpc.scheduler.GenerateStreamChunkH\x00\x12;\n\x08\x63omplete\x18\x03 \x01(\x0b\x32\'.sglang.grpc.scheduler.GenerateCompleteH\x00\x12\x35\n\x05\x65rror\x18\x04 \x01(\x0b\x32$.sglang.grpc.scheduler.GenerateErrorH\x00\x42\n\n\x08response\"\xf5\x01\n\x13GenerateStreamChunk\x12\x10\n\x08token_id\x18\x01 \x01(\x05\x12\x0c\n\x04text\x18\x02 \x01(\t\x12\x15\n\rprompt_tokens\x18\x03 \x01(\x05\x12\x19\n\x11\x63ompletion_tokens\x18\x04 \x01(\x05\x12\x15\n\rcached_tokens\x18\x05 \x01(\x05\x12\x31\n\x08logprobs\x18\x06 \x01(\x0b\x32\x1f.sglang.grpc.scheduler.LogProbs\x12\x15\n\rhidden_states\x18\x07 \x03(\x02\x12\x17\n\x0fgeneration_time\x18\x08 
\x01(\x02\x12\x12\n\nqueue_time\x18\t \x01(\x05\"\xcd\x02\n\x10GenerateComplete\x12\x12\n\noutput_ids\x18\x01 \x03(\x05\x12\x13\n\x0boutput_text\x18\x02 \x01(\t\x12K\n\rfinish_reason\x18\x03 \x01(\x0e\x32\x34.sglang.grpc.scheduler.GenerateComplete.FinishReason\x12\x35\n\x0c\x61ll_logprobs\x18\x0b \x03(\x0b\x32\x1f.sglang.grpc.scheduler.LogProbs\x12>\n\x11\x61ll_hidden_states\x18\x0c \x03(\x0b\x32#.sglang.grpc.scheduler.HiddenStates\"L\n\x0c\x46inishReason\x12\x08\n\x04STOP\x10\x00\x12\n\n\x06LENGTH\x10\x01\x12\r\n\tEOS_TOKEN\x10\x02\x12\x0c\n\x08STOP_STR\x10\x03\x12\t\n\x05\x41\x42ORT\x10\x04\"K\n\rGenerateError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x18\n\x10http_status_code\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"\x84\x01\n\x08LogProbs\x12\x16\n\x0etoken_logprobs\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x38\n\x0ctop_logprobs\x18\x03 \x03(\x0b\x32\".sglang.grpc.scheduler.TopLogProbs\x12\x13\n\x0btoken_texts\x18\x04 \x03(\t\"E\n\x0bTopLogProbs\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x13\n\x0btoken_texts\x18\x03 \x03(\t\"?\n\x0cHiddenStates\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05layer\x18\x02 \x01(\x05\x12\x10\n\x08position\x18\x03 \x01(\x05\"\xca\x02\n\x0c\x45mbedRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x04 \x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x05 \x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x13\n\x0blog_metrics\x18\x06 \x01(\x08\x12\x16\n\x0etoken_type_ids\x18\x07 \x03(\x05\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x08 \x01(\x05\x12\x18\n\x10is_cross_encoder\x18\t \x01(\x08\x12\r\n\x05texts\x18\n \x03(\t\"\x9d\x01\n\rEmbedResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\x08\x63omplete\x18\x02 \x01(\x0b\x32$.sglang.grpc.scheduler.EmbedCompleteH\x00\x12\x32\n\x05\x65rror\x18\x03 
\x01(\x0b\x32!.sglang.grpc.scheduler.EmbedErrorH\x00\x42\n\n\x08response\"\xbc\x01\n\rEmbedComplete\x12\x11\n\tembedding\x18\x01 \x03(\x02\x12\x15\n\rprompt_tokens\x18\x02 \x01(\x05\x12\x15\n\rcached_tokens\x18\x03 \x01(\x05\x12\x15\n\rembedding_dim\x18\x04 \x01(\x05\x12\x17\n\x0fgeneration_time\x18\x05 \x01(\x02\x12:\n\x10\x62\x61tch_embeddings\x18\x06 \x03(\x0b\x32 .sglang.grpc.scheduler.Embedding\"*\n\tEmbedding\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05index\x18\x02 \x01(\x05\"<\n\nEmbedError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"N\n\x12HealthCheckRequest\x12\x38\n\ttokenized\x18\x01 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\"7\n\x13HealthCheckResponse\x12\x0f\n\x07healthy\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"2\n\x0c\x41\x62ortRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06reason\x18\x02 \x01(\t\"1\n\rAbortResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"I\n\x0fLoadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\x12\x14\n\x0c\x61\x64\x61pter_path\x18\x02 \x01(\t\x12\x0c\n\x04rank\x18\x03 \x01(\x05\"H\n\x10LoadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x12\n\nadapter_id\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\"\'\n\x11UnloadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\"6\n\x12UnloadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"w\n\x14UpdateWeightsRequest\x12\x13\n\tdisk_path\x18\x01 \x01(\tH\x00\x12\x15\n\x0btensor_data\x18\x02 \x01(\x0cH\x00\x12\x14\n\nremote_url\x18\x03 \x01(\tH\x00\x12\x13\n\x0bweight_name\x18\x04 \x01(\tB\x08\n\x06source\"9\n\x15UpdateWeightsResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"-\n\x17GetInternalStateRequest\x12\x12\n\nstate_keys\x18\x01 \x03(\t\"B\n\x18GetInternalStateResponse\x12&\n\x05state\x18\x01 
\x01(\x0b\x32\x17.google.protobuf.Struct\"A\n\x17SetInternalStateRequest\x12&\n\x05state\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"<\n\x18SetInternalStateResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xfe\x02\n\x0fSglangScheduler\x12]\n\x08Generate\x12&.sglang.grpc.scheduler.GenerateRequest\x1a\'.sglang.grpc.scheduler.GenerateResponse0\x01\x12R\n\x05\x45mbed\x12#.sglang.grpc.scheduler.EmbedRequest\x1a$.sglang.grpc.scheduler.EmbedResponse\x12\x64\n\x0bHealthCheck\x12).sglang.grpc.scheduler.HealthCheckRequest\x1a*.sglang.grpc.scheduler.HealthCheckResponse\x12R\n\x05\x41\x62ort\x12#.sglang.grpc.scheduler.AbortRequest\x1a$.sglang.grpc.scheduler.AbortResponseb\x06proto3')
32
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16sglang_scheduler.proto\x12\x15sglang.grpc.scheduler\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\"\xe1\x05\n\x0eSamplingParams\x12\x13\n\x0btemperature\x18\x01 \x01(\x02\x12\r\n\x05top_p\x18\x02 \x01(\x02\x12\r\n\x05top_k\x18\x03 \x01(\x05\x12\r\n\x05min_p\x18\x04 \x01(\x02\x12\x19\n\x11\x66requency_penalty\x18\x05 \x01(\x02\x12\x18\n\x10presence_penalty\x18\x06 \x01(\x02\x12\x1a\n\x12repetition_penalty\x18\x07 \x01(\x02\x12\x1b\n\x0emax_new_tokens\x18\x08 \x01(\x05H\x01\x88\x01\x01\x12\x0c\n\x04stop\x18\t \x03(\t\x12\x16\n\x0estop_token_ids\x18\n \x03(\r\x12\x1b\n\x13skip_special_tokens\x18\x0b \x01(\x08\x12%\n\x1dspaces_between_special_tokens\x18\x0c \x01(\x08\x12\x0f\n\x05regex\x18\r \x01(\tH\x00\x12\x15\n\x0bjson_schema\x18\x0e \x01(\tH\x00\x12\x16\n\x0c\x65\x62nf_grammar\x18\x0f \x01(\tH\x00\x12\x18\n\x0estructural_tag\x18\x10 \x01(\tH\x00\x12\x11\n\tlora_path\x18\x11 \x01(\t\x12\t\n\x01n\x18\x12 \x01(\x05\x12\x15\n\rtoken_healing\x18\x13 \x01(\x08\x12\x16\n\x0emin_new_tokens\x18\x14 \x01(\x05\x12\x12\n\nignore_eos\x18\x15 \x01(\x08\x12\x14\n\x0cno_stop_trim\x18\x16 \x01(\x08\x12\x17\n\x0fstream_interval\x18\x17 \x01(\x05\x12H\n\nlogit_bias\x18\x18 \x03(\x0b\x32\x34.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry\x12.\n\rcustom_params\x18\x19 \x01(\x0b\x32\x17.google.protobuf.Struct\x1a\x30\n\x0eLogitBiasEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x42\x0c\n\nconstraintB\x11\n\x0f_max_new_tokens\"]\n\x13\x44isaggregatedParams\x12\x16\n\x0e\x62ootstrap_host\x18\x01 \x01(\t\x12\x16\n\x0e\x62ootstrap_port\x18\x02 \x01(\x05\x12\x16\n\x0e\x62ootstrap_room\x18\x03 \x01(\x05\"\xe2\x04\n\x0fGenerateRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x03 
\x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x04 \x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x16\n\x0ereturn_logprob\x18\x05 \x01(\x08\x12\x19\n\x11logprob_start_len\x18\x06 \x01(\x05\x12\x18\n\x10top_logprobs_num\x18\x07 \x01(\x05\x12\x19\n\x11token_ids_logprob\x18\x08 \x03(\r\x12\x1c\n\x14return_hidden_states\x18\t \x01(\x08\x12H\n\x14\x64isaggregated_params\x18\n \x01(\x0b\x32*.sglang.grpc.scheduler.DisaggregatedParams\x12\x1e\n\x16\x63ustom_logit_processor\x18\x0b \x01(\t\x12-\n\ttimestamp\x18\x0c \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x13\n\x0blog_metrics\x18\r \x01(\x08\x12\x14\n\x0cinput_embeds\x18\x0e \x03(\x02\x12\x0f\n\x07lora_id\x18\x0f \x01(\t\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x10 \x01(\x05\x12\x0e\n\x06stream\x18\x11 \x01(\x08\":\n\x0eTokenizedInput\x12\x15\n\roriginal_text\x18\x01 \x01(\t\x12\x11\n\tinput_ids\x18\x02 \x03(\r\"\xd3\x01\n\x10MultimodalInputs\x12\x12\n\nimage_urls\x18\x01 \x03(\t\x12\x12\n\nvideo_urls\x18\x02 \x03(\t\x12\x12\n\naudio_urls\x18\x03 \x03(\t\x12\x33\n\x12processed_features\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x12\n\nimage_data\x18\x05 \x03(\x0c\x12\x12\n\nvideo_data\x18\x06 \x03(\x0c\x12\x12\n\naudio_data\x18\x07 \x03(\x0c\x12\x12\n\nmodalities\x18\x08 \x03(\t\"\xe3\x01\n\x10GenerateResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12;\n\x05\x63hunk\x18\x02 \x01(\x0b\x32*.sglang.grpc.scheduler.GenerateStreamChunkH\x00\x12;\n\x08\x63omplete\x18\x03 \x01(\x0b\x32\'.sglang.grpc.scheduler.GenerateCompleteH\x00\x12\x35\n\x05\x65rror\x18\x04 \x01(\x0b\x32$.sglang.grpc.scheduler.GenerateErrorH\x00\x42\n\n\x08response\"\x95\x02\n\x13GenerateStreamChunk\x12\x11\n\ttoken_ids\x18\x01 \x03(\r\x12\x15\n\rprompt_tokens\x18\x02 \x01(\x05\x12\x19\n\x11\x63ompletion_tokens\x18\x03 \x01(\x05\x12\x15\n\rcached_tokens\x18\x04 \x01(\x05\x12>\n\x0foutput_logprobs\x18\x05 \x01(\x0b\x32%.sglang.grpc.scheduler.OutputLogProbs\x12\x15\n\rhidden_states\x18\x06 
\x03(\x02\x12<\n\x0einput_logprobs\x18\x07 \x01(\x0b\x32$.sglang.grpc.scheduler.InputLogProbs\x12\r\n\x05index\x18\x08 \x01(\r\"\x9b\x03\n\x10GenerateComplete\x12\x12\n\noutput_ids\x18\x01 \x03(\r\x12\x15\n\rfinish_reason\x18\x02 \x01(\t\x12\x15\n\rprompt_tokens\x18\x03 \x01(\x05\x12\x19\n\x11\x63ompletion_tokens\x18\x04 \x01(\x05\x12\x15\n\rcached_tokens\x18\x05 \x01(\x05\x12>\n\x0foutput_logprobs\x18\x06 \x01(\x0b\x32%.sglang.grpc.scheduler.OutputLogProbs\x12>\n\x11\x61ll_hidden_states\x18\x07 \x03(\x0b\x32#.sglang.grpc.scheduler.HiddenStates\x12\x1a\n\x10matched_token_id\x18\x08 \x01(\rH\x00\x12\x1a\n\x10matched_stop_str\x18\t \x01(\tH\x00\x12<\n\x0einput_logprobs\x18\n \x01(\x0b\x32$.sglang.grpc.scheduler.InputLogProbs\x12\r\n\x05index\x18\x0b \x01(\rB\x0e\n\x0cmatched_stop\"K\n\rGenerateError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x18\n\x10http_status_code\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"u\n\x0eOutputLogProbs\x12\x16\n\x0etoken_logprobs\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x38\n\x0ctop_logprobs\x18\x03 \x03(\x0b\x32\".sglang.grpc.scheduler.TopLogProbs\"\x9e\x01\n\rInputLogProbs\x12@\n\x0etoken_logprobs\x18\x01 \x03(\x0b\x32(.sglang.grpc.scheduler.InputTokenLogProb\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\x12\x38\n\x0ctop_logprobs\x18\x03 \x03(\x0b\x32\".sglang.grpc.scheduler.TopLogProbs\"1\n\x11InputTokenLogProb\x12\x12\n\x05value\x18\x01 \x01(\x02H\x00\x88\x01\x01\x42\x08\n\x06_value\"0\n\x0bTopLogProbs\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\x11\n\ttoken_ids\x18\x02 \x03(\x05\"?\n\x0cHiddenStates\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05layer\x18\x02 \x01(\x05\x12\x10\n\x08position\x18\x03 \x01(\x05\"\xca\x02\n\x0c\x45mbedRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\ttokenized\x18\x02 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\x12:\n\tmm_inputs\x18\x04 \x01(\x0b\x32\'.sglang.grpc.scheduler.MultimodalInputs\x12>\n\x0fsampling_params\x18\x05 
\x01(\x0b\x32%.sglang.grpc.scheduler.SamplingParams\x12\x13\n\x0blog_metrics\x18\x06 \x01(\x08\x12\x16\n\x0etoken_type_ids\x18\x07 \x03(\x05\x12\x1a\n\x12\x64\x61ta_parallel_rank\x18\x08 \x01(\x05\x12\x18\n\x10is_cross_encoder\x18\t \x01(\x08\x12\r\n\x05texts\x18\n \x03(\t\"\x9d\x01\n\rEmbedResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x38\n\x08\x63omplete\x18\x02 \x01(\x0b\x32$.sglang.grpc.scheduler.EmbedCompleteH\x00\x12\x32\n\x05\x65rror\x18\x03 \x01(\x0b\x32!.sglang.grpc.scheduler.EmbedErrorH\x00\x42\n\n\x08response\"\xa3\x01\n\rEmbedComplete\x12\x11\n\tembedding\x18\x01 \x03(\x02\x12\x15\n\rprompt_tokens\x18\x02 \x01(\x05\x12\x15\n\rcached_tokens\x18\x03 \x01(\x05\x12\x15\n\rembedding_dim\x18\x04 \x01(\x05\x12:\n\x10\x62\x61tch_embeddings\x18\x05 \x03(\x0b\x32 .sglang.grpc.scheduler.Embedding\"*\n\tEmbedding\x12\x0e\n\x06values\x18\x01 \x03(\x02\x12\r\n\x05index\x18\x02 \x01(\x05\"<\n\nEmbedError\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\t\x12\x0f\n\x07\x64\x65tails\x18\x03 \x01(\t\"N\n\x12HealthCheckRequest\x12\x38\n\ttokenized\x18\x01 \x01(\x0b\x32%.sglang.grpc.scheduler.TokenizedInput\"7\n\x13HealthCheckResponse\x12\x0f\n\x07healthy\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"2\n\x0c\x41\x62ortRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06reason\x18\x02 \x01(\t\"1\n\rAbortResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"I\n\x0fLoadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\x12\x14\n\x0c\x61\x64\x61pter_path\x18\x02 \x01(\t\x12\x0c\n\x04rank\x18\x03 \x01(\x05\"H\n\x10LoadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x12\n\nadapter_id\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\"\'\n\x11UnloadLoRARequest\x12\x12\n\nadapter_id\x18\x01 \x01(\t\"6\n\x12UnloadLoRAResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"w\n\x14UpdateWeightsRequest\x12\x13\n\tdisk_path\x18\x01 
\x01(\tH\x00\x12\x15\n\x0btensor_data\x18\x02 \x01(\x0cH\x00\x12\x14\n\nremote_url\x18\x03 \x01(\tH\x00\x12\x13\n\x0bweight_name\x18\x04 \x01(\tB\x08\n\x06source\"9\n\x15UpdateWeightsResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"-\n\x17GetInternalStateRequest\x12\x12\n\nstate_keys\x18\x01 \x03(\t\"B\n\x18GetInternalStateResponse\x12&\n\x05state\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"A\n\x17SetInternalStateRequest\x12&\n\x05state\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\"<\n\x18SetInternalStateResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xfe\x02\n\x0fSglangScheduler\x12]\n\x08Generate\x12&.sglang.grpc.scheduler.GenerateRequest\x1a\'.sglang.grpc.scheduler.GenerateResponse0\x01\x12R\n\x05\x45mbed\x12#.sglang.grpc.scheduler.EmbedRequest\x1a$.sglang.grpc.scheduler.EmbedResponse\x12\x64\n\x0bHealthCheck\x12).sglang.grpc.scheduler.HealthCheckRequest\x1a*.sglang.grpc.scheduler.HealthCheckResponse\x12R\n\x05\x41\x62ort\x12#.sglang.grpc.scheduler.AbortRequest\x1a$.sglang.grpc.scheduler.AbortResponseb\x06proto3')
30
33
 
31
34
  _globals = globals()
32
35
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -36,71 +39,73 @@ if not _descriptor._USE_C_DESCRIPTORS:
36
39
  _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._loaded_options = None
37
40
  _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_options = b'8\001'
38
41
  _globals['_SAMPLINGPARAMS']._serialized_start=113
39
- _globals['_SAMPLINGPARAMS']._serialized_end=824
40
- _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_start=762
41
- _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_end=810
42
- _globals['_DISAGGREGATEDPARAMS']._serialized_start=826
43
- _globals['_DISAGGREGATEDPARAMS']._serialized_end=919
44
- _globals['_GENERATEREQUEST']._serialized_start=922
45
- _globals['_GENERATEREQUEST']._serialized_end=1539
46
- _globals['_TOKENIZEDINPUT']._serialized_start=1541
47
- _globals['_TOKENIZEDINPUT']._serialized_end=1599
48
- _globals['_MULTIMODALINPUTS']._serialized_start=1602
49
- _globals['_MULTIMODALINPUTS']._serialized_end=1813
50
- _globals['_GENERATERESPONSE']._serialized_start=1816
51
- _globals['_GENERATERESPONSE']._serialized_end=2043
52
- _globals['_GENERATESTREAMCHUNK']._serialized_start=2046
53
- _globals['_GENERATESTREAMCHUNK']._serialized_end=2291
54
- _globals['_GENERATECOMPLETE']._serialized_start=2294
55
- _globals['_GENERATECOMPLETE']._serialized_end=2627
56
- _globals['_GENERATECOMPLETE_FINISHREASON']._serialized_start=2551
57
- _globals['_GENERATECOMPLETE_FINISHREASON']._serialized_end=2627
58
- _globals['_GENERATEERROR']._serialized_start=2629
59
- _globals['_GENERATEERROR']._serialized_end=2704
60
- _globals['_LOGPROBS']._serialized_start=2707
61
- _globals['_LOGPROBS']._serialized_end=2839
62
- _globals['_TOPLOGPROBS']._serialized_start=2841
63
- _globals['_TOPLOGPROBS']._serialized_end=2910
64
- _globals['_HIDDENSTATES']._serialized_start=2912
65
- _globals['_HIDDENSTATES']._serialized_end=2975
66
- _globals['_EMBEDREQUEST']._serialized_start=2978
67
- _globals['_EMBEDREQUEST']._serialized_end=3308
68
- _globals['_EMBEDRESPONSE']._serialized_start=3311
69
- _globals['_EMBEDRESPONSE']._serialized_end=3468
70
- _globals['_EMBEDCOMPLETE']._serialized_start=3471
71
- _globals['_EMBEDCOMPLETE']._serialized_end=3659
72
- _globals['_EMBEDDING']._serialized_start=3661
73
- _globals['_EMBEDDING']._serialized_end=3703
74
- _globals['_EMBEDERROR']._serialized_start=3705
75
- _globals['_EMBEDERROR']._serialized_end=3765
76
- _globals['_HEALTHCHECKREQUEST']._serialized_start=3767
77
- _globals['_HEALTHCHECKREQUEST']._serialized_end=3845
78
- _globals['_HEALTHCHECKRESPONSE']._serialized_start=3847
79
- _globals['_HEALTHCHECKRESPONSE']._serialized_end=3902
80
- _globals['_ABORTREQUEST']._serialized_start=3904
81
- _globals['_ABORTREQUEST']._serialized_end=3954
82
- _globals['_ABORTRESPONSE']._serialized_start=3956
83
- _globals['_ABORTRESPONSE']._serialized_end=4005
84
- _globals['_LOADLORAREQUEST']._serialized_start=4007
85
- _globals['_LOADLORAREQUEST']._serialized_end=4080
86
- _globals['_LOADLORARESPONSE']._serialized_start=4082
87
- _globals['_LOADLORARESPONSE']._serialized_end=4154
88
- _globals['_UNLOADLORAREQUEST']._serialized_start=4156
89
- _globals['_UNLOADLORAREQUEST']._serialized_end=4195
90
- _globals['_UNLOADLORARESPONSE']._serialized_start=4197
91
- _globals['_UNLOADLORARESPONSE']._serialized_end=4251
92
- _globals['_UPDATEWEIGHTSREQUEST']._serialized_start=4253
93
- _globals['_UPDATEWEIGHTSREQUEST']._serialized_end=4372
94
- _globals['_UPDATEWEIGHTSRESPONSE']._serialized_start=4374
95
- _globals['_UPDATEWEIGHTSRESPONSE']._serialized_end=4431
96
- _globals['_GETINTERNALSTATEREQUEST']._serialized_start=4433
97
- _globals['_GETINTERNALSTATEREQUEST']._serialized_end=4478
98
- _globals['_GETINTERNALSTATERESPONSE']._serialized_start=4480
99
- _globals['_GETINTERNALSTATERESPONSE']._serialized_end=4546
100
- _globals['_SETINTERNALSTATEREQUEST']._serialized_start=4548
101
- _globals['_SETINTERNALSTATEREQUEST']._serialized_end=4613
102
- _globals['_SETINTERNALSTATERESPONSE']._serialized_start=4615
103
- _globals['_SETINTERNALSTATERESPONSE']._serialized_end=4675
104
- _globals['_SGLANGSCHEDULER']._serialized_start=4678
105
- _globals['_SGLANGSCHEDULER']._serialized_end=5060
42
+ _globals['_SAMPLINGPARAMS']._serialized_end=850
43
+ _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_start=769
44
+ _globals['_SAMPLINGPARAMS_LOGITBIASENTRY']._serialized_end=817
45
+ _globals['_DISAGGREGATEDPARAMS']._serialized_start=852
46
+ _globals['_DISAGGREGATEDPARAMS']._serialized_end=945
47
+ _globals['_GENERATEREQUEST']._serialized_start=948
48
+ _globals['_GENERATEREQUEST']._serialized_end=1558
49
+ _globals['_TOKENIZEDINPUT']._serialized_start=1560
50
+ _globals['_TOKENIZEDINPUT']._serialized_end=1618
51
+ _globals['_MULTIMODALINPUTS']._serialized_start=1621
52
+ _globals['_MULTIMODALINPUTS']._serialized_end=1832
53
+ _globals['_GENERATERESPONSE']._serialized_start=1835
54
+ _globals['_GENERATERESPONSE']._serialized_end=2062
55
+ _globals['_GENERATESTREAMCHUNK']._serialized_start=2065
56
+ _globals['_GENERATESTREAMCHUNK']._serialized_end=2342
57
+ _globals['_GENERATECOMPLETE']._serialized_start=2345
58
+ _globals['_GENERATECOMPLETE']._serialized_end=2756
59
+ _globals['_GENERATEERROR']._serialized_start=2758
60
+ _globals['_GENERATEERROR']._serialized_end=2833
61
+ _globals['_OUTPUTLOGPROBS']._serialized_start=2835
62
+ _globals['_OUTPUTLOGPROBS']._serialized_end=2952
63
+ _globals['_INPUTLOGPROBS']._serialized_start=2955
64
+ _globals['_INPUTLOGPROBS']._serialized_end=3113
65
+ _globals['_INPUTTOKENLOGPROB']._serialized_start=3115
66
+ _globals['_INPUTTOKENLOGPROB']._serialized_end=3164
67
+ _globals['_TOPLOGPROBS']._serialized_start=3166
68
+ _globals['_TOPLOGPROBS']._serialized_end=3214
69
+ _globals['_HIDDENSTATES']._serialized_start=3216
70
+ _globals['_HIDDENSTATES']._serialized_end=3279
71
+ _globals['_EMBEDREQUEST']._serialized_start=3282
72
+ _globals['_EMBEDREQUEST']._serialized_end=3612
73
+ _globals['_EMBEDRESPONSE']._serialized_start=3615
74
+ _globals['_EMBEDRESPONSE']._serialized_end=3772
75
+ _globals['_EMBEDCOMPLETE']._serialized_start=3775
76
+ _globals['_EMBEDCOMPLETE']._serialized_end=3938
77
+ _globals['_EMBEDDING']._serialized_start=3940
78
+ _globals['_EMBEDDING']._serialized_end=3982
79
+ _globals['_EMBEDERROR']._serialized_start=3984
80
+ _globals['_EMBEDERROR']._serialized_end=4044
81
+ _globals['_HEALTHCHECKREQUEST']._serialized_start=4046
82
+ _globals['_HEALTHCHECKREQUEST']._serialized_end=4124
83
+ _globals['_HEALTHCHECKRESPONSE']._serialized_start=4126
84
+ _globals['_HEALTHCHECKRESPONSE']._serialized_end=4181
85
+ _globals['_ABORTREQUEST']._serialized_start=4183
86
+ _globals['_ABORTREQUEST']._serialized_end=4233
87
+ _globals['_ABORTRESPONSE']._serialized_start=4235
88
+ _globals['_ABORTRESPONSE']._serialized_end=4284
89
+ _globals['_LOADLORAREQUEST']._serialized_start=4286
90
+ _globals['_LOADLORAREQUEST']._serialized_end=4359
91
+ _globals['_LOADLORARESPONSE']._serialized_start=4361
92
+ _globals['_LOADLORARESPONSE']._serialized_end=4433
93
+ _globals['_UNLOADLORAREQUEST']._serialized_start=4435
94
+ _globals['_UNLOADLORAREQUEST']._serialized_end=4474
95
+ _globals['_UNLOADLORARESPONSE']._serialized_start=4476
96
+ _globals['_UNLOADLORARESPONSE']._serialized_end=4530
97
+ _globals['_UPDATEWEIGHTSREQUEST']._serialized_start=4532
98
+ _globals['_UPDATEWEIGHTSREQUEST']._serialized_end=4651
99
+ _globals['_UPDATEWEIGHTSRESPONSE']._serialized_start=4653
100
+ _globals['_UPDATEWEIGHTSRESPONSE']._serialized_end=4710
101
+ _globals['_GETINTERNALSTATEREQUEST']._serialized_start=4712
102
+ _globals['_GETINTERNALSTATEREQUEST']._serialized_end=4757
103
+ _globals['_GETINTERNALSTATERESPONSE']._serialized_start=4759
104
+ _globals['_GETINTERNALSTATERESPONSE']._serialized_end=4825
105
+ _globals['_SETINTERNALSTATEREQUEST']._serialized_start=4827
106
+ _globals['_SETINTERNALSTATEREQUEST']._serialized_end=4892
107
+ _globals['_SETINTERNALSTATERESPONSE']._serialized_start=4894
108
+ _globals['_SETINTERNALSTATERESPONSE']._serialized_end=4954
109
+ _globals['_SGLANGSCHEDULER']._serialized_start=4957
110
+ _globals['_SGLANGSCHEDULER']._serialized_end=5339
106
111
  # @@protoc_insertion_point(module_scope)