sglang 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_offline_throughput.py +16 -10
- sglang/bench_one_batch.py +5 -4
- sglang/bench_one_batch_server.py +86 -22
- sglang/bench_serving.py +197 -110
- sglang/compile_deep_gemm.py +4 -4
- sglang/lang/backend/runtime_endpoint.py +24 -1
- sglang/profiler.py +167 -0
- sglang/srt/_custom_ops.py +34 -0
- sglang/srt/configs/internvl.py +8 -12
- sglang/srt/configs/model_config.py +66 -29
- sglang/srt/constrained/base_grammar_backend.py +5 -2
- sglang/srt/constrained/llguidance_backend.py +9 -8
- sglang/srt/constrained/outlines_backend.py +5 -4
- sglang/srt/constrained/xgrammar_backend.py +18 -18
- sglang/srt/conversation.py +47 -9
- sglang/srt/custom_op.py +38 -3
- sglang/srt/debug_utils.py +74 -0
- sglang/srt/disaggregation/common/__init__.py +1 -0
- sglang/srt/disaggregation/common/conn.py +407 -0
- sglang/srt/disaggregation/decode.py +187 -134
- sglang/srt/disaggregation/decode_schedule_batch_mixin.py +142 -0
- sglang/srt/disaggregation/fake/conn.py +4 -13
- sglang/srt/disaggregation/kv_events.py +412 -0
- sglang/srt/disaggregation/launch_lb.py +140 -0
- sglang/srt/disaggregation/mini_lb.py +84 -70
- sglang/srt/disaggregation/mooncake/conn.py +441 -140
- sglang/srt/disaggregation/mooncake/transfer_engine.py +31 -14
- sglang/srt/disaggregation/nixl/conn.py +124 -442
- sglang/srt/disaggregation/prefill.py +128 -44
- sglang/srt/disaggregation/utils.py +154 -6
- sglang/srt/distributed/device_communicators/pymscclpp.py +315 -0
- sglang/srt/distributed/parallel_state.py +52 -5
- sglang/srt/distributed/utils.py +3 -3
- sglang/srt/entrypoints/EngineBase.py +11 -0
- sglang/srt/entrypoints/engine.py +129 -12
- sglang/srt/entrypoints/http_server.py +21 -6
- sglang/srt/entrypoints/http_server_engine.py +5 -2
- sglang/srt/function_call/base_format_detector.py +302 -0
- sglang/srt/function_call/core_types.py +34 -0
- sglang/srt/function_call/deepseekv3_detector.py +205 -0
- sglang/srt/function_call/ebnf_composer.py +248 -0
- sglang/srt/function_call/function_call_parser.py +202 -0
- sglang/srt/function_call/llama32_detector.py +93 -0
- sglang/srt/function_call/mistral_detector.py +131 -0
- sglang/srt/function_call/pythonic_detector.py +229 -0
- sglang/srt/function_call/qwen25_detector.py +121 -0
- sglang/srt/function_call/utils.py +52 -0
- sglang/srt/hf_transformers_utils.py +50 -7
- sglang/srt/layers/attention/aiter_backend.py +878 -0
- sglang/srt/layers/attention/base_attn_backend.py +4 -0
- sglang/srt/layers/attention/cutlass_mla_backend.py +2 -19
- sglang/srt/layers/attention/flashattention_backend.py +166 -35
- sglang/srt/layers/attention/flashinfer_backend.py +45 -1
- sglang/srt/layers/attention/flashinfer_mla_backend.py +45 -5
- sglang/srt/layers/attention/flashmla_backend.py +340 -78
- sglang/srt/layers/attention/intel_amx_backend.py +128 -0
- sglang/srt/layers/attention/tbo_backend.py +232 -0
- sglang/srt/layers/attention/torch_native_backend.py +3 -0
- sglang/srt/layers/attention/triton_backend.py +247 -5
- sglang/srt/layers/attention/triton_ops/extend_attention.py +12 -4
- sglang/srt/layers/attention/utils.py +2 -2
- sglang/srt/layers/attention/vision.py +1 -1
- sglang/srt/layers/communicator.py +517 -0
- sglang/srt/layers/dp_attention.py +6 -15
- sglang/srt/layers/layernorm.py +30 -19
- sglang/srt/layers/moe/cutlass_moe.py +370 -0
- sglang/srt/layers/moe/cutlass_moe_params.py +169 -0
- sglang/srt/layers/moe/ep_moe/kernels.py +60 -17
- sglang/srt/layers/moe/ep_moe/layer.py +195 -87
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py +88 -8
- sglang/srt/layers/moe/fused_moe_native.py +4 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +220 -25
- sglang/srt/layers/moe/fused_moe_triton/layer.py +48 -4
- sglang/srt/layers/moe/topk.py +107 -24
- sglang/srt/layers/multimodal.py +70 -0
- sglang/srt/layers/quantization/__init__.py +10 -4
- sglang/srt/layers/quantization/blockwise_int8.py +3 -0
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +5 -0
- sglang/srt/layers/quantization/deep_gemm.py +60 -59
- sglang/srt/layers/quantization/fp8.py +113 -18
- sglang/srt/layers/quantization/fp8_kernel.py +118 -66
- sglang/srt/layers/quantization/fp8_utils.py +165 -43
- sglang/srt/layers/quantization/gptq.py +298 -6
- sglang/srt/layers/quantization/int8_kernel.py +18 -5
- sglang/srt/layers/quantization/modelopt_quant.py +334 -7
- sglang/srt/layers/quantization/moe_wna16.py +3 -0
- sglang/srt/layers/quantization/qoq.py +244 -0
- sglang/srt/layers/quantization/w8a8_fp8.py +3 -0
- sglang/srt/layers/quantization/w8a8_int8.py +3 -0
- sglang/srt/layers/rotary_embedding.py +6 -12
- sglang/srt/layers/sampler.py +80 -79
- sglang/srt/layers/utils.py +6 -0
- sglang/srt/lora/layers.py +12 -15
- sglang/srt/lora/lora.py +49 -5
- sglang/srt/lora/lora_manager.py +20 -8
- sglang/srt/lora/mem_pool.py +24 -16
- sglang/srt/lora/utils.py +17 -13
- sglang/srt/managers/data_parallel_controller.py +13 -5
- sglang/srt/managers/eplb_algorithms/__init__.py +63 -0
- sglang/srt/managers/eplb_algorithms/deepseek.py +223 -0
- sglang/srt/managers/eplb_algorithms/deepseek_vec.py +276 -0
- sglang/srt/managers/eplb_manager.py +96 -0
- sglang/srt/managers/expert_distribution.py +878 -56
- sglang/srt/managers/expert_location.py +448 -0
- sglang/srt/managers/expert_location_dispatch.py +108 -0
- sglang/srt/managers/io_struct.py +29 -5
- sglang/srt/managers/mm_utils.py +355 -151
- sglang/srt/managers/multimodal_processors/base_processor.py +299 -42
- sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +6 -1
- sglang/srt/managers/multimodal_processors/gemma3.py +15 -17
- sglang/srt/managers/multimodal_processors/internvl.py +18 -5
- sglang/srt/managers/multimodal_processors/janus_pro.py +7 -1
- sglang/srt/managers/multimodal_processors/kimi_vl.py +14 -32
- sglang/srt/managers/multimodal_processors/llava.py +3 -3
- sglang/srt/managers/multimodal_processors/minicpm.py +27 -32
- sglang/srt/managers/multimodal_processors/mllama4.py +6 -0
- sglang/srt/managers/multimodal_processors/phi4mm.py +87 -0
- sglang/srt/managers/multimodal_processors/pixtral.py +9 -9
- sglang/srt/managers/multimodal_processors/qwen_vl.py +35 -35
- sglang/srt/managers/schedule_batch.py +185 -55
- sglang/srt/managers/schedule_policy.py +4 -5
- sglang/srt/managers/scheduler.py +389 -154
- sglang/srt/managers/session_controller.py +1 -1
- sglang/srt/managers/tokenizer_manager.py +231 -39
- sglang/srt/managers/utils.py +0 -4
- sglang/srt/mem_cache/base_prefix_cache.py +3 -0
- sglang/srt/mem_cache/chunk_cache.py +3 -1
- sglang/srt/mem_cache/hiradix_cache.py +4 -4
- sglang/srt/mem_cache/memory_pool.py +74 -52
- sglang/srt/mem_cache/multimodal_cache.py +45 -0
- sglang/srt/mem_cache/radix_cache.py +58 -5
- sglang/srt/metrics/collector.py +11 -2
- sglang/srt/mm_utils.py +10 -0
- sglang/srt/model_executor/cuda_graph_runner.py +87 -65
- sglang/srt/model_executor/expert_location_updater.py +557 -0
- sglang/srt/model_executor/forward_batch_info.py +39 -14
- sglang/srt/model_executor/model_runner.py +231 -101
- sglang/srt/model_loader/loader.py +10 -6
- sglang/srt/model_loader/utils.py +67 -1
- sglang/srt/models/clip.py +5 -1
- sglang/srt/models/deepseek_nextn.py +1 -1
- sglang/srt/models/deepseek_v2.py +732 -403
- sglang/srt/models/exaone.py +8 -3
- sglang/srt/models/gemma3_causal.py +7 -0
- sglang/srt/models/gemma3_mm.py +75 -33
- sglang/srt/models/idefics2.py +342 -0
- sglang/srt/models/kimi_vl.py +4 -4
- sglang/srt/models/llama.py +1 -1
- sglang/srt/models/llama4.py +10 -2
- sglang/srt/models/llava.py +26 -18
- sglang/srt/models/mimo_mtp.py +220 -0
- sglang/srt/models/minicpmo.py +7 -17
- sglang/srt/models/minicpmv.py +3 -295
- sglang/srt/models/mistral.py +71 -1
- sglang/srt/models/mllama.py +3 -3
- sglang/srt/models/phi4mm.py +512 -0
- sglang/srt/models/qwen2.py +133 -35
- sglang/srt/models/qwen2_5_vl.py +5 -3
- sglang/srt/models/qwen2_eagle.py +4 -1
- sglang/srt/models/qwen2_moe.py +206 -69
- sglang/srt/models/qwen2_vl.py +3 -3
- sglang/srt/models/qwen3.py +92 -19
- sglang/srt/models/qwen3_moe.py +457 -55
- sglang/srt/models/registry.py +9 -1
- sglang/srt/models/siglip.py +294 -0
- sglang/srt/models/transformers.py +291 -0
- sglang/srt/openai_api/adapter.py +114 -40
- sglang/srt/openai_api/protocol.py +37 -2
- sglang/srt/openai_api/utils.py +172 -0
- sglang/srt/operations.py +189 -0
- sglang/srt/operations_strategy.py +207 -0
- sglang/srt/sampling/sampling_batch_info.py +13 -1
- sglang/srt/sampling/sampling_params.py +2 -1
- sglang/srt/server_args.py +235 -38
- sglang/srt/speculative/build_eagle_tree.py +8 -8
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +8 -11
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +253 -0
- sglang/srt/speculative/eagle_utils.py +181 -90
- sglang/srt/speculative/eagle_worker.py +146 -21
- sglang/srt/two_batch_overlap.py +635 -0
- sglang/srt/utils.py +197 -19
- sglang/test/runners.py +16 -7
- sglang/test/send_one.py +4 -0
- sglang/test/test_cutlass_moe.py +278 -0
- sglang/test/test_fp4_moe.py +248 -0
- sglang/test/test_utils.py +81 -42
- sglang/utils.py +2 -2
- sglang/version.py +1 -1
- {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/METADATA +31 -19
- sglang-0.4.7.dist-info/RECORD +699 -0
- {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/WHEEL +1 -1
- sglang/srt/function_call_parser.py +0 -858
- sglang/srt/platforms/interface.py +0 -371
- sglang-0.4.6.post4.dist-info/RECORD +0 -646
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H200.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_L40S.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=96,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/models/{xiaomi_mimo.py → mimo.py} +0 -0
- {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/top_level.txt +0 -0
@@ -1,858 +0,0 @@
|
|
1
|
-
import ast
|
2
|
-
import json
|
3
|
-
import logging
|
4
|
-
import re
|
5
|
-
from abc import ABC, abstractmethod
|
6
|
-
from dataclasses import dataclass
|
7
|
-
from json import JSONDecodeError, JSONDecoder
|
8
|
-
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type
|
9
|
-
|
10
|
-
import partial_json_parser
|
11
|
-
from partial_json_parser.core.exceptions import MalformedJSON
|
12
|
-
from partial_json_parser.core.options import Allow
|
13
|
-
from pydantic import BaseModel
|
14
|
-
|
15
|
-
from sglang.srt.openai_api.protocol import (
|
16
|
-
StructuralTagResponseFormat,
|
17
|
-
StructuresResponseFormat,
|
18
|
-
Tool,
|
19
|
-
)
|
20
|
-
|
21
|
-
logger = logging.getLogger(__name__)
|
22
|
-
|
23
|
-
# Marker strings that introduce a tool call in raw model output. Several of
# them are the `bot_token` of a detector defined further down in this module
# ("<tool_call>", "<|python_tag|>", the "[TOOL_CALLS]" prefix, "<|tool▁calls▁begin|>").
# NOTE(review): the consumers of this list are outside this section — confirm
# how "<|plugin|>" and "<function=" are used before changing it.
TOOLS_TAG_LIST = [
    "<|plugin|>",
    "<function=",
    "<tool_call>",
    "<|python_tag|>",
    "[TOOL_CALLS]",
    "<|tool▁calls▁begin|>",
]
|
31
|
-
|
32
|
-
|
33
|
-
class ToolCallItem(BaseModel):
    """Simple encapsulation of the parsed ToolCall result for easier usage in streaming contexts."""

    # Index attached to the call. Detectors in this module fill it either with
    # the tool's position in the request's `tools` list or with their running
    # `current_tool_id` counter, depending on the code path.
    tool_index: int
    # Function name. Streaming chunks that only carry argument text leave this
    # as None or "".
    name: Optional[str] = None
    parameters: str  # JSON string
|
39
|
-
|
40
|
-
|
41
|
-
def _find_common_prefix(s1: str, s2: str) -> str:
|
42
|
-
prefix = ""
|
43
|
-
min_length = min(len(s1), len(s2))
|
44
|
-
for i in range(0, min_length):
|
45
|
-
if s1[i] == s2[i]:
|
46
|
-
prefix += s1[i]
|
47
|
-
else:
|
48
|
-
break
|
49
|
-
return prefix
|
50
|
-
|
51
|
-
|
52
|
-
def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
    """Parse a possibly incomplete JSON document.

    Args:
        input_str: Text to parse.
        flags: ``partial_json_parser`` options forwarded to its ``loads``.

    Returns:
        ``(value, consumed)``. On the normal path ``consumed`` is
        ``len(input_str)``. When the parser reports "Extra data", the first
        JSON value is decoded with ``JSONDecoder.raw_decode`` and ``consumed``
        is the index at which that value ends.

    Raises:
        JSONDecodeError: Any decode error other than "Extra data".
    """
    try:
        parsed = partial_json_parser.loads(input_str, flags)
    except JSONDecodeError as err:
        if "Extra data" not in err.msg:
            raise
        # Trailing content after a complete value: decode just that value.
        return JSONDecoder().raw_decode(input_str)
    return (parsed, len(input_str))
|
60
|
-
|
61
|
-
|
62
|
-
def _is_complete_json(input_str: str) -> bool:
|
63
|
-
try:
|
64
|
-
json.loads(input_str)
|
65
|
-
return True
|
66
|
-
except JSONDecodeError:
|
67
|
-
return False
|
68
|
-
|
69
|
-
|
70
|
-
class StreamingParseResult:
    """Outcome of one parsing step: plain text plus any tool calls found."""

    def __init__(
        self, normal_text: str = "", calls: Optional[List[ToolCallItem]] = None
    ):
        # A missing (or empty) `calls` argument becomes a fresh empty list.
        if not calls:
            calls = []
        self.calls = calls
        self.normal_text = normal_text
|
78
|
-
|
79
|
-
|
80
|
-
@dataclass
class StructureInfo:
    """Literal strings describing how one tool call is framed in model output.

    Instances are produced, per function name, by the callables returned from
    the detectors' ``structure_info`` methods.
    """

    # Text that opens the call, e.g. '<tool_call>{"name":"' + name + '", "arguments":'
    begin: str
    # Text that closes the call, e.g. "}</tool_call>"
    end: str
    # Marker associated with the format, e.g. "<tool_call>".
    # NOTE(review): presumably what a structural-tag grammar triggers on —
    # confirm against the consumer of this dataclass.
    trigger: str
|
85
|
-
|
86
|
-
|
87
|
-
# Type of the callable every detector's ``structure_info()`` returns:
# it maps a function name to the StructureInfo for that function.
_GetInfoFunc = Callable[[str], StructureInfo]
"""
Helper alias of function
Usually it is a function that takes a name string and returns a StructureInfo object,
which can be used to construct a structural_tag object
"""
|
93
|
-
|
94
|
-
|
95
|
-
class BaseFormatDetector(ABC):
    """Base class providing two sets of interfaces: one-time and streaming incremental.

    Subclasses set ``bot_token`` (and optionally ``eot_token``) and implement
    ``detect_and_parse``, ``has_tool_call`` and ``structure_info``. The shared
    ``parse_streaming_increment`` handles formats whose tool calls are JSON
    objects, optionally prefixed by ``bot_token`` and separated by ``"; "``.
    """

    def __init__(self):
        # State used when parsing tool calls in streaming mode.
        self._buffer = ""  # text accumulated across increments, not yet finished
        self.prev_tool_call_arr: List[Dict] = []  # objects parsed on the previous increment
        self.current_tool_id: int = -1  # index of the call being streamed; -1 = none yet
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: List[str] = (
            []
        )  # map what has been streamed for each tool so far to a list
        self.bot_token = ""
        self.eot_token = ""

    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
        """Convert decoded JSON call object(s) into ``ToolCallItem``s.

        Args:
            action: One decoded call object, or a list of them. Each is read
                with ``.get("name")`` and ``.get("parameters")`` /
                ``.get("arguments")``.
            tools: Tools offered in the request; only calls naming one of
                them are kept.

        Returns:
            One item per recognised call. ``tool_index`` is the tool's
            position in ``tools`` and ``parameters`` is the JSON-encoded
            arguments. Calls to unknown names are logged and dropped.
        """
        # Guard `tool.function` the same way parse_streaming_increment does,
        # so a tool without a function entry is skipped instead of raising.
        tool_indices = {
            tool.function.name: i
            for i, tool in enumerate(tools)
            if tool.function and tool.function.name
        }
        if not isinstance(action, list):
            action = [action]

        results = []
        for act in action:
            name = act.get("name")
            if name and name in tool_indices:
                results.append(
                    ToolCallItem(
                        tool_index=tool_indices[name],
                        name=name,
                        parameters=json.dumps(
                            act.get("parameters") or act.get("arguments", {}),
                            ensure_ascii=False,
                        ),
                    )
                )
            else:
                logger.warning(f"Model attempted to call undefined function: {name}")

        return results

    @abstractmethod
    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        Parses the text in one go.

        This default body treats the whole of ``text`` as a JSON document and
        returns a StreamingParseResult holding the calls extracted from it by
        ``parse_base_json`` (``normal_text`` is left empty). Subclasses must
        override it.

        :raises json.JSONDecodeError: if ``text`` is not valid JSON.
        """
        action = json.loads(text)
        return StreamingParseResult(calls=self.parse_base_json(action, tools))

    def parse_streaming_increment(
        self, new_text: str, tools: List[Tool]
    ) -> StreamingParseResult:
        """
        Streaming incremental parsing with tool validation.

        ``new_text`` is appended to an internal buffer. While the buffer
        neither contains ``bot_token`` nor starts with ``{``, the increment is
        returned as normal text (with ``eot_token`` removed) and the buffer is
        cleared. Otherwise the buffer is re-parsed as partial JSON and at most
        one ToolCallItem is emitted per call: first the function name, then
        successive slices of the JSON-encoded arguments.

        Any exception raised while parsing is logged and an empty result is
        returned.
        """
        # Append new text to buffer
        self._buffer += new_text
        current_text = self._buffer
        if not (self.bot_token in current_text or current_text.startswith("{")):
            self._buffer = ""
            if self.eot_token in new_text:
                new_text = new_text.replace(self.eot_token, "")
            return StreamingParseResult(normal_text=new_text)

        # Build tool indices if not already built
        if not hasattr(self, "_tool_indices"):
            self._tool_indices = {
                tool.function.name: i
                for i, tool in enumerate(tools)
                if tool.function and tool.function.name
            }

        # Until the name has been sent, do not accept partially generated
        # strings, so an incomplete function name is never emitted.
        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
        try:
            tool_call_arr = []
            is_complete = []
            try:
                start_idx = (
                    len(self.bot_token)
                    if current_text.startswith(self.bot_token)
                    else 0
                )
                while start_idx < len(current_text):
                    (obj, end_idx) = _partial_json_loads(
                        current_text[start_idx:], flags
                    )
                    is_complete.append(
                        _is_complete_json(current_text[start_idx : start_idx + end_idx])
                    )
                    # Step over the parsed object and the "; " separator.
                    start_idx += end_idx + len("; ")

                    # Validate tool name if present
                    if "name" in obj and obj["name"] not in self._tool_indices:
                        # Invalid tool name - reset state
                        self._buffer = ""
                        self.current_tool_id = -1
                        self.current_tool_name_sent = False
                        if self.streamed_args_for_tool:
                            self.streamed_args_for_tool.pop()
                        return StreamingParseResult()

                    # Handle parameters/arguments consistency
                    if "parameters" in obj:
                        # Explicit check rather than `assert`: this validates
                        # model output and must not disappear under `python -O`.
                        # The outer `except Exception` logs it, as before.
                        if "arguments" in obj:
                            raise ValueError(
                                "model generated both parameters and arguments"
                            )
                        obj["arguments"] = obj["parameters"]
                    tool_call_arr.append(obj)

            except MalformedJSON:
                return StreamingParseResult()

            if len(tool_call_arr) == 0:
                return StreamingParseResult()

            # With current_tool_id == -1 this picks the last parsed object.
            current_tool_call: Dict = (
                tool_call_arr[self.current_tool_id] if len(tool_call_arr) > 0 else {}
            )

            # Handle new tool in array
            if len(tool_call_arr) > 0 and len(tool_call_arr) > self.current_tool_id + 1:
                if self.current_tool_id >= 0:
                    # Flush whatever is left of the previous call's arguments.
                    cur_arguments = current_tool_call.get("arguments")
                    if cur_arguments:
                        cur_args_json = json.dumps(cur_arguments)
                        sent = len(self.streamed_args_for_tool[self.current_tool_id])
                        argument_diff = cur_args_json[sent:]

                        res = StreamingParseResult(
                            calls=[
                                ToolCallItem(
                                    tool_index=self.current_tool_id,
                                    name="",
                                    parameters=argument_diff,
                                )
                            ],
                        )
                        self.streamed_args_for_tool[
                            self.current_tool_id
                        ] += argument_diff
                    else:
                        res = StreamingParseResult()
                else:
                    res = StreamingParseResult()

                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                return res

            # Handle tool name
            elif not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name and function_name in self._tool_indices:
                    res = StreamingParseResult(
                        calls=[
                            ToolCallItem(
                                tool_index=self._tool_indices[function_name],
                                name=function_name,
                                parameters="",
                            )
                        ],
                    )
                    self.current_tool_name_sent = True
                else:
                    res = StreamingParseResult()

            # Handle streaming arguments
            else:
                cur_arguments = current_tool_call.get("arguments")
                res = StreamingParseResult()

                if cur_arguments:
                    sent = len(self.streamed_args_for_tool[self.current_tool_id])
                    cur_args_json = json.dumps(cur_arguments)
                    prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                        "arguments"
                    )

                    argument_diff = None
                    if is_complete[self.current_tool_id]:
                        # Call finished: send the remainder and reset state.
                        argument_diff = cur_args_json[sent:]
                        self._buffer = ""
                        self.prev_tool_call_arr[self.current_tool_id].clear()
                        self.current_tool_name_sent = False
                        self.streamed_args_for_tool[self.current_tool_id] = ""

                    elif prev_arguments:
                        # Only the part that is identical in the previous and
                        # current serialisation is stable enough to send.
                        prev_args_json = json.dumps(prev_arguments)
                        if cur_args_json != prev_args_json:
                            prefix = _find_common_prefix(prev_args_json, cur_args_json)
                            argument_diff = prefix[sent:]

                    if argument_diff is not None:
                        res = StreamingParseResult(
                            calls=[
                                ToolCallItem(
                                    tool_index=self.current_tool_id,
                                    parameters=argument_diff,
                                )
                            ],
                        )
                        if not is_complete[self.current_tool_id]:
                            self.streamed_args_for_tool[
                                self.current_tool_id
                            ] += argument_diff

            self.prev_tool_call_arr = tool_call_arr
            return res

        except Exception as e:
            logger.error(f"Error in parse_streaming_increment: {e}")
            return StreamingParseResult()

    @abstractmethod
    def has_tool_call(self, text: str) -> bool:
        """Return True when ``text`` contains a tool call in this detector's format."""
        raise NotImplementedError()

    @abstractmethod
    def structure_info(self) -> _GetInfoFunc:
        """Return a callable mapping a function name to its StructureInfo."""
        raise NotImplementedError()
|
318
|
-
|
319
|
-
|
320
|
-
class Qwen25Detector(BaseFormatDetector):
    """
    Detector for Qwen 2.5 models.
    Assumes function call format:
    <tool_call>{"name":"xxx", "arguments":{...}}</tool_call>
    """

    def __init__(self):
        """
        Initializes the detector with necessary state variables.
        """
        super().__init__()
        self.bot_token = "<tool_call>"
        self.eot_token = "</tool_call>"

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a Qwen 2.5 format tool call."""
        return self.bot_token in text

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: StreamingParseResult whose ``normal_text`` is the (stripped)
            text before the first ``bot_token`` — or all of ``text`` when
            there is none — and whose ``calls`` are the parsed tool calls.
            A call whose payload is not valid JSON is logged and skipped.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            return StreamingParseResult(normal_text=text, calls=[])
        normal_text = text[:idx].strip()

        # Escape the tokens so they are always matched literally, whatever
        # characters a subclass puts in them.
        pattern = rf"{re.escape(self.bot_token)}(.*?){re.escape(self.eot_token)}"
        calls = []
        for raw_call in re.findall(pattern, text, re.DOTALL):
            try:
                parsed_call = json.loads(raw_call)
            except json.JSONDecodeError as e:
                # Same policy as Llama32Detector: one malformed call must not
                # discard the well-formed ones around it.
                logger.warning(f"Failed to parse JSON part: {raw_call}")
                logger.warning(f"JSON parse error: {str(e)}")
                continue
            calls.extend(self.parse_base_json(parsed_call, tools))
        return StreamingParseResult(normal_text=normal_text, calls=calls)

    def structure_info(self) -> _GetInfoFunc:
        """Return the begin/end/trigger strings framing a call to ``name``."""
        return lambda name: StructureInfo(
            begin='<tool_call>{"name":"' + name + '", "arguments":',
            end="}</tool_call>",
            trigger="<tool_call>",
        )
|
365
|
-
|
366
|
-
|
367
|
-
class MistralDetector(BaseFormatDetector):
    """
    Detector for Mistral models.
    Assumes function call format:
    [TOOL_CALLS] [{"name":"xxx", "arguments":{...}}]
    """

    def __init__(self):
        """
        Initializes the detector with necessary state variables.
        """
        super().__init__()
        self.bot_token = "[TOOL_CALLS] ["
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a Mistral format tool call."""
        return self.bot_token in text

    def _clean_text(self, text: str) -> str:
        """
        clean text to only leave ''[TOOL_CALLS] [{"name": xxx, "arguments": {xxx}}]'
        for example,
        text = '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]\n\nToday\'s weather in Boston is :{function call result} (in Fahrenheit)\n\nIf you prefer Celsius, please let me know.'
        return '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]'
        The key pattern is [TOOL_CALLS] [...]

        The end of the array is located with a real JSON decoder, so "]"
        characters inside arguments (nested arrays, strings) do not cut the
        payload short. If the array is not valid JSON, the previous
        "up to the first ]" regex is used as a fallback. Returns "" when no
        tool-call section is found.
        """
        start = text.find(self.bot_token)
        if start == -1:
            return ""
        # bot_token ends with the "[" that opens the JSON array.
        array_start = start + len(self.bot_token) - 1
        try:
            _, array_end = JSONDecoder().raw_decode(text, array_start)
        except JSONDecodeError:
            find_results = re.findall(r"\[TOOL_CALLS\] \[.*?\]", text, re.DOTALL)
            return find_results[0] if find_results else ""
        return text[start:array_end]

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: StreamingParseResult whose ``normal_text`` is the (stripped)
            text before ``bot_token`` — or all of ``text`` when there is
            none — and whose ``calls`` are the parsed tool calls. A payload
            that is not valid JSON is logged and yields no calls.
        """
        idx = text.find(self.bot_token)
        normal_text = text[:idx].strip() if idx != -1 else text
        text = self._clean_text(text)
        tool_content = text.replace("[TOOL_CALLS]", "").strip()
        raw_tool_calls = self.tool_call_regex.findall(tool_content)
        calls = []
        if len(raw_tool_calls) > 0:
            raw_tool_call = raw_tool_calls[0]
            try:
                function_call_arr = json.loads(raw_tool_call)
            except JSONDecodeError as e:
                # Same policy as Llama32Detector: report and carry on rather
                # than failing the whole response on malformed model output.
                logger.warning(f"Failed to parse JSON part: {raw_tool_call}")
                logger.warning(f"JSON parse error: {str(e)}")
                function_call_arr = []
            for match_result in function_call_arr:
                calls.extend(self.parse_base_json(match_result, tools))
        return StreamingParseResult(normal_text=normal_text, calls=calls)

    def structure_info(self) -> _GetInfoFunc:
        """Return the begin/end/trigger strings framing a call to ``name``."""
        return lambda name: StructureInfo(
            begin='[TOOL_CALLS] [{"name":"' + name + '", "arguments":',
            end="}]",
            trigger="[TOOL_CALLS]",
        )
|
427
|
-
|
428
|
-
|
429
|
-
class Llama32Detector(BaseFormatDetector):
    """
    Detector for Llama 3.2 models.
    Assumes function call format:
    <|python_tag|>{"name":"xxx", "arguments":{...}}
    Several calls may follow each other, separated by ";".
    """

    def __init__(self):
        super().__init__()
        self.bot_token = "<|python_tag|>"

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a Llama 3.2 format tool call."""
        # depending on the prompt format the Llama model may or may not
        # prefix the output with the <|python_tag|> token
        return self.bot_token in text or text.startswith("{")

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """Parse function calls from text, handling multiple JSON objects.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: StreamingParseResult. ``normal_text`` is whatever precedes
            the first ``<|python_tag|>`` ("" when the text starts directly
            with "{"); ``calls`` are the parsed tool calls. Segments that are
            not valid JSON are logged and skipped.
        """
        if self.bot_token not in text and not text.startswith("{"):
            return StreamingParseResult(normal_text=text, calls=[])

        if self.bot_token in text:
            # maxsplit=1: a second tag in the output must not make the
            # two-name unpacking raise ValueError.
            normal_text, action_text = text.split(self.bot_token, maxsplit=1)
        else:
            normal_text, action_text = "", text

        # Decode one JSON value at a time instead of splitting on ";", so a
        # semicolon inside a string argument does not cut a call in half.
        decoder = json.JSONDecoder()
        all_actions = []
        pos = 0
        end = len(action_text)
        while pos < end:
            ch = action_text[pos]
            if ch == ";" or ch.isspace():
                # Separator or padding between calls.
                pos += 1
                continue
            try:
                action, pos = decoder.raw_decode(action_text, pos)
            except json.JSONDecodeError as e:
                # Skip the unparseable segment up to the next separator.
                next_sep = action_text.find(";", pos)
                bad_end = end if next_sep == -1 else next_sep
                part = action_text[pos:bad_end].strip()
                logger.warning(f"Failed to parse JSON part: {part}")
                logger.warning(f"JSON parse error: {str(e)}")
                pos = bad_end
                continue
            all_actions.append(action)

        calls = []
        # Only process if we found valid JSON objects
        if all_actions:
            calls = self.parse_base_json(all_actions, tools)
        return StreamingParseResult(normal_text=normal_text, calls=calls)

    def structure_info(self) -> _GetInfoFunc:
        """Return the begin/end/trigger strings framing a call to ``name``."""
        return lambda name: StructureInfo(
            begin='<|python_tag|>{"name":"' + name + '", "arguments":',
            end="}",
            trigger="<|python_tag|>",
        )
|
480
|
-
|
481
|
-
|
482
|
-
class DeepSeekV3Detector(BaseFormatDetector):
    """
    Detector for DeepSeek models.
    Assumes function call format:
    '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): DeepSeek's published chat template appears to spell
        # these markers with the fullwidth bar (U+FF5C) rather than ASCII "|" —
        # confirm the literals below match what the tokenizer really emits.
        self.bot_token = "<|tool▁calls▁begin|>"
        self.eot_token = "<|tool▁calls▁end|>"
        # The markers contain "|", which is alternation in a regex, so they
        # must be escaped to be matched literally.
        call_begin = re.escape("<|tool▁call▁begin|>")
        call_end = re.escape("<|tool▁call▁end|>")
        tool_sep = re.escape("<|tool▁sep|>")
        # One complete call, marker to marker.
        self.func_call_regex = rf"{call_begin}.*?{call_end}"
        # Groups: (1) call type, (2) function name, (3) JSON arguments.
        self.func_detail_regex = (
            rf"{call_begin}(.*){tool_sep}(.*)\n```json\n(.*)\n```{call_end}"
        )
        # Same as func_detail_regex but without the closing fence/marker, for
        # calls that are still being generated.
        self._partial_call_regex = rf"{call_begin}(.*){tool_sep}(.*)\n```json\n(.*)"
        self._last_arguments = ""

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a deepseek format tool call."""
        return self.bot_token in text

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: StreamingParseResult whose ``normal_text`` is the (stripped)
            text before ``bot_token`` and whose ``calls`` are the parsed tool
            calls. If ``bot_token`` is absent, or any call fails to parse,
            the whole ``text`` is returned as ``normal_text`` with no calls.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            return StreamingParseResult(normal_text=text, calls=[])
        normal_text = text[:idx].strip()
        match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
        calls = []
        try:
            for match_result in match_result_list:
                # Get function name
                func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
                func_name = func_detail.group(2)
                func_args = func_detail.group(3)
                func_args = json.loads(func_args)
                # construct match_result for parse_base_json
                match_result = {"name": func_name, "parameters": func_args}
                calls.extend(self.parse_base_json(match_result, tools))
            return StreamingParseResult(normal_text=normal_text, calls=calls)
        except Exception as e:
            logger.error(f"Error in detect_and_parse: {e}")
            # return the normal text if parsing fails
            return StreamingParseResult(normal_text=text)

    def structure_info(self) -> _GetInfoFunc:
        """Return the begin/end/trigger strings framing a call to ``name``."""
        return lambda name: StructureInfo(
            begin=">" + name + "\n```json\n",
            end="\n```<",
            trigger=">" + name + "\n```json\n",
        )

    def parse_streaming_increment(
        self, new_text: str, tools: List[Tool]
    ) -> StreamingParseResult:
        """
        Streaming incremental parsing tool calls for DeepSeekV3 format.

        ``new_text`` is appended to the internal buffer. While the buffer does
        not contain ``bot_token`` the increment is returned as normal text
        (with closing markers removed). Once a call header is recognised, the
        function name is emitted first and later increments emit the newly
        arrived part of the argument text. When the arguments form complete
        JSON, the streaming state is reset for the next call.
        """
        self._buffer += new_text
        current_text = self._buffer

        if self.bot_token not in current_text:
            self._buffer = ""
            for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]:
                if e_token in new_text:
                    new_text = new_text.replace(e_token, "")
            return StreamingParseResult(normal_text=new_text)

        if not hasattr(self, "_tool_indices"):
            self._tool_indices = {
                tool.function.name: i
                for i, tool in enumerate(tools)
                if tool.function and tool.function.name
            }

        calls: list[ToolCallItem] = []
        try:
            partial_match = re.search(
                pattern=self._partial_call_regex,
                string=current_text,
                flags=re.DOTALL,
            )
            if partial_match:
                func_name = partial_match.group(2).strip()
                func_args_raw = partial_match.group(3).strip()

                if not self.current_tool_name_sent:
                    calls.append(
                        ToolCallItem(
                            tool_index=self._tool_indices.get(func_name, 0),
                            name=func_name,
                            parameters="",
                        )
                    )
                    self.current_tool_name_sent = True
                else:
                    # Send only what has not been sent yet; if the text no
                    # longer extends what was sent, resend it in full.
                    argument_diff = (
                        func_args_raw[len(self._last_arguments) :]
                        if func_args_raw.startswith(self._last_arguments)
                        else func_args_raw
                    )

                    if argument_diff:
                        calls.append(
                            ToolCallItem(
                                tool_index=self._tool_indices.get(func_name, 0),
                                name=None,
                                parameters=argument_diff,
                            )
                        )
                        self._last_arguments += argument_diff

                    if _is_complete_json(func_args_raw):
                        # Call finished: reset state for the next one.
                        result = StreamingParseResult(normal_text="", calls=calls)
                        self._buffer = ""
                        self._last_arguments = ""
                        self.current_tool_name_sent = False
                        return result

            return StreamingParseResult(normal_text="", calls=calls)

        except Exception as e:
            logger.error(f"Error in parse_streaming_increment: {e}")
            return StreamingParseResult(normal_text=current_text)
|
610
|
-
|
611
|
-
|
612
|
-
class MultiFormatParser:
    """Tries a list of format detectors in order and reports the first hit."""

    def __init__(self, detectors: List[BaseFormatDetector]):
        """
        :param detectors: Detector instances to consult, in priority order
        """
        self.detectors = detectors

    def parse_once(
        self, text: str, tools: List[Tool]
    ) -> Tuple[str, list[ToolCallItem]]:
        """
        One-time parsing of a complete response.

        Each detector parses ``text`` in turn; the first one that yields at
        least one call decides the result.

        Return: (final_text, all_calls)
        - final_text: the winning detector's normal text, or ``text``
          unchanged when no detector produced a call
        - all_calls: the calls parsed by the winning detector (empty list
          when there is none)
        """
        for detector in self.detectors:
            outcome = detector.detect_and_parse(text, tools)
            if outcome.calls:  # parsed successfully
                return outcome.normal_text, outcome.calls

        # No detector matched: the whole text is normal text.
        return text, []

    def parse_streaming_increment(
        self, new_text: str, tools: List[Tool]
    ) -> Tuple[str, list[ToolCallItem]]:
        """
        Streaming incremental parsing.

        ``new_text`` is fed to each detector's parse_streaming_increment in
        order. The first detector that returns calls ends the loop; its calls
        and normal text are returned. Otherwise the normal text of the last
        detector that produced any is returned with an empty call list.
        """
        text_out = ""
        calls_out = []

        for detector in self.detectors:
            step = detector.parse_streaming_increment(new_text, tools)
            if step.calls:
                # A result carrying calls is a successful parse: take it,
                # including its normal text, and stop.
                calls_out.extend(step.calls)
                text_out = step.normal_text
                break
            if step.normal_text:
                # Text only: either a successful parse or simply not this
                # detector's format — remember it and keep looking.
                text_out = step.normal_text

        return text_out, calls_out
|
666
|
-
|
667
|
-
|
668
|
-
class PythonicDetector(BaseFormatDetector):
    """
    Detector for Llama-3.2 and Llama-4 models with pythonic tool call format.
    Assumes function call format:
        [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)]
    Arguments are Python literals (not JSON).
    """

    def __init__(self):
        super().__init__()
        self.tool_call_regex = re.compile(
            r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
            re.DOTALL,
        )

    def has_tool_call(self, text: str) -> bool:
        """Return True when the stripped text starts with a pythonic call list."""
        return bool(self.tool_call_regex.match(text.strip()))

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        Parse ``text`` as a Python list of keyword-argument function calls.

        :param text: Candidate text; surrounding whitespace is stripped
        :param tools: Available tools, used to resolve each call's tool_index
        :return: A result holding the parsed calls and empty normal_text on
            success. If the text is not a bracketed list of calls, or parsing
            raises, the stripped text is returned as normal_text with no calls.
        """
        # Try parsing the text as a Python list of function calls
        text = text.strip()
        if not (text.startswith("[") and text.endswith("]")):
            # Not a pythonic tool call format
            return StreamingParseResult(normal_text=text, calls=[])
        try:
            module = ast.parse(text)
            parsed = getattr(module.body[0], "value", None)
            if not (
                isinstance(parsed, ast.List)
                and all(isinstance(e, ast.Call) for e in parsed.elts)
            ):
                return StreamingParseResult(normal_text=text, calls=[])
            calls = []
            tool_indices = {
                tool.function.name: i
                for i, tool in enumerate(tools)
                if tool.function.name
            }
            for call in parsed.elts:
                # Only plain-name callees (e.g. ``foo(...)``) are accepted;
                # attribute or subscript callees are skipped.
                if not isinstance(call.func, ast.Name):
                    continue
                function_name = call.func.id
                # Only keyword arguments are collected; positional arguments
                # have no parameter name to map to and are not included.
                arguments = {
                    keyword.arg: self._get_parameter_value(keyword.value)
                    for keyword in call.keywords
                }
                calls.append(
                    ToolCallItem(
                        # -1 marks a function name that is not in ``tools``.
                        tool_index=tool_indices.get(function_name, -1),
                        name=function_name,
                        parameters=json.dumps(arguments, ensure_ascii=False),
                    )
                )
            return StreamingParseResult(normal_text="", calls=calls)
        except Exception:
            logger.exception("Error in pythonic tool call parsing.")
            return StreamingParseResult(normal_text=text, calls=[])

    @staticmethod
    def _find_matching_bracket(buffer: str, start: int) -> int:
        """
        Return the index of the ``]`` that closes the ``[`` at ``start``.

        Nested brackets are depth-counted and brackets inside quoted string
        literals are ignored, so list-valued or string-valued arguments do not
        end the call list early. Returns -1 when the closing bracket has not
        arrived yet.

        NOTE: an unpaired quote character inside bracketed prose (e.g.
        ``[it's]``) is treated as an open string and delays the match until
        the quote is closed.
        """
        depth = 0
        quote = None
        escaped = False
        for i in range(start, len(buffer)):
            ch = buffer[i]
            if quote is not None:
                # Inside a string literal: only look for its terminator.
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == quote:
                    quote = None
                continue
            if ch in ("'", '"'):
                quote = ch
            elif ch == "[":
                depth += 1
            elif ch == "]":
                depth -= 1
                if depth == 0:
                    return i
        return -1

    def parse_streaming_increment(
        self, new_text: str, tools: List[Tool]
    ) -> StreamingParseResult:
        """
        Streaming incremental parsing for pythonic tool calls.

        Text that cannot be part of a tool call (no ``[`` seen, or anything
        before the first ``[``) is returned right away as normal_text. From
        the first ``[`` onward, input is buffered until its matching ``]``
        arrives, then the bracketed span is parsed with detect_and_parse.

        :param new_text: Newly generated text to append to the buffer
        :param tools: Available tools, forwarded to detect_and_parse
        :return: Normal text and/or calls that became available in this step
        """
        self._buffer += new_text
        start = self._buffer.find("[")

        if start == -1:
            # No call list can have started; flush everything as plain text
            # instead of holding it back indefinitely.
            normal_text = self._buffer
            self._buffer = ""
            return StreamingParseResult(normal_text=normal_text)

        prefix = self._buffer[:start]
        end = self._find_matching_bracket(self._buffer, start)

        if end == -1:
            # Opening bracket seen but the list is still incomplete: release
            # the text before it and keep buffering from the bracket onward.
            self._buffer = self._buffer[start:]
            return StreamingParseResult(normal_text=prefix)

        call_text = self._buffer[start : end + 1]
        result = self.detect_and_parse(call_text, tools)
        self._buffer = self._buffer[end + 1 :]
        if not prefix:
            return result
        # Keep the text that preceded the call list instead of dropping it.
        return StreamingParseResult(
            normal_text=prefix + (result.normal_text or ""),
            calls=result.calls,
        )

    def _get_parameter_value(self, val):
        """
        Convert an AST node for a literal argument into a Python value.

        Supports constants, signed numeric constants (``-1``, ``+2.5``), dicts
        with constant keys, lists and tuples (tuples become lists).

        :raises ValueError: if the node is not one of the supported literals
        """
        if isinstance(val, ast.Constant):
            return val.value
        if isinstance(val, ast.UnaryOp) and isinstance(val.op, (ast.USub, ast.UAdd)):
            # Negative numbers are parsed as UnaryOp(USub, Constant), not as a
            # Constant, so they need explicit handling.
            operand = val.operand
            if (
                isinstance(operand, ast.Constant)
                and isinstance(operand.value, (int, float))
                and not isinstance(operand.value, bool)
            ):
                if isinstance(val.op, ast.USub):
                    return -operand.value
                return operand.value
        elif isinstance(val, ast.Dict):
            converted = {}
            for k, v in zip(val.keys, val.values):
                # A key is None for ``**mapping`` unpacking; both that and
                # computed keys are rejected as non-literal.
                if not isinstance(k, ast.Constant):
                    raise ValueError("Tool call arguments must be literals")
                converted[k.value] = self._get_parameter_value(v)
            return converted
        elif isinstance(val, (ast.List, ast.Tuple)):
            return [self._get_parameter_value(v) for v in val.elts]
        raise ValueError("Tool call arguments must be literals")

    def structure_info(self) -> _GetInfoFunc:
        """Return a factory whose StructureInfo is the same for every tool name."""

        def info(name: str):
            return StructureInfo(begin="[", end="]", trigger="")

        return info
|
761
|
-
|
762
|
-
|
763
|
-
class FunctionCallParser:
    """
    Entry point for tool-call parsing with a single, named detector.

    Wraps a MultiFormatParser and forwards both one-shot and streaming parse
    requests to it together with the configured tools, returning the resulting
    normal_text and calls to the caller.
    """

    ToolCallParserEnum: Dict[str, Type[BaseFormatDetector]] = {
        "llama3": Llama32Detector,
        "qwen25": Qwen25Detector,
        "mistral": MistralDetector,
        "deepseekv3": DeepSeekV3Detector,
        "pythonic": PythonicDetector,
    }

    def __init__(self, tools: List[Tool], tool_call_parser: str):
        """
        :param tools: Tools made available to the model
        :param tool_call_parser: Key into ToolCallParserEnum selecting the detector
        :raises ValueError: if the key is empty or not registered
        """
        if not tool_call_parser:
            raise ValueError("Tool Call Parser Not Given!")

        detector_cls = self.ToolCallParserEnum.get(tool_call_parser)
        if not detector_cls:
            raise ValueError(f"Unsupported tool_call_parser: {tool_call_parser}")

        self.multi_format_parser = MultiFormatParser([detector_cls()])
        self.tools = tools

    def has_tool_call(self, text: str) -> bool:
        """
        Report whether any configured detector recognises a tool call in text.

        :param text: The text to check for tool calls
        :return: True if at least one detector reports a tool call, else False
        """
        return any(
            detector.has_tool_call(text)
            for detector in self.multi_format_parser.detectors
        )

    def parse_non_stream(self, full_text: str) -> Tuple[str, list[ToolCallItem]]:
        """
        Non-streaming call: parse the complete text in one pass.

        :return: (normal_text, calls) as produced by MultiFormatParser.parse_once
        """
        remaining_text, parsed_calls = self.multi_format_parser.parse_once(
            full_text, self.tools
        )
        return remaining_text, parsed_calls

    def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, list[ToolCallItem]]:
        """
        Streaming call: parse one newly received chunk incrementally.

        :return: (normal_text, calls) produced for this chunk
        """
        chunk_normal_text, chunk_calls = (
            self.multi_format_parser.parse_streaming_increment(
                chunk_text, self.tools
            )
        )
        return chunk_normal_text, chunk_calls

    def structure_infos(self) -> List[_GetInfoFunc]:
        """
        Collect the structure_info function of every configured detector.
        """
        info_funcs = []
        for detector in self.multi_format_parser.detectors:
            info_funcs.append(detector.structure_info())
        return info_funcs

    def get_structure_tag(self) -> StructuralTagResponseFormat:
        """
        Build the structural-tag response format covering every tool.

        One structure entry is emitted per (detector, tool) pair; the triggers
        are the distinct trigger strings reported across all of them.
        """
        structures: List[StructuresResponseFormat] = []
        triggers: Set[str] = set()

        for get_info in self.structure_infos():
            for tool in self.tools:
                fn = tool.function
                fn_name = fn.name
                assert fn_name is not None
                info = get_info(fn_name)

                # accept all if not strict, otherwise only accept the schema
                if fn.strict:
                    schema = fn.parameters
                else:
                    schema = {}

                structures.append(
                    StructuresResponseFormat(
                        begin=info.begin,
                        schema=schema,  # type: ignore
                        end=info.end,
                    )
                )
                triggers.add(info.trigger)

        return StructuralTagResponseFormat(
            type="structural_tag",
            structures=structures,
            triggers=list(triggers),
        )
|