sglang 0.4.3.post2__py3-none-any.whl → 0.4.3.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/api.py +1 -1
- sglang/bench_offline_throughput.py +19 -0
- sglang/bench_one_batch.py +2 -2
- sglang/bench_serving.py +123 -79
- sglang/global_config.py +8 -3
- sglang/lang/backend/runtime_endpoint.py +1 -1
- sglang/lang/ir.py +1 -1
- sglang/srt/_custom_ops.py +83 -91
- sglang/srt/configs/load_config.py +4 -1
- sglang/srt/configs/model_config.py +48 -2
- sglang/srt/configs/qwen2_5_vl_config.py +5 -2
- sglang/srt/constrained/base_grammar_backend.py +117 -15
- sglang/srt/constrained/llguidance_backend.py +151 -0
- sglang/srt/constrained/outlines_backend.py +24 -33
- sglang/srt/constrained/xgrammar_backend.py +69 -38
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +225 -80
- sglang/srt/distributed/parallel_state.py +48 -3
- sglang/srt/entrypoints/engine.py +67 -9
- sglang/srt/entrypoints/http_server.py +190 -41
- sglang/srt/entrypoints/verl_engine.py +147 -0
- sglang/srt/function_call_parser.py +0 -1
- sglang/srt/layers/activation.py +11 -0
- sglang/srt/layers/attention/{__init__.py → base_attn_backend.py} +14 -6
- sglang/srt/layers/attention/double_sparsity_backend.py +1 -1
- sglang/srt/layers/attention/flashinfer_backend.py +302 -414
- sglang/srt/layers/attention/flashinfer_mla_backend.py +582 -0
- sglang/srt/layers/attention/torch_native_backend.py +1 -1
- sglang/srt/layers/attention/triton_backend.py +13 -8
- sglang/srt/layers/attention/triton_ops/decode_attention.py +3 -0
- sglang/srt/layers/attention/triton_ops/extend_attention.py +20 -4
- sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py +439 -0
- sglang/srt/layers/attention/utils.py +39 -0
- sglang/srt/layers/attention/vision.py +60 -63
- sglang/srt/layers/dp_attention.py +142 -1
- sglang/srt/layers/layernorm.py +1 -1
- sglang/srt/layers/linear.py +3 -1
- sglang/srt/layers/logits_processor.py +281 -45
- sglang/srt/layers/moe/ep_moe/kernels.py +126 -8
- sglang/srt/layers/moe/ep_moe/layer.py +140 -28
- sglang/srt/layers/moe/fused_moe_native.py +2 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +50 -50
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +16 -16
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +16 -16
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +16 -16
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json +18 -18
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +15 -15
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +15 -15
- sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json +15 -15
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +88 -20
- sglang/srt/layers/moe/fused_moe_triton/layer.py +34 -13
- sglang/srt/layers/moe/topk.py +13 -4
- sglang/srt/layers/quantization/__init__.py +111 -7
- sglang/srt/layers/quantization/blockwise_int8.py +409 -0
- sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json +164 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/fp8.py +69 -28
- sglang/srt/layers/quantization/fp8_utils.py +17 -1
- sglang/srt/layers/quantization/gptq.py +416 -0
- sglang/srt/layers/quantization/int8_kernel.py +327 -0
- sglang/srt/layers/quantization/int8_utils.py +73 -0
- sglang/srt/layers/quantization/modelopt_quant.py +18 -1
- sglang/srt/layers/radix_attention.py +1 -0
- sglang/srt/layers/rotary_embedding.py +0 -1
- sglang/srt/layers/sampler.py +76 -31
- sglang/srt/layers/vocab_parallel_embedding.py +14 -13
- sglang/srt/lora/lora.py +17 -1
- sglang/srt/lora/lora_config.py +5 -0
- sglang/srt/lora/lora_manager.py +1 -3
- sglang/srt/managers/cache_controller.py +193 -62
- sglang/srt/managers/configure_logging.py +2 -1
- sglang/srt/managers/data_parallel_controller.py +6 -2
- sglang/srt/managers/detokenizer_manager.py +124 -102
- sglang/srt/managers/image_processor.py +2 -1
- sglang/srt/managers/io_struct.py +144 -6
- sglang/srt/managers/schedule_batch.py +237 -197
- sglang/srt/managers/schedule_policy.py +29 -29
- sglang/srt/managers/scheduler.py +773 -334
- sglang/srt/managers/session_controller.py +6 -2
- sglang/srt/managers/tokenizer_manager.py +225 -68
- sglang/srt/managers/tp_worker.py +15 -4
- sglang/srt/managers/tp_worker_overlap_thread.py +3 -4
- sglang/srt/mem_cache/chunk_cache.py +18 -11
- sglang/srt/mem_cache/hiradix_cache.py +394 -0
- sglang/srt/mem_cache/memory_pool.py +68 -37
- sglang/srt/mem_cache/radix_cache.py +58 -47
- sglang/srt/metrics/collector.py +102 -36
- sglang/srt/model_executor/cuda_graph_runner.py +56 -31
- sglang/srt/model_executor/forward_batch_info.py +49 -16
- sglang/srt/model_executor/model_runner.py +280 -81
- sglang/srt/model_loader/loader.py +3 -3
- sglang/srt/model_loader/weight_utils.py +36 -14
- sglang/srt/models/baichuan.py +31 -6
- sglang/srt/models/chatglm.py +39 -7
- sglang/srt/models/commandr.py +29 -5
- sglang/srt/models/dbrx.py +31 -5
- sglang/srt/models/deepseek.py +43 -6
- sglang/srt/models/deepseek_nextn.py +32 -19
- sglang/srt/models/deepseek_v2.py +265 -32
- sglang/srt/models/exaone.py +19 -9
- sglang/srt/models/gemma.py +22 -8
- sglang/srt/models/gemma2.py +25 -12
- sglang/srt/models/gemma2_reward.py +5 -1
- sglang/srt/models/gpt2.py +28 -13
- sglang/srt/models/gpt_bigcode.py +27 -5
- sglang/srt/models/granite.py +21 -9
- sglang/srt/models/grok.py +21 -4
- sglang/srt/models/internlm2.py +36 -6
- sglang/srt/models/internlm2_reward.py +5 -1
- sglang/srt/models/llama.py +26 -9
- sglang/srt/models/llama_classification.py +5 -1
- sglang/srt/models/llama_eagle.py +17 -4
- sglang/srt/models/llama_embedding.py +5 -1
- sglang/srt/models/llama_reward.py +7 -2
- sglang/srt/models/llava.py +19 -3
- sglang/srt/models/llavavid.py +10 -1
- sglang/srt/models/minicpm.py +26 -2
- sglang/srt/models/minicpm3.py +39 -3
- sglang/srt/models/minicpmv.py +45 -14
- sglang/srt/models/mixtral.py +20 -9
- sglang/srt/models/mixtral_quant.py +50 -8
- sglang/srt/models/mllama.py +57 -11
- sglang/srt/models/olmo.py +34 -6
- sglang/srt/models/olmo2.py +34 -13
- sglang/srt/models/olmoe.py +26 -4
- sglang/srt/models/phi3_small.py +29 -10
- sglang/srt/models/qwen.py +26 -3
- sglang/srt/models/qwen2.py +26 -4
- sglang/srt/models/qwen2_5_vl.py +46 -8
- sglang/srt/models/qwen2_eagle.py +17 -5
- sglang/srt/models/qwen2_moe.py +44 -6
- sglang/srt/models/qwen2_rm.py +78 -0
- sglang/srt/models/qwen2_vl.py +39 -8
- sglang/srt/models/stablelm.py +32 -5
- sglang/srt/models/torch_native_llama.py +5 -2
- sglang/srt/models/xverse.py +21 -9
- sglang/srt/models/xverse_moe.py +45 -7
- sglang/srt/models/yivl.py +2 -1
- sglang/srt/openai_api/adapter.py +109 -24
- sglang/srt/openai_api/protocol.py +17 -1
- sglang/srt/reasoning_parser.py +154 -0
- sglang/srt/sampling/penaltylib/__init__.py +4 -6
- sglang/srt/sampling/penaltylib/frequency_penalty.py +66 -0
- sglang/srt/sampling/penaltylib/{penalizers/min_new_tokens.py → min_new_tokens.py} +15 -23
- sglang/srt/sampling/penaltylib/orchestrator.py +39 -188
- sglang/srt/sampling/penaltylib/presence_penalty.py +66 -0
- sglang/srt/sampling/sampling_batch_info.py +79 -157
- sglang/srt/sampling/sampling_params.py +16 -13
- sglang/srt/server_args.py +135 -60
- sglang/srt/speculative/build_eagle_tree.py +8 -9
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +1 -12
- sglang/srt/speculative/eagle_utils.py +92 -57
- sglang/srt/speculative/eagle_worker.py +238 -111
- sglang/srt/speculative/spec_info.py +1 -13
- sglang/srt/utils.py +43 -17
- sglang/srt/warmup.py +47 -0
- sglang/test/few_shot_gsm8k.py +4 -1
- sglang/test/runners.py +389 -126
- sglang/test/send_one.py +88 -0
- sglang/test/test_block_fp8_ep.py +361 -0
- sglang/test/test_programs.py +1 -1
- sglang/test/test_utils.py +138 -84
- sglang/utils.py +50 -60
- sglang/version.py +1 -1
- {sglang-0.4.3.post2.dist-info → sglang-0.4.3.post4.dist-info}/METADATA +22 -15
- {sglang-0.4.3.post2.dist-info → sglang-0.4.3.post4.dist-info}/RECORD +200 -166
- {sglang-0.4.3.post2.dist-info → sglang-0.4.3.post4.dist-info}/WHEEL +1 -1
- sglang/bench_latency.py +0 -1
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +0 -75
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +0 -74
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +0 -85
- sglang/test/srt/sampling/penaltylib/utils.py +0 -344
- {sglang-0.4.3.post2.dist-info → sglang-0.4.3.post4.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.post2.dist-info → sglang-0.4.3.post4.dist-info}/top_level.txt +0 -0
@@ -1,98 +1,104 @@
|
|
1
1
|
sglang/__init__.py,sha256=njc4c2IBYklSqVMiT70GL630Uddg5D_IU_6dthApPxc,1587
|
2
|
-
sglang/api.py,sha256=
|
3
|
-
sglang/
|
4
|
-
sglang/
|
5
|
-
sglang/bench_one_batch.py,sha256=d-LuRHEyDZjh180OCN5fqTjr8Zusk3zc0vhoJ33x0B0,17905
|
2
|
+
sglang/api.py,sha256=rYa2qKE88_RJQwYVvjuJzEZECf75ujchZVqi0q48tqc,6890
|
3
|
+
sglang/bench_offline_throughput.py,sha256=OQb-AjL4UNymmir02ht43uzgaNsnO_I11nXSowKMqBI,13841
|
4
|
+
sglang/bench_one_batch.py,sha256=mVgmg1LP_Y67HlH4M2V7QvuO1aFjCE0n3gDRXW_w_NQ,17935
|
6
5
|
sglang/bench_one_batch_server.py,sha256=iu73SsvYwnuRktYZDz1P6psMiRx8MbEbF5sbsYJdzYg,5962
|
7
|
-
sglang/bench_serving.py,sha256=
|
6
|
+
sglang/bench_serving.py,sha256=DGpC7L7fH3F8h_yOvkhMWwSxPQnNaYInE9gSv13Xeb8,55327
|
8
7
|
sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
|
9
|
-
sglang/global_config.py,sha256=
|
8
|
+
sglang/global_config.py,sha256=xzLdk8W53fneFblNh8iIjGF9C3-7mnzR1-LleD9Btxg,1495
|
10
9
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
10
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
12
|
-
sglang/utils.py,sha256=
|
13
|
-
sglang/version.py,sha256=
|
11
|
+
sglang/utils.py,sha256=hhoF2AILUCu0p7La-7pYAF2C6FeeaXGvErj3SqJTYxg,15404
|
12
|
+
sglang/version.py,sha256=ZlAh3ELJql74nlCek86guWjIlTdZddRBNjrR9kE8_Dk,28
|
14
13
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
14
|
sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
|
16
15
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
16
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
17
|
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
19
|
-
sglang/lang/ir.py,sha256=
|
18
|
+
sglang/lang/ir.py,sha256=fUQc7ExghlQNGFxYpD-uLXonVSCA3GUGdd8PNLPIP5Y,18560
|
20
19
|
sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
|
21
20
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
21
|
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
23
22
|
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
24
23
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
25
24
|
sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
|
26
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=
|
25
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=CAVh3X9F80t_2tkJECF__7AdCQtqDg1AHDqIoKIPnvs,16755
|
27
26
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
28
|
-
sglang/srt/_custom_ops.py,sha256=
|
27
|
+
sglang/srt/_custom_ops.py,sha256=GUIn2rI5KXAkVdl0LZ-tMSoXDIscQgVRkRl-MD75kdA,5007
|
29
28
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
30
29
|
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
31
30
|
sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
|
32
|
-
sglang/srt/function_call_parser.py,sha256
|
31
|
+
sglang/srt/function_call_parser.py,sha256=-siKVUqr3B3pufJ8G0wKGoQKQniAcCa8K7x1kFgQyyQ,19532
|
33
32
|
sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
|
34
33
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
35
34
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
35
|
+
sglang/srt/reasoning_parser.py,sha256=45xsU9RCPfyG4_Zx4y3-JPyNgAtrqwKI4j5R2NT4g1s,5594
|
36
36
|
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
37
|
-
sglang/srt/server_args.py,sha256=
|
37
|
+
sglang/srt/server_args.py,sha256=61p3vmiMcw3I-g_Xfs2OsMRKroxqzJkGdf_zerRN2js,44682
|
38
38
|
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
39
|
-
sglang/srt/utils.py,sha256=
|
39
|
+
sglang/srt/utils.py,sha256=7P7fyymG10E9ImmjUlRDVuJa8zn9C2tfnxULfw1CP30,47673
|
40
|
+
sglang/srt/warmup.py,sha256=FmJiYfjRr3X_eAe7ojQaPoN17LvHpjDmRWRnO-k86AQ,1469
|
40
41
|
sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
|
41
42
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
42
43
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
43
44
|
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
44
45
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
45
|
-
sglang/srt/configs/load_config.py,sha256=
|
46
|
-
sglang/srt/configs/model_config.py,sha256=
|
47
|
-
sglang/srt/configs/qwen2_5_vl_config.py,sha256=
|
48
|
-
sglang/srt/constrained/base_grammar_backend.py,sha256=
|
49
|
-
sglang/srt/constrained/
|
46
|
+
sglang/srt/configs/load_config.py,sha256=NcptQBTG-TBJpfcnE1e32BefBBzrINm9n5bYzTrH1bs,3287
|
47
|
+
sglang/srt/configs/model_config.py,sha256=DFcYhNcTngoCfGn3lSrIBhOZvuzaFmuSSd0afGn8BXM,18750
|
48
|
+
sglang/srt/configs/qwen2_5_vl_config.py,sha256=x7erMMDwqlI2l6YYBvZCXlZ53li2waWWgyhJjz273dk,48223
|
49
|
+
sglang/srt/constrained/base_grammar_backend.py,sha256=q2RTH9hv2nKUF_-HVdZzEUjG6LxejPqXjvUOE1NDWaU,6788
|
50
|
+
sglang/srt/constrained/llguidance_backend.py,sha256=ej7wN13SzCsT310C6OIyUg2zs5jeuLl3Ocok9SP9-c4,5702
|
51
|
+
sglang/srt/constrained/outlines_backend.py,sha256=UWv2xjg8x4XtoqpY8LoorlJaYOZhfDeIr5YCiFn4knA,6812
|
50
52
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
51
|
-
sglang/srt/constrained/xgrammar_backend.py,sha256=
|
53
|
+
sglang/srt/constrained/xgrammar_backend.py,sha256=W7_qyyQiOUwejIPCnWgJrp6ka5fy137SiJtxt3VNruM,6220
|
52
54
|
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
53
55
|
sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
|
54
|
-
sglang/srt/distributed/parallel_state.py,sha256=
|
56
|
+
sglang/srt/distributed/parallel_state.py,sha256=ExRNf5pgoUK-UP0oU-PpBgaXoutKZwFs-7djjZSBD0k,49196
|
55
57
|
sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
|
56
58
|
sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
|
57
|
-
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=
|
59
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=cFVxk9zMBZIDdvkM6HAkpWxN80iTbF4ycwtuFIJvjAk,22191
|
58
60
|
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
|
59
61
|
sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
|
60
62
|
sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
|
61
63
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
62
64
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
63
65
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
64
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
65
|
-
sglang/srt/entrypoints/http_server.py,sha256=
|
66
|
-
sglang/srt/
|
67
|
-
sglang/srt/layers/
|
68
|
-
sglang/srt/layers/
|
69
|
-
sglang/srt/layers/
|
70
|
-
sglang/srt/layers/
|
66
|
+
sglang/srt/entrypoints/engine.py,sha256=4BVw8wJIDXtHJu2YM3H1emJIOHO9lt2RXlX8YtsCs-o,19647
|
67
|
+
sglang/srt/entrypoints/http_server.py,sha256=avgOaHQsxXdwGGbcBE2H225iBUp6Zz21z6hr6y6Z_VU,24724
|
68
|
+
sglang/srt/entrypoints/verl_engine.py,sha256=ICo0F860JvoCy3aKTTLpEHqKcDX4WIN5V85q6873IxA,5798
|
69
|
+
sglang/srt/layers/activation.py,sha256=f097ndEIQ-lQ5JLa4HrcjqLLBeRcZbjYmj2gmVknnkA,5993
|
70
|
+
sglang/srt/layers/dp_attention.py,sha256=nllN2eCd4KtiOmWdG6dQRlI3CVMi_CBzn7UihlXNLi0,6547
|
71
|
+
sglang/srt/layers/layernorm.py,sha256=DI2Ih34bLh5Ex_yd8-X8L7kNRcYStpc9XpIS4zIoM_U,3860
|
72
|
+
sglang/srt/layers/linear.py,sha256=tuwG1HAJPZJdzuNnlnYQYyP3ISRgWDY8Nz1fljjcOiw,51111
|
73
|
+
sglang/srt/layers/logits_processor.py,sha256=BcoVrVYndmP9o0lFXj3YjZ8TOzloHXBAzLwjFZi4u-8,23207
|
71
74
|
sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
|
72
75
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
73
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
74
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
75
|
-
sglang/srt/layers/sampler.py,sha256=
|
76
|
+
sglang/srt/layers/radix_attention.py,sha256=UDL0y4Zasay_Rk-_XmIU4kaGbaF26ONvEHX5EQzLrqI,2260
|
77
|
+
sglang/srt/layers/rotary_embedding.py,sha256=6-dB-PjdLeY0D5g_5Yx7E8QXtd-MSAfXcFg1ptOp6Hw,43938
|
78
|
+
sglang/srt/layers/sampler.py,sha256=Cyka1ZvJBtXDl5w1h5pG7bqWDr-w6U0Y53jJKMOdIIM,12034
|
76
79
|
sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
|
77
|
-
sglang/srt/layers/vocab_parallel_embedding.py,sha256=
|
78
|
-
sglang/srt/layers/attention/
|
79
|
-
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=
|
80
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
81
|
-
sglang/srt/layers/attention/
|
82
|
-
sglang/srt/layers/attention/
|
83
|
-
sglang/srt/layers/attention/
|
84
|
-
sglang/srt/layers/attention/
|
80
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=WFMCGR4EdYloQK5fT1F6BbA7rT5OK76_7pzV81wiJWU,22286
|
81
|
+
sglang/srt/layers/attention/base_attn_backend.py,sha256=j6pDIDuxlWVmc8GzNxcAFi1USRo4gqqI7G65XAQ5u5M,3263
|
82
|
+
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=2ZRL_gYz14idoVqQzeQ6N77nXer0f_8_TUYw40XUUz0,9161
|
83
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=S8t6u0PliXhtUqnDIG8uYU2h1aIhujkJHyqawxWrYPo,45962
|
84
|
+
sglang/srt/layers/attention/flashinfer_mla_backend.py,sha256=De7VK-2-E5cjuwffOFq0KPjQtDfnJnkFJDdOp-nZbSQ,20003
|
85
|
+
sglang/srt/layers/attention/torch_native_backend.py,sha256=KABmBrMqKa4x08kkQYdIcZUGydvmaVJIUfo3y8jhFHI,9270
|
86
|
+
sglang/srt/layers/attention/triton_backend.py,sha256=3bt0Cs0B3w7QURtq7AdBgTjcGCAj_Ojod3kjWIdk58k,21542
|
87
|
+
sglang/srt/layers/attention/utils.py,sha256=gwZoJDyJ9OIPMHWl6r3qkuyVp4Sji9juX7Pwvh9PNxI,1131
|
88
|
+
sglang/srt/layers/attention/vision.py,sha256=rDKvuA14zC8JQaKJFKZSf0hBbpf3F-ELTzcFk0y3l7o,12939
|
89
|
+
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=fmUCxXnnWt9FlomBrveedq5WB-Gzw87wPncDCoHVA2U,17926
|
85
90
|
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=ztLWKeW-260EiIw3kCAbtUTUHHxAICz2mVxZJFes4oI,31167
|
86
|
-
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=
|
91
|
+
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=314ubx4GFPmvwxRpsfu-uaTrJ8RKRKK8gdxcdwEMO9s,13244
|
87
92
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
|
88
|
-
sglang/srt/layers/
|
89
|
-
sglang/srt/layers/moe/
|
93
|
+
sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py,sha256=-68q4M7AL7OYMo2JwVv3yGd29jxITVcX0bUjfKn-xAo,13866
|
94
|
+
sglang/srt/layers/moe/fused_moe_native.py,sha256=domK1jDc5-zJcdwDso-YgBp_O136VHifnvFVlfFvKsY,4325
|
95
|
+
sglang/srt/layers/moe/topk.py,sha256=MOwBNBwxxW__mgmpX08RIqrh77aXLEgr0F5b5iF7hRI,7512
|
90
96
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
91
|
-
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=
|
92
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
97
|
+
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=CCvcVRNE8COLDKglczEni_dIj755p5aStVAiyhuiEfE,15378
|
98
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=EAOGm9RHwZvf_tiLs7azSoQZ4gFTD0GDyiiUN37chJc,27290
|
93
99
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
94
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
95
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
100
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=5Rl7IBzDFjkF4g0hbLvT-FbcutxclGhbskBNjbuIlX8,40426
|
101
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MYel2jkrny6StRwE1eWKh3ZTG_p_nH6r4xkZZpuK4IU,23438
|
96
102
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
97
103
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
98
104
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -123,11 +129,12 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
|
|
123
129
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=FGVvsvw23DeDFLj7TNqm402fAtib2cPGRZR3ePBeUp4,2753
|
124
130
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
|
125
131
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
|
132
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=RuUDK9XfgXs1eZESWQR9ba4tu-rCRG_UCYwjaJ568sI,3264
|
126
133
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
127
134
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
|
128
135
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
|
129
136
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
|
130
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=
|
137
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=23CJv3de1UDX3EduMpylD9AA8qL5kzMSjLK4GDMqlro,3734
|
131
138
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=IMTKnPRjhSptf7smIkpqmMjSML9SQ7I8CpkbR3Inzqk,3258
|
132
139
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
|
133
140
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
|
@@ -150,15 +157,15 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
|
|
150
157
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=sjEVPVTgUAlp4s8tZLGSyeNzbW6zTtUm2IioH3nZsIg,3254
|
151
158
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=eD3Y9sOwHFcgVdOfya8KxPhvLx_b4whfEWm4d8Y2HW8,3268
|
152
159
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=KIfpZydSl31FOEqq0EBfxTyWRj1QTDwTjkPHFjNO3_A,3253
|
153
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=
|
154
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=
|
155
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json",sha256=
|
160
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=Hh3sfBOR5j3Hz9O2yoOw56A8KAYZabqWTr1uXem4vvs,4733
|
161
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=Hh3sfBOR5j3Hz9O2yoOw56A8KAYZabqWTr1uXem4vvs,4733
|
162
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=AMD_Radeon_Graphics.json",sha256=Hh3sfBOR5j3Hz9O2yoOw56A8KAYZabqWTr1uXem4vvs,4733
|
156
163
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
|
157
164
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=LD4Z5MRR5Ivi4bYB5hMgymtvmFyVJwq6gmehA7fzecc,3271
|
158
165
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=GLIH4egg-pE-NWU5XqKuJCoRXciHN6GSc3NaE4PaeYg,3261
|
159
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=
|
160
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=
|
161
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json",sha256=
|
166
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=zcNvXyWveWBefuCTviItdnkFAUi3ou30PHpkktwodQM,4730
|
167
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=zcNvXyWveWBefuCTviItdnkFAUi3ou30PHpkktwodQM,4730
|
168
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=AMD_Radeon_Graphics.json",sha256=zcNvXyWveWBefuCTviItdnkFAUi3ou30PHpkktwodQM,4730
|
162
169
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
|
163
170
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
|
164
171
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
|
@@ -169,9 +176,9 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
|
|
169
176
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
|
170
177
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=4O4VeMpgFNrqWyWqWgYgcYAgBQnOlAXvt26CRSXK-sY,3270
|
171
178
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=qfjbXqbl902TuiyzzomUy2sMvs-Dud8ZphDRY5WIPBM,3260
|
172
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=
|
173
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=
|
174
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json",sha256=
|
179
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=ZnslvT6HljcanWwYjF8nthwqr_h4tjyb_wELBdcGD48,4732
|
180
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=ZnslvT6HljcanWwYjF8nthwqr_h4tjyb_wELBdcGD48,4732
|
181
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=AMD_Radeon_Graphics.json",sha256=ZnslvT6HljcanWwYjF8nthwqr_h4tjyb_wELBdcGD48,4732
|
175
182
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
|
176
183
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
|
177
184
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4Q_-yITMfijOMoguUM2n96clARh-DUFsS-4oW_a3Jpc,3252
|
@@ -180,58 +187,70 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
|
|
180
187
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=BXjSVGdvgP_-7xTvbHOO6ZrXWe0qSXiQChxoHGgWL7o,3263
|
181
188
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=Pi2coJlJlpgqXiPRd77B_eCmmi7sCdBuoSGK1RA5YO8,3258
|
182
189
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=p2qlRhTt7owWB8keEmoCrPZpo39IAxsKnULFQ7R38SI,3873
|
183
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=
|
184
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=
|
185
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=
|
190
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=hdWYpkBWfG9kwUYco1VNjwU_S65Coat1uzfEJl33UfY,4409
|
191
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=hdWYpkBWfG9kwUYco1VNjwU_S65Coat1uzfEJl33UfY,4409
|
192
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=hdWYpkBWfG9kwUYco1VNjwU_S65Coat1uzfEJl33UfY,4409
|
186
193
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
|
187
194
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
|
188
195
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
|
189
196
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=rR8b-OuQ3watb8b2zuNlxKDSZpzlAagm9nb-FdKkt7s,3270
|
190
197
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=8G_QqV_DhvZ6xSavMSpeE6qcXPVpsVjEtJabydybKqY,3263
|
191
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=
|
192
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=
|
193
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json",sha256=
|
198
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=hW48t9J7ptdtil6MKMSfXV-HQ8Eba3SLOgScNuwseSg,4733
|
199
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=hW48t9J7ptdtil6MKMSfXV-HQ8Eba3SLOgScNuwseSg,4733
|
200
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=AMD_Radeon_Graphics.json",sha256=hW48t9J7ptdtil6MKMSfXV-HQ8Eba3SLOgScNuwseSg,4733
|
194
201
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
|
195
202
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
|
196
203
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
197
204
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=3YQakSmUKhpw1KO7Hn-tEc-yyD1fEj01_6JlSYnrrlI,3274
|
198
205
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=W2ka_U8pzwjzX62NEGKXR32uuSR_zfHD1XjXYf5bgBs,3262
|
199
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256
|
200
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256
|
201
|
-
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256
|
206
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
|
207
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
|
208
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json",sha256=-RzUWSIAAsg6iA-8SPMa68hPpBVoUyMJs3dLP7edRu0,4323
|
202
209
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
203
210
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
204
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
211
|
+
sglang/srt/layers/quantization/__init__.py,sha256=QS3lOGK5pP5e27lLDu3nPoLtrcOlzL17vneKoWVX3Rs,9760
|
205
212
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
206
|
-
sglang/srt/layers/quantization/
|
213
|
+
sglang/srt/layers/quantization/blockwise_int8.py,sha256=xLn2dApnxAHo6UFqtDe67CDDN3DxcXmwH5cERFNme_0,14985
|
214
|
+
sglang/srt/layers/quantization/fp8.py,sha256=KBWTZXz74gqAHAOo0PRv1PT4RCF0ymEFq8u0dKHYfbI,37228
|
207
215
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=rbuoOhgpA8_sWE5Tm3C9m0YmLqUSSBKKunLiAnHhh6c,19300
|
208
|
-
sglang/srt/layers/quantization/fp8_utils.py,sha256=
|
209
|
-
sglang/srt/layers/quantization/
|
210
|
-
sglang/srt/layers/quantization/
|
216
|
+
sglang/srt/layers/quantization/fp8_utils.py,sha256=ju4JIYatz3hOv-n5XCAh7V6QvOLFzRbceNuuXaXSwac,5815
|
217
|
+
sglang/srt/layers/quantization/gptq.py,sha256=u-WadiLnVbeskCU8Ar-1IYofraOf1caYkGrUFFYC27k,14651
|
218
|
+
sglang/srt/layers/quantization/int8_kernel.py,sha256=GfRn_imIw8kNgqdtb2lr7BettjgDgimbl1Rubnamjh8,11352
|
219
|
+
sglang/srt/layers/quantization/int8_utils.py,sha256=YK9CS-lb_n91kNCTKK5o5apYF31V2giDg5G5VKrpcUA,2356
|
220
|
+
sglang/srt/layers/quantization/modelopt_quant.py,sha256=IfLPea7K99Z92_GX1AvP0_wCiOJWZSqzRTG1EDnat_E,6729
|
211
221
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
|
222
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=RdHQxWXwXqvio31192vsLaKjEr4f_DjpMPKlarY1IAk,3251
|
212
223
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
213
224
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=CPo1WRF0HgsQMPBkvpoImElQMrfwpJLhEvL86e6fkPU,3247
|
214
225
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9qdyh6ki9LAyq7VDO9WMRmBOPWKSrZhU-I7z1E9bTKA,550
|
215
226
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
216
227
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
217
228
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9ya9f1Nt0g0RD-6sRRBZOaIPLSpSFZCz7jNvqTPrgFE,3732
|
229
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=3E-LXaW1FPoTiCPBurm7U2SV4KmWi4xmqwdCjHvZkkA,3250
|
218
230
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
219
231
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=UZljnxxCSjwnZlX3OgKWZJGXCf5BWF_agEpNX8I4Zxc,3248
|
220
232
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
221
233
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6POXzQZHdNwcBDv1w6BJKbLMRDt0jbFUuMsMNf-ToEs,549
|
234
|
+
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=1G2RuKNS88mjD76ZhXFwR-LBhKaqltupGVJQdhsKwJo,3250
|
222
235
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
223
236
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3matoCzEe4aexwoe7YTmkjyE4NA8khWXjL5EySuNwzA,3254
|
224
237
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
|
225
238
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=JeXNLkbMAjdDKV-WpzQy87SXN06towo3xUofLtvYCQI,551
|
239
|
+
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=k81ilt1195nP4r197W7cZonPJ5f2Z5AtSwUZjG2nMOE,3243
|
226
240
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
|
227
241
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9f8Ib4gLEFSfdNpO8IL8uiONImvqnlPbJrZ0HM3OB-o,3247
|
228
242
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
|
229
243
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Boa83ZSPZ0LvzqtfLGvois5QK4TmJfwjA2n96c9ET58,549
|
244
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
245
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
246
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
247
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=fmbzpCd0iyVAjrnUkALmq_RkwuzV_VnaplbS1Mj_csk,3261
|
230
248
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
|
231
249
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
232
250
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tG5_iVeRBHTgHX-liOf79nWRjj_lUZ-NQWTbBrBgORQ,3246
|
233
251
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
234
252
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9tCZxJ0eAD7AYMH7OqS3AGppJUllKnJLNvMq7FMXdsA,552
|
253
|
+
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=smT1Yg8fVLAzRqQubCCxirWJ9KLbwEqCt9vP_doPv_o,3246
|
235
254
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
236
255
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
|
237
256
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=QMVfMXS0Yjgob8_9xps1xuZi6KnY5l2MeKxXLRjTeg4,548
|
@@ -243,6 +262,10 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
243
262
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
|
244
263
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
|
245
264
|
"sglang/srt/layers/quantization/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
|
265
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
266
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
267
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
268
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=7PaawDEmgCGOKnf5l8oVoCEWx9l6APn25ndZ8Yrtar4,3257
|
246
269
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
|
247
270
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
|
248
271
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=B0lo3SuoQXhBEnojH2TwpVeurvlKD8yI8kQrJ5ORhWU,3249
|
@@ -270,27 +293,36 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
270
293
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
|
271
294
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xsFMrq4aybClfJyhm78c1Hf1jcyFSGnfygdHYp7OhSQ,3727
|
272
295
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=A4uzWJTNhyNVh7ntOvUpT0TheaEVu_js0NCNdav8mTs,3730
|
296
|
+
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=uh2HLUNRGIqNulVSJrhamvMo_uExHA-S2okQd6rHB8Y,3247
|
273
297
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
|
274
298
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
275
299
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
|
276
300
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
277
301
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FcuzcmKFf2RbaUpAaAsuObUefcGMgNPMDbVdHXRkoGY,549
|
302
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=YKPb9yvuMTBy2mnelxrN0eYcufaMH1ZgNx7_0gGEROI,3259
|
278
303
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
279
304
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=h32lCTFilLlyKbMeuJvNWG1v0yJJzNj93kwSvlrHfaY,3249
|
280
305
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
|
281
306
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kjQ_kvF38bZGcmaeJGSJsSR0NcUjUOh3LZ2-5c4kPvE,550
|
307
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=DM908j7iJKK0eLDR8R2aBmZC_zmMjWc2LXxLktYtqAc,3254
|
282
308
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
|
283
309
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0jX-z2lTgVw7ABLmWsIsQdqW4EjmbXKRDHye_XPLCAE,3245
|
284
310
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
|
285
311
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=mZGU39sobtUqNYKjtyIGjhOZyCOQFJMF3MinA1zjTJA,550
|
312
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=UkXzxGGVXuq3ymqaXb1QEqnqXcXBN-mFvZDZoKHH5kE,3258
|
286
313
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
|
287
314
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_exM3wJ3FMmGHweBcH-8IxwZBzaOmPaF3ScMM6KDpiY,3253
|
288
315
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ue2oWml2ouUTZelYx5Nt5pgCmY-ib3mLV1reJL9ZudE,550
|
316
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
317
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
318
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
319
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=2t1lmnh4Fn67YSq4naP2g_RqYC0VtsVgTw5GS14A__w,3258
|
289
320
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
|
290
321
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
|
291
322
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENRWYdUwI0ooHb6IwcHliupRWOPnw-7-WtxZB-qQGJI,3245
|
292
323
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
293
324
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Nv9KP_KLGsRJdJF755dZBvbTws37u1GM2UigMRlAtl0,552
|
325
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json",sha256=v2VwGLYvrYy3Nfr7CiOksjaR-XbwHu21RsXZ3J6_yfI,3258
|
294
326
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
|
295
327
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
296
328
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TdWuE2RIsIyr4Im24MuWK3XyiNtbhO_hAiAXDz5gNUk,3246
|
@@ -314,9 +346,9 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
314
346
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
|
315
347
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
316
348
|
sglang/srt/lora/layers.py,sha256=r34oprzwyE3SWPvaNkBvXWPtfa-0IY987_bjj36ySfw,9996
|
317
|
-
sglang/srt/lora/lora.py,sha256=
|
318
|
-
sglang/srt/lora/lora_config.py,sha256=
|
319
|
-
sglang/srt/lora/lora_manager.py,sha256
|
349
|
+
sglang/srt/lora/lora.py,sha256=07-IaAfbb3zPJ-DukHL3uyQ8fjJx_hrVFHS2pqe8LZg,8238
|
350
|
+
sglang/srt/lora/lora_config.py,sha256=qDgMTx_69jyJUl29O5FxLzYa0BMhqYVXWXfyyVOvGm0,1684
|
351
|
+
sglang/srt/lora/lora_manager.py,sha256=Wlq5dxLM7Uj4uTGpFXH1q-IOI8j4mFXYHPKSltx2QMI,7794
|
320
352
|
sglang/srt/lora/mem_pool.py,sha256=eV_GXETxNODPVIAnTEeUUUVn0IVgguBR_mYFzIK-VHA,6835
|
321
353
|
sglang/srt/lora/utils.py,sha256=6i7Q1Y-1LLbRkeCMv_lKIzkTN0veUTLbc8wlHn7R-bA,4571
|
322
354
|
sglang/srt/lora/backend/__init__.py,sha256=98L_KRRnE3gcGcx7Lb6yjAEUUE_Yay3QszcQXdzYsDw,708
|
@@ -328,101 +360,103 @@ sglang/srt/lora/triton_ops/gate_up_lora_b.py,sha256=qve4oNZHYUFk9ckmT2BVuDNMEvrN
|
|
328
360
|
sglang/srt/lora/triton_ops/qkv_lora_b.py,sha256=BmIcTZMnlSnie9rnMl4KvLpc4Njsk7_IppbUqitf9Xw,5738
|
329
361
|
sglang/srt/lora/triton_ops/sgemm_lora_a.py,sha256=kv-AvJ_Bi3yWjGvFnSwXvP66iJvY9n9pEnJzJ9-DWzo,3982
|
330
362
|
sglang/srt/lora/triton_ops/sgemm_lora_b.py,sha256=Ai5vPriT4OgACwK7xrpGgf5L1oaN9x0jwNKMChu3uI0,4299
|
331
|
-
sglang/srt/managers/cache_controller.py,sha256=
|
332
|
-
sglang/srt/managers/configure_logging.py,sha256=
|
333
|
-
sglang/srt/managers/data_parallel_controller.py,sha256=
|
334
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=
|
335
|
-
sglang/srt/managers/image_processor.py,sha256=
|
336
|
-
sglang/srt/managers/io_struct.py,sha256=
|
337
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
338
|
-
sglang/srt/managers/schedule_policy.py,sha256=
|
339
|
-
sglang/srt/managers/scheduler.py,sha256=
|
340
|
-
sglang/srt/managers/session_controller.py,sha256=
|
341
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
342
|
-
sglang/srt/managers/tp_worker.py,sha256=
|
343
|
-
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=
|
363
|
+
sglang/srt/managers/cache_controller.py,sha256=8idtERyZayP5rJZBcdBSnoJaB7FmeDdhgNydwetxa5E,15588
|
364
|
+
sglang/srt/managers/configure_logging.py,sha256=fOJaXAQ1n9m-8KPJndpsKvS885i69SMafoEADLIVfIM,1633
|
365
|
+
sglang/srt/managers/data_parallel_controller.py,sha256=K6jwHn_UhsC7o_lZT5FQm9oKBbsYhlHKiDSutFlm3jA,9539
|
366
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=HTfpJWMF1EImhKOnLJ96xPmYXm71xzaisLMfxg3zpgs,10111
|
367
|
+
sglang/srt/managers/image_processor.py,sha256=NQnhbV8AYr9H6wTKulxcBd6cauBlkySNSDQzk6nKs74,23906
|
368
|
+
sglang/srt/managers/io_struct.py,sha256=ltod5DPNQBJalrz420mcSZReSV1EmYQF9Edh_FiDMf4,22047
|
369
|
+
sglang/srt/managers/schedule_batch.py,sha256=oYrAtlDUjiekQ8xTSnR15i19SJcNyfbiy61PBJpbMi0,52241
|
370
|
+
sglang/srt/managers/schedule_policy.py,sha256=CTN9bggZ8_EiJJYJwEE6rSlfSrrVjka-hadXcQn10HM,18321
|
371
|
+
sglang/srt/managers/scheduler.py,sha256=Kd80MjJEKxSfsETlacUtOZmZtF5iUI-ED3d8G8QcvHo,92144
|
372
|
+
sglang/srt/managers/session_controller.py,sha256=YOv8cFcuVmdCE4OfQJ6aA5AosHRwtZF9WdUUfUXEp0I,5753
|
373
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=Wg_W42A04WBVTDUpRYpvIW2-RK-AiMwZ9-ytCN_2PjM,44750
|
374
|
+
sglang/srt/managers/tp_worker.py,sha256=o9MY1a8x81nI3W0m64YvOXcAA4sCBvPPmnNU67vBXGs,8710
|
375
|
+
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=bFt8L6H1Tz3QWYdVx9Hl9vUbxY8xmAiaMEMFzCwd7Nk,9022
|
344
376
|
sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
|
345
377
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=qxgpSHm3qtMdab4U35Mr2BE9TQNjElrnrNMTwL_Osdo,1049
|
346
|
-
sglang/srt/mem_cache/chunk_cache.py,sha256=
|
378
|
+
sglang/srt/mem_cache/chunk_cache.py,sha256=gsXvfCkMRLHIvInFopoSGoDxCDd_um7VDPZaWpUF0v4,2696
|
347
379
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
348
|
-
sglang/srt/mem_cache/
|
349
|
-
sglang/srt/mem_cache/
|
350
|
-
sglang/srt/
|
380
|
+
sglang/srt/mem_cache/hiradix_cache.py,sha256=x8cX2d88QjbEjqyOF3vT67lyFSdLem8AYAxej9NF5Jw,14256
|
381
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=ovqpob1luqfzg3qckANwUnEcpLTG-DJcULIZQyeFlVQ,20687
|
382
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=LbdorXpJ42mmNv9y_Mll0v_c5hgy0LraDCn8vK78QnA,12161
|
383
|
+
sglang/srt/metrics/collector.py,sha256=0X40ZZ18182sx2t0eqeqoK7gspH36L343zNvSkgBvd0,9293
|
351
384
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
352
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
353
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
354
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
385
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=l4coS7de4zwYbxuBokZ3MyLZUx9ygGW1BqqqMCJGNtQ,20010
|
386
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=8x3y5rCMotL8iSoSG3YMd77bI6mZOoisbHIbJcBZT04,16809
|
387
|
+
sglang/srt/model_executor/model_runner.py,sha256=b1NEnYaKbg_w6fu2cjdX_YdMjkpwRgBgfMf86T1JJ7w,41727
|
355
388
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
356
|
-
sglang/srt/model_loader/loader.py,sha256=
|
389
|
+
sglang/srt/model_loader/loader.py,sha256=zDmZ7NZ9Z44EDR2H8d3bKadZD4Ey7Jmju8wBnWhM1FI,46805
|
357
390
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
358
|
-
sglang/srt/model_loader/weight_utils.py,sha256=
|
359
|
-
sglang/srt/models/baichuan.py,sha256=
|
360
|
-
sglang/srt/models/chatglm.py,sha256=
|
361
|
-
sglang/srt/models/commandr.py,sha256=
|
362
|
-
sglang/srt/models/dbrx.py,sha256
|
363
|
-
sglang/srt/models/deepseek.py,sha256=
|
364
|
-
sglang/srt/models/deepseek_nextn.py,sha256=
|
365
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
366
|
-
sglang/srt/models/exaone.py,sha256=
|
367
|
-
sglang/srt/models/gemma.py,sha256=
|
368
|
-
sglang/srt/models/gemma2.py,sha256=
|
369
|
-
sglang/srt/models/gemma2_reward.py,sha256=
|
370
|
-
sglang/srt/models/gpt2.py,sha256=
|
371
|
-
sglang/srt/models/gpt_bigcode.py,sha256=
|
372
|
-
sglang/srt/models/granite.py,sha256=
|
373
|
-
sglang/srt/models/grok.py,sha256=
|
374
|
-
sglang/srt/models/internlm2.py,sha256=
|
375
|
-
sglang/srt/models/internlm2_reward.py,sha256=
|
376
|
-
sglang/srt/models/llama.py,sha256=
|
377
|
-
sglang/srt/models/llama_classification.py,sha256=
|
378
|
-
sglang/srt/models/llama_eagle.py,sha256=
|
379
|
-
sglang/srt/models/llama_embedding.py,sha256=
|
380
|
-
sglang/srt/models/llama_reward.py,sha256=
|
381
|
-
sglang/srt/models/llava.py,sha256=
|
382
|
-
sglang/srt/models/llavavid.py,sha256=
|
383
|
-
sglang/srt/models/minicpm.py,sha256
|
384
|
-
sglang/srt/models/minicpm3.py,sha256=
|
385
|
-
sglang/srt/models/minicpmv.py,sha256=
|
391
|
+
sglang/srt/model_loader/weight_utils.py,sha256=dWEnDimMFNHbre_QMB9sG8m_L7_t1NAZk4s-vkdpSHQ,30354
|
392
|
+
sglang/srt/models/baichuan.py,sha256=iXgta-W38OWesxmXWZJ73fUvPdu51EwTQzUD5mmfJ8s,15721
|
393
|
+
sglang/srt/models/chatglm.py,sha256=avLC7mjjGskBxCxy-9s0sMlAJjfFoG_y8VieR1QfDsM,13918
|
394
|
+
sglang/srt/models/commandr.py,sha256=Ug-B0QcdWZufrTybC6K5yP3MLKNsYb-vzfrqUsXYUcI,15276
|
395
|
+
sglang/srt/models/dbrx.py,sha256=0Vf4yhqe8YeQuKR3P-agvYJScmHwH3-tFbyU8kv5QJM,15559
|
396
|
+
sglang/srt/models/deepseek.py,sha256=Le2MXij8m4hT7QYgD0bFMFmYhbgX7SMjoXZFB8BxgyA,16871
|
397
|
+
sglang/srt/models/deepseek_nextn.py,sha256=F8GCAsgq8EfW5ykx9pvQzyHPQ9q8xK5WPd8WtPV-6PU,12141
|
398
|
+
sglang/srt/models/deepseek_v2.py,sha256=TwSz-UbxqHpIDxNmipDQTUOoOpxvHhM0IYSmkyRAERY,48544
|
399
|
+
sglang/srt/models/exaone.py,sha256=5iibqQTjpgosuGRt2rj2lWR0ShK2XGhbdFSnOWpaQss,13386
|
400
|
+
sglang/srt/models/gemma.py,sha256=3XxMDOKz4xMP6VzWoW8f0hmMf8LP8fhzMw5prsYC4e8,12602
|
401
|
+
sglang/srt/models/gemma2.py,sha256=MDe_HNkSpEJpw426tbx3fp271GBlSVEuhIdGeOB_jYA,16356
|
402
|
+
sglang/srt/models/gemma2_reward.py,sha256=V8U3_ADUHWPdOwvEe1jhGW-oJmBgL8t1TY3-67Ksv2A,2618
|
403
|
+
sglang/srt/models/gpt2.py,sha256=dAnfmsAL7JVHakryqrERR1jgL8mI1Op6nPHYfDCF7Ao,9802
|
404
|
+
sglang/srt/models/gpt_bigcode.py,sha256=EAN6xAXpa8m3DcBuH1D4rTPji2oG9NSozGXSNHtE2lw,10268
|
405
|
+
sglang/srt/models/granite.py,sha256=nu_Zl_PYn188gk1uYVZ76y4wwHZV7G0w7uanhqpSFUs,20813
|
406
|
+
sglang/srt/models/grok.py,sha256=LYi-1VpGiB4SvFBc3Scp7vQTjiCODa6J_bFMjSOdsCQ,18768
|
407
|
+
sglang/srt/models/internlm2.py,sha256=4eh9WVgK4yg13IsnH5qB2xUCWnixj_aLLz7qa_4m2_Q,13017
|
408
|
+
sglang/srt/models/internlm2_reward.py,sha256=ndfGmyqYZbVZ7C7rJ-v9oK3wa-EpoBGybS8MlyKZi2E,2522
|
409
|
+
sglang/srt/models/llama.py,sha256=FEtMrzjrbQLOOQvrqGFwslizjHPaU--9rEiXlr7MIiI,22958
|
410
|
+
sglang/srt/models/llama_classification.py,sha256=4QWTFaUZIFKYZvEzs8bx8VkOZNIwdYCLrnwrdAw4QK0,3108
|
411
|
+
sglang/srt/models/llama_eagle.py,sha256=4ynCbF2Lp-t0_T036N_Pa_8a1SIPkNeWuEwF7r5x1ls,4819
|
412
|
+
sglang/srt/models/llama_embedding.py,sha256=zq-_lNu35VBFc7eemiam0zdkGIE8fzrgk5OWYfirZnA,3254
|
413
|
+
sglang/srt/models/llama_reward.py,sha256=LF2nqMV5XOrljGjAwJg43mBv3z6Q040I2EYlgZeCp8k,4681
|
414
|
+
sglang/srt/models/llava.py,sha256=J06XzASrhg2Pw1Z_jMoC1lXI4hFfoZacqS2mhiLI41k,26778
|
415
|
+
sglang/srt/models/llavavid.py,sha256=iwqwTruJTG9D1zV95RHq-RpTp1g12kKPrNIfv2a2XyI,12485
|
416
|
+
sglang/srt/models/minicpm.py,sha256=-ot45U_Bv4x85JdbIAQXoxa1sF-ZDkBk8flU-Ruli5Y,14652
|
417
|
+
sglang/srt/models/minicpm3.py,sha256=sRHPFUH636GIY94B-hpAN2MSzYT1pzLPVypTNjUtttY,26270
|
418
|
+
sglang/srt/models/minicpmv.py,sha256=LH3IHnPJJMxPOb3rZuDReE_21aUPA56EvqFUguRcyGY,45948
|
386
419
|
sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
|
387
|
-
sglang/srt/models/mixtral.py,sha256=
|
388
|
-
sglang/srt/models/mixtral_quant.py,sha256=
|
389
|
-
sglang/srt/models/mllama.py,sha256=
|
390
|
-
sglang/srt/models/olmo.py,sha256
|
391
|
-
sglang/srt/models/olmo2.py,sha256=
|
392
|
-
sglang/srt/models/olmoe.py,sha256=
|
393
|
-
sglang/srt/models/phi3_small.py,sha256=
|
394
|
-
sglang/srt/models/qwen.py,sha256=
|
395
|
-
sglang/srt/models/qwen2.py,sha256=
|
396
|
-
sglang/srt/models/qwen2_5_vl.py,sha256=
|
397
|
-
sglang/srt/models/qwen2_eagle.py,sha256=
|
398
|
-
sglang/srt/models/qwen2_moe.py,sha256=
|
399
|
-
sglang/srt/models/
|
420
|
+
sglang/srt/models/mixtral.py,sha256=6Fse2J-20IMylP-yzpEihIinaH37TmmslATbLcWBRYY,14926
|
421
|
+
sglang/srt/models/mixtral_quant.py,sha256=MSa6UKPbgv8Rn8Iv8o1dQhcstAHLNQzE0eepFx_hYSw,15221
|
422
|
+
sglang/srt/models/mllama.py,sha256=SlNDNKAlF42dtkS-JgkLNE8k70YGcGuIeg24FvTs-Us,37886
|
423
|
+
sglang/srt/models/olmo.py,sha256=FJk8A3T3TF5QcTV6rMP8np94QtvxpMWlgCsv_5VwpVE,12632
|
424
|
+
sglang/srt/models/olmo2.py,sha256=U0ScFzWazOrb_Q90sfXkpVNAsXT-pgZbNgGh80R40VE,14288
|
425
|
+
sglang/srt/models/olmoe.py,sha256=tx5OKWLOr6_pohe2eBcIodCmcuSjtpteHq_tG_QVYCY,15910
|
426
|
+
sglang/srt/models/phi3_small.py,sha256=_ZDXVJN3B5f-46MMKxNim9874cVqJpzBipbmfBz6Hn4,15464
|
427
|
+
sglang/srt/models/qwen.py,sha256=edS0UYq6AoHZdYUJtQa5wyFNzZMW0JAMmBulH2uheaw,10719
|
428
|
+
sglang/srt/models/qwen2.py,sha256=KkMd6fxF2dn77RfrH_Ayc4w0uZmOLtwPlsVzDGIMW8g,15980
|
429
|
+
sglang/srt/models/qwen2_5_vl.py,sha256=QQFFPV4t35grzjVdB13SCPxxLiJDXLQ_tBk62DRljq4,29113
|
430
|
+
sglang/srt/models/qwen2_eagle.py,sha256=Iz0HWL2FgSD3FqoFhfYmbIZeEYkPTJ96lYbkncmHJX4,4644
|
431
|
+
sglang/srt/models/qwen2_moe.py,sha256=zYLJecN1mUyMBmnZoVaJd8LUKT4YZPBIO1lfFOqmU-c,17755
|
432
|
+
sglang/srt/models/qwen2_rm.py,sha256=-mQXDEv11p-I1HXgYLTtY6ROem6UYorO958WsDrzsgs,2837
|
433
|
+
sglang/srt/models/qwen2_vl.py,sha256=7xe2aS7V3_0z85ohegpz7DQLU1JN3LgvLpG_miyb-Ms,24520
|
400
434
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
401
|
-
sglang/srt/models/stablelm.py,sha256=
|
402
|
-
sglang/srt/models/torch_native_llama.py,sha256=
|
403
|
-
sglang/srt/models/xverse.py,sha256=
|
404
|
-
sglang/srt/models/xverse_moe.py,sha256=
|
405
|
-
sglang/srt/models/yivl.py,sha256=
|
406
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
407
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
435
|
+
sglang/srt/models/stablelm.py,sha256=w93fNXpDwQbuKi4tdeo0bsXFZrMZVY4_pgNL0E5RErQ,12242
|
436
|
+
sglang/srt/models/torch_native_llama.py,sha256=5tfFSMAXB3ScToqTALtCXa8Oo-qPCJh-KQCNB6QOlNA,19293
|
437
|
+
sglang/srt/models/xverse.py,sha256=I7ivNsk6NRqPxlMUmdclpzDCvhAnWbv_GOj01MKHJrQ,13996
|
438
|
+
sglang/srt/models/xverse_moe.py,sha256=xLwn5pRwQrvj7zMmwl3o49m7xILb2ACRdWvm9hY8LDc,16743
|
439
|
+
sglang/srt/models/yivl.py,sha256=oToK7-u5IGO7xwpJIQ7VtudlK6-zPqJX4bt6_wv0SH8,4850
|
440
|
+
sglang/srt/openai_api/adapter.py,sha256=FqYLICnYL53UwJT2OJPPJrMJxK-p372-cFl0TmZOQBE,66142
|
441
|
+
sglang/srt/openai_api/protocol.py,sha256=KLh9u9On595L7EDXmg76X8mhCw4yLDUjY7zvdjlwPWM,12336
|
408
442
|
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
409
|
-
sglang/srt/sampling/sampling_batch_info.py,sha256=
|
410
|
-
sglang/srt/sampling/sampling_params.py,sha256=
|
411
|
-
sglang/srt/sampling/penaltylib/__init__.py,sha256=
|
412
|
-
sglang/srt/sampling/penaltylib/
|
413
|
-
sglang/srt/sampling/penaltylib/
|
414
|
-
sglang/srt/sampling/penaltylib/
|
415
|
-
sglang/srt/sampling/penaltylib/
|
416
|
-
sglang/srt/
|
417
|
-
sglang/srt/speculative/
|
418
|
-
sglang/srt/speculative/
|
419
|
-
sglang/srt/speculative/
|
420
|
-
sglang/srt/speculative/
|
421
|
-
sglang/
|
422
|
-
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
443
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=T4UJ_CRB6A0HnRwEsqmxy1CJQMwZaVdxbdlCeTK4BUI,11992
|
444
|
+
sglang/srt/sampling/sampling_params.py,sha256=HihGfhdR4FmOeltEqpW2kSLfNu94VCd8l0RNOQFSl-Q,5919
|
445
|
+
sglang/srt/sampling/penaltylib/__init__.py,sha256=mtN8grFEcaBUhl4yBHmw8NNirt_i6uKO2cDNLHOpZQE,496
|
446
|
+
sglang/srt/sampling/penaltylib/frequency_penalty.py,sha256=wdWLmhYnJkuS5qjFGbSLsWBvmYyKR77HIKFl5T_aavY,2232
|
447
|
+
sglang/srt/sampling/penaltylib/min_new_tokens.py,sha256=rdU_D7RoIcrQPhysNQEzmr4TO2OoEi___p-i3QdwkgU,3331
|
448
|
+
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=XM-Lm6u7gYPtMZrTIc0FR4QxNZxBH5s_Cj82umyCzYk,5721
|
449
|
+
sglang/srt/sampling/penaltylib/presence_penalty.py,sha256=ZU18IAOSd1qNcZdsP47TS-gjM-jVq9s8YuQWhUZ7xZI,2205
|
450
|
+
sglang/srt/speculative/build_eagle_tree.py,sha256=8_uUpkQAE0qcn5mA6NPjfl81EMuNxg4fZq628wjEZNU,20805
|
451
|
+
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=zsF4AcsykaFjzZ5SKdvUQyEB1GVXTvR1_kI_oaQafBo,7704
|
452
|
+
sglang/srt/speculative/eagle_utils.py,sha256=DhpNNNN56jw0ucXSBYq9_IzDuJmEKgXS6lbLczaghNs,27196
|
453
|
+
sglang/srt/speculative/eagle_worker.py,sha256=05bUiqQdO6B9dwfSW1BOEtez6HX24OHwJfm6bFPJa7I,18807
|
454
|
+
sglang/srt/speculative/spec_info.py,sha256=hJR0b3gZ0QA0KZLq6TfqSDJkmpV6mThyle3sHYI4o4M,522
|
455
|
+
sglang/test/few_shot_gsm8k.py,sha256=7VLbWl4nCQs1wjtW4q-46jf9jUCycSs5Iw8v7sUSzBw,4284
|
423
456
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
424
457
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
425
|
-
sglang/test/runners.py,sha256=
|
458
|
+
sglang/test/runners.py,sha256=HjMMlQtyYSzVYE9vHYaaWW5KEEJg6D5nklQIgvAsuFc,25189
|
459
|
+
sglang/test/send_one.py,sha256=6FhbJ3c8RpXxvFTELRXaF97GpT7zXXsCDYZh1DqG22E,2550
|
426
460
|
sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
|
427
461
|
sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
|
428
462
|
sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
|
@@ -431,12 +465,12 @@ sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylp
|
|
431
465
|
sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
|
432
466
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
433
467
|
sglang/test/test_block_fp8.py,sha256=rhrIun8aW5zq2qvuGRlo7F7aZ_upjVxtQMVlyc2Th_E,11771
|
468
|
+
sglang/test/test_block_fp8_ep.py,sha256=hkuQjmCv3y_hWZj21cT9EaB6KSfT3JSzYPRQNFaLP-Q,10759
|
434
469
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
435
|
-
sglang/test/test_programs.py,sha256=
|
436
|
-
sglang/test/test_utils.py,sha256=
|
437
|
-
sglang/
|
438
|
-
sglang-0.4.3.
|
439
|
-
sglang-0.4.3.
|
440
|
-
sglang-0.4.3.
|
441
|
-
sglang-0.4.3.
|
442
|
-
sglang-0.4.3.post2.dist-info/RECORD,,
|
470
|
+
sglang/test/test_programs.py,sha256=VZ3vXtUDBnXz0M7gFdDH8hXg9Wa0j_qI8CVqjEgRN_E,18877
|
471
|
+
sglang/test/test_utils.py,sha256=jVkIdnzkQ5ZGynWkfFMd6GLonJwq_2a6iZAvPLUFlZo,28549
|
472
|
+
sglang-0.4.3.post4.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
473
|
+
sglang-0.4.3.post4.dist-info/METADATA,sha256=I3MCfxnWrPkIIQdq1rdqEqm0_1QkyR_QmVZ-jcIbHZU,24409
|
474
|
+
sglang-0.4.3.post4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
475
|
+
sglang-0.4.3.post4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
476
|
+
sglang-0.4.3.post4.dist-info/RECORD,,
|