sglang 0.3.5.post2__py3-none-any.whl → 0.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +2 -2
- sglang/api.py +2 -2
- sglang/bench_latency.py +1 -553
- sglang/bench_offline_throughput.py +48 -20
- sglang/bench_one_batch.py +472 -0
- sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
- sglang/bench_serving.py +125 -6
- sglang/check_env.py +3 -6
- sglang/lang/backend/base_backend.py +1 -1
- sglang/lang/backend/runtime_endpoint.py +2 -2
- sglang/srt/configs/model_config.py +13 -14
- sglang/srt/constrained/__init__.py +13 -14
- sglang/srt/constrained/base_grammar_backend.py +13 -15
- sglang/srt/constrained/outlines_backend.py +28 -17
- sglang/srt/constrained/outlines_jump_forward.py +13 -15
- sglang/srt/constrained/xgrammar_backend.py +47 -58
- sglang/srt/conversation.py +13 -15
- sglang/srt/hf_transformers_utils.py +13 -15
- sglang/srt/layers/activation.py +16 -13
- sglang/srt/layers/attention/flashinfer_backend.py +106 -54
- sglang/srt/layers/attention/triton_backend.py +9 -7
- sglang/srt/layers/attention/triton_ops/decode_attention.py +51 -55
- sglang/srt/layers/attention/triton_ops/extend_attention.py +16 -16
- sglang/srt/layers/attention/triton_ops/prefill_attention.py +13 -15
- sglang/srt/layers/custom_op_util.py +25 -0
- sglang/srt/layers/fused_moe_grok/__init__.py +1 -0
- sglang/srt/layers/{fused_moe → fused_moe_grok}/fused_moe.py +11 -4
- sglang/srt/layers/{fused_moe → fused_moe_grok}/layer.py +4 -9
- sglang/srt/layers/{fused_moe/patch.py → fused_moe_patch.py} +5 -0
- sglang/srt/layers/fused_moe_triton/__init__.py +44 -0
- sglang/srt/layers/fused_moe_triton/fused_moe.py +861 -0
- sglang/srt/layers/fused_moe_triton/layer.py +633 -0
- sglang/srt/layers/layernorm.py +17 -15
- sglang/srt/layers/logits_processor.py +23 -25
- sglang/srt/layers/quantization/__init__.py +77 -17
- sglang/srt/layers/radix_attention.py +13 -15
- sglang/srt/layers/rotary_embedding.py +13 -13
- sglang/srt/layers/sampler.py +4 -8
- sglang/srt/layers/torchao_utils.py +2 -0
- sglang/srt/lora/lora.py +13 -14
- sglang/srt/lora/lora_config.py +13 -14
- sglang/srt/lora/lora_manager.py +22 -24
- sglang/srt/managers/data_parallel_controller.py +98 -27
- sglang/srt/managers/detokenizer_manager.py +13 -15
- sglang/srt/managers/io_struct.py +63 -21
- sglang/srt/managers/schedule_batch.py +154 -59
- sglang/srt/managers/schedule_policy.py +18 -16
- sglang/srt/managers/scheduler.py +278 -109
- sglang/srt/managers/session_controller.py +61 -0
- sglang/srt/managers/tokenizer_manager.py +63 -18
- sglang/srt/managers/tp_worker.py +25 -16
- sglang/srt/managers/tp_worker_overlap_thread.py +62 -67
- sglang/srt/metrics/collector.py +13 -15
- sglang/srt/metrics/func_timer.py +13 -15
- sglang/srt/mm_utils.py +13 -14
- sglang/srt/model_executor/cuda_graph_runner.py +63 -25
- sglang/srt/model_executor/forward_batch_info.py +128 -32
- sglang/srt/model_executor/model_runner.py +132 -64
- sglang/srt/model_parallel.py +98 -0
- sglang/srt/models/chatglm.py +15 -16
- sglang/srt/models/commandr.py +15 -16
- sglang/srt/models/dbrx.py +15 -16
- sglang/srt/models/deepseek.py +15 -15
- sglang/srt/models/deepseek_v2.py +162 -59
- sglang/srt/models/exaone.py +14 -15
- sglang/srt/models/gemma.py +14 -14
- sglang/srt/models/gemma2.py +31 -25
- sglang/srt/models/gemma2_reward.py +13 -14
- sglang/srt/models/gpt_bigcode.py +14 -14
- sglang/srt/models/grok.py +15 -15
- sglang/srt/models/internlm2.py +13 -15
- sglang/srt/models/internlm2_reward.py +13 -14
- sglang/srt/models/llama.py +21 -21
- sglang/srt/models/llama_classification.py +13 -14
- sglang/srt/models/llama_reward.py +13 -14
- sglang/srt/models/llava.py +14 -16
- sglang/srt/models/llavavid.py +14 -16
- sglang/srt/models/minicpm.py +13 -15
- sglang/srt/models/minicpm3.py +13 -15
- sglang/srt/models/mistral.py +13 -15
- sglang/srt/models/mixtral.py +15 -15
- sglang/srt/models/mixtral_quant.py +14 -14
- sglang/srt/models/olmo.py +22 -20
- sglang/srt/models/olmoe.py +23 -20
- sglang/srt/models/phi3_small.py +447 -0
- sglang/srt/models/qwen.py +14 -14
- sglang/srt/models/qwen2.py +22 -19
- sglang/srt/models/qwen2_moe.py +17 -18
- sglang/srt/models/qwen2_vl.py +13 -6
- sglang/srt/models/stablelm.py +18 -16
- sglang/srt/models/torch_native_llama.py +107 -93
- sglang/srt/models/xverse.py +13 -14
- sglang/srt/models/xverse_moe.py +15 -16
- sglang/srt/models/yivl.py +13 -15
- sglang/srt/openai_api/adapter.py +19 -17
- sglang/srt/openai_api/protocol.py +14 -16
- sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
- sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
- sglang/srt/sampling/sampling_batch_info.py +61 -57
- sglang/srt/sampling/sampling_params.py +14 -16
- sglang/srt/server.py +86 -35
- sglang/srt/server_args.py +96 -80
- sglang/srt/utils.py +266 -68
- sglang/test/few_shot_gsm8k.py +8 -4
- sglang/test/runners.py +38 -20
- sglang/test/srt/sampling/penaltylib/utils.py +23 -21
- sglang/test/test_utils.py +31 -20
- sglang/version.py +1 -1
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/LICENSE +1 -1
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/METADATA +66 -57
- sglang-0.3.6.post1.dist-info/RECORD +164 -0
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/WHEEL +1 -1
- sglang/srt/layers/fused_moe/__init__.py +0 -1
- sglang-0.3.5.post2.dist-info/RECORD +0 -156
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.post1.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
import argparse
|
4
4
|
import asyncio
|
5
|
+
import copy
|
5
6
|
import os
|
6
7
|
import random
|
7
8
|
import subprocess
|
@@ -43,7 +44,7 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8
|
|
43
44
|
|
44
45
|
def is_in_ci():
|
45
46
|
"""Return whether it is in CI runner."""
|
46
|
-
return os.getenv("SGLANG_IS_IN_CI", "false") == "true"
|
47
|
+
return os.getenv("SGLANG_IS_IN_CI", "false").lower() == "true"
|
47
48
|
|
48
49
|
|
49
50
|
if is_in_ci():
|
@@ -438,18 +439,22 @@ def popen_launch_server(
|
|
438
439
|
process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
|
439
440
|
|
440
441
|
start_time = time.time()
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
442
|
+
with requests.Session() as session:
|
443
|
+
while time.time() - start_time < timeout:
|
444
|
+
try:
|
445
|
+
headers = {
|
446
|
+
"Content-Type": "application/json; charset=utf-8",
|
447
|
+
"Authorization": f"Bearer {api_key}",
|
448
|
+
}
|
449
|
+
response = session.get(
|
450
|
+
f"{base_url}/health_generate",
|
451
|
+
headers=headers,
|
452
|
+
)
|
453
|
+
if response.status_code == 200:
|
454
|
+
return process
|
455
|
+
except requests.RequestException:
|
456
|
+
pass
|
457
|
+
time.sleep(10)
|
453
458
|
raise TimeoutError("Server failed to start within the timeout period.")
|
454
459
|
|
455
460
|
|
@@ -529,6 +534,7 @@ def run_bench_serving(
|
|
529
534
|
random_input_len=4096,
|
530
535
|
random_output_len=2048,
|
531
536
|
disable_stream=False,
|
537
|
+
need_warmup=False,
|
532
538
|
):
|
533
539
|
# Launch the server
|
534
540
|
base_url = DEFAULT_URL_FOR_TEST
|
@@ -562,9 +568,14 @@ def run_bench_serving(
|
|
562
568
|
disable_stream=disable_stream,
|
563
569
|
disable_ignore_eos=False,
|
564
570
|
extra_request_body=None,
|
571
|
+
profile=None,
|
565
572
|
)
|
566
573
|
|
567
574
|
try:
|
575
|
+
if need_warmup:
|
576
|
+
warmup_args = copy.deepcopy(args)
|
577
|
+
warmup_args.num_prompts = 16
|
578
|
+
run_benchmark(warmup_args)
|
568
579
|
res = run_benchmark(args)
|
569
580
|
finally:
|
570
581
|
kill_child_process(process.pid, include_self=True)
|
@@ -573,11 +584,11 @@ def run_bench_serving(
|
|
573
584
|
return res
|
574
585
|
|
575
586
|
|
576
|
-
def
|
587
|
+
def run_bench_one_batch(model, other_args):
|
577
588
|
command = [
|
578
589
|
"python3",
|
579
590
|
"-m",
|
580
|
-
"sglang.
|
591
|
+
"sglang.bench_one_batch",
|
581
592
|
"--model-path",
|
582
593
|
model,
|
583
594
|
"--batch-size",
|
@@ -664,7 +675,7 @@ def run_and_check_memory_leak(
|
|
664
675
|
workload_func,
|
665
676
|
disable_radix_cache,
|
666
677
|
enable_mixed_chunk,
|
667
|
-
|
678
|
+
disable_overlap,
|
668
679
|
chunked_prefill_size,
|
669
680
|
):
|
670
681
|
other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
|
@@ -672,8 +683,8 @@ def run_and_check_memory_leak(
|
|
672
683
|
other_args += ["--disable-radix-cache"]
|
673
684
|
if enable_mixed_chunk:
|
674
685
|
other_args += ["--enable-mixed-chunk"]
|
675
|
-
if
|
676
|
-
other_args += ["--
|
686
|
+
if disable_overlap:
|
687
|
+
other_args += ["--disable-overlap-schedule"]
|
677
688
|
|
678
689
|
model = DEFAULT_MODEL_NAME_FOR_TEST
|
679
690
|
port = random.randint(4000, 5000)
|
@@ -725,7 +736,7 @@ def run_and_check_memory_leak(
|
|
725
736
|
def run_mmlu_test(
|
726
737
|
disable_radix_cache=False,
|
727
738
|
enable_mixed_chunk=False,
|
728
|
-
|
739
|
+
disable_overlap=False,
|
729
740
|
chunked_prefill_size=32,
|
730
741
|
):
|
731
742
|
def workload_func(base_url, model):
|
@@ -748,7 +759,7 @@ def run_mmlu_test(
|
|
748
759
|
workload_func,
|
749
760
|
disable_radix_cache,
|
750
761
|
enable_mixed_chunk,
|
751
|
-
|
762
|
+
disable_overlap,
|
752
763
|
chunked_prefill_size,
|
753
764
|
)
|
754
765
|
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.3.
|
1
|
+
__version__ = "0.3.6.post1"
|
@@ -186,7 +186,7 @@
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
187
187
|
identification within third-party archives.
|
188
188
|
|
189
|
-
Copyright
|
189
|
+
Copyright 2023-2024 SGLang Team
|
190
190
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
192
192
|
you may not use this file except in compliance with the License.
|
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.6.post1
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
|
-
License:
|
5
|
+
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
7
7
|
http://www.apache.org/licenses/
|
8
8
|
|
@@ -190,7 +190,7 @@ License: Apache License
|
|
190
190
|
same "printed page" as the copyright notice for easier
|
191
191
|
identification within third-party archives.
|
192
192
|
|
193
|
-
Copyright
|
193
|
+
Copyright 2023-2024 SGLang Team
|
194
194
|
|
195
195
|
Licensed under the Apache License, Version 2.0 (the "License");
|
196
196
|
you may not use this file except in compliance with the License.
|
@@ -215,74 +215,85 @@ Requires-Dist: requests
|
|
215
215
|
Requires-Dist: tqdm
|
216
216
|
Requires-Dist: numpy
|
217
217
|
Requires-Dist: IPython
|
218
|
-
Provides-Extra:
|
219
|
-
Requires-Dist: sglang[srt]; extra == "all"
|
220
|
-
Requires-Dist: sglang[openai]; extra == "all"
|
221
|
-
Requires-Dist: sglang[anthropic]; extra == "all"
|
222
|
-
Requires-Dist: sglang[litellm]; extra == "all"
|
223
|
-
Provides-Extra: all_hip
|
224
|
-
Requires-Dist: sglang[srt_hip]; extra == "all-hip"
|
225
|
-
Requires-Dist: sglang[openai]; extra == "all-hip"
|
226
|
-
Requires-Dist: sglang[anthropic]; extra == "all-hip"
|
227
|
-
Requires-Dist: sglang[litellm]; extra == "all-hip"
|
228
|
-
Provides-Extra: all_xpu
|
229
|
-
Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
|
230
|
-
Requires-Dist: sglang[openai]; extra == "all-xpu"
|
231
|
-
Requires-Dist: sglang[anthropic]; extra == "all-xpu"
|
232
|
-
Requires-Dist: sglang[litellm]; extra == "all-xpu"
|
233
|
-
Provides-Extra: anthropic
|
234
|
-
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
235
|
-
Provides-Extra: dev
|
236
|
-
Requires-Dist: sglang[all]; extra == "dev"
|
237
|
-
Requires-Dist: sglang[test]; extra == "dev"
|
238
|
-
Provides-Extra: dev_hip
|
239
|
-
Requires-Dist: sglang[all_hip]; extra == "dev-hip"
|
240
|
-
Requires-Dist: sglang[test]; extra == "dev-hip"
|
241
|
-
Provides-Extra: dev_xpu
|
242
|
-
Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
|
243
|
-
Requires-Dist: sglang[test]; extra == "dev-xpu"
|
244
|
-
Provides-Extra: litellm
|
245
|
-
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
246
|
-
Provides-Extra: openai
|
247
|
-
Requires-Dist: openai>=1.0; extra == "openai"
|
248
|
-
Requires-Dist: tiktoken; extra == "openai"
|
249
|
-
Provides-Extra: runtime_common
|
218
|
+
Provides-Extra: runtime-common
|
250
219
|
Requires-Dist: aiohttp; extra == "runtime-common"
|
251
220
|
Requires-Dist: decord; extra == "runtime-common"
|
252
221
|
Requires-Dist: fastapi; extra == "runtime-common"
|
253
|
-
Requires-Dist:
|
254
|
-
Requires-Dist:
|
222
|
+
Requires-Dist: hf_transfer; extra == "runtime-common"
|
223
|
+
Requires-Dist: huggingface_hub; extra == "runtime-common"
|
255
224
|
Requires-Dist: interegular; extra == "runtime-common"
|
225
|
+
Requires-Dist: modelscope; extra == "runtime-common"
|
256
226
|
Requires-Dist: orjson; extra == "runtime-common"
|
227
|
+
Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
|
257
228
|
Requires-Dist: packaging; extra == "runtime-common"
|
258
229
|
Requires-Dist: pillow; extra == "runtime-common"
|
259
230
|
Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
|
260
231
|
Requires-Dist: psutil; extra == "runtime-common"
|
261
232
|
Requires-Dist: pydantic; extra == "runtime-common"
|
262
233
|
Requires-Dist: python-multipart; extra == "runtime-common"
|
234
|
+
Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
|
263
235
|
Requires-Dist: torchao; extra == "runtime-common"
|
264
236
|
Requires-Dist: uvicorn; extra == "runtime-common"
|
265
237
|
Requires-Dist: uvloop; extra == "runtime-common"
|
266
|
-
Requires-Dist:
|
267
|
-
Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
|
268
|
-
Requires-Dist: modelscope; extra == "runtime-common"
|
238
|
+
Requires-Dist: xgrammar>=0.1.4; extra == "runtime-common"
|
269
239
|
Provides-Extra: srt
|
270
240
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
271
241
|
Requires-Dist: torch; extra == "srt"
|
272
|
-
Requires-Dist: vllm
|
273
|
-
Provides-Extra:
|
242
|
+
Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
|
243
|
+
Provides-Extra: srt-hip
|
274
244
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
275
245
|
Requires-Dist: torch; extra == "srt-hip"
|
276
246
|
Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
|
277
|
-
Provides-Extra:
|
247
|
+
Provides-Extra: srt-xpu
|
278
248
|
Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
|
249
|
+
Provides-Extra: srt-hpu
|
250
|
+
Requires-Dist: sglang[runtime_common]; extra == "srt-hpu"
|
251
|
+
Provides-Extra: openai
|
252
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
253
|
+
Requires-Dist: tiktoken; extra == "openai"
|
254
|
+
Provides-Extra: anthropic
|
255
|
+
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
256
|
+
Provides-Extra: litellm
|
257
|
+
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
279
258
|
Provides-Extra: test
|
280
259
|
Requires-Dist: jsonlines; extra == "test"
|
281
260
|
Requires-Dist: matplotlib; extra == "test"
|
282
261
|
Requires-Dist: pandas; extra == "test"
|
283
|
-
Requires-Dist:
|
262
|
+
Requires-Dist: sentence_transformers; extra == "test"
|
284
263
|
Requires-Dist: accelerate; extra == "test"
|
285
264
|
Requires-Dist: peft; extra == "test"
|
265
|
+
Provides-Extra: all
|
266
|
+
Requires-Dist: sglang[srt]; extra == "all"
|
267
|
+
Requires-Dist: sglang[openai]; extra == "all"
|
268
|
+
Requires-Dist: sglang[anthropic]; extra == "all"
|
269
|
+
Requires-Dist: sglang[litellm]; extra == "all"
|
270
|
+
Provides-Extra: all-hip
|
271
|
+
Requires-Dist: sglang[srt_hip]; extra == "all-hip"
|
272
|
+
Requires-Dist: sglang[openai]; extra == "all-hip"
|
273
|
+
Requires-Dist: sglang[anthropic]; extra == "all-hip"
|
274
|
+
Requires-Dist: sglang[litellm]; extra == "all-hip"
|
275
|
+
Provides-Extra: all-xpu
|
276
|
+
Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
|
277
|
+
Requires-Dist: sglang[openai]; extra == "all-xpu"
|
278
|
+
Requires-Dist: sglang[anthropic]; extra == "all-xpu"
|
279
|
+
Requires-Dist: sglang[litellm]; extra == "all-xpu"
|
280
|
+
Provides-Extra: all-hpu
|
281
|
+
Requires-Dist: sglang[srt_hpu]; extra == "all-hpu"
|
282
|
+
Requires-Dist: sglang[openai]; extra == "all-hpu"
|
283
|
+
Requires-Dist: sglang[anthropic]; extra == "all-hpu"
|
284
|
+
Requires-Dist: sglang[litellm]; extra == "all-hpu"
|
285
|
+
Provides-Extra: dev
|
286
|
+
Requires-Dist: sglang[all]; extra == "dev"
|
287
|
+
Requires-Dist: sglang[test]; extra == "dev"
|
288
|
+
Provides-Extra: dev-hip
|
289
|
+
Requires-Dist: sglang[all_hip]; extra == "dev-hip"
|
290
|
+
Requires-Dist: sglang[test]; extra == "dev-hip"
|
291
|
+
Provides-Extra: dev-xpu
|
292
|
+
Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
|
293
|
+
Requires-Dist: sglang[test]; extra == "dev-xpu"
|
294
|
+
Provides-Extra: dev-hpu
|
295
|
+
Requires-Dist: sglang[all_hpu]; extra == "dev-hpu"
|
296
|
+
Requires-Dist: sglang[test]; extra == "dev-hpu"
|
286
297
|
|
287
298
|
<div align="center" id="sglangtop">
|
288
299
|
<img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
|
@@ -321,21 +332,16 @@ SGLang is a fast serving framework for large language models and vision language
|
|
321
332
|
It makes your interaction with models faster and more controllable by co-designing the backend runtime and frontend language.
|
322
333
|
The core features include:
|
323
334
|
|
324
|
-
- **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/
|
335
|
+
- **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, overhead-free CPU scheduler, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (FP8/INT4/AWQ/GPTQ).
|
325
336
|
- **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
|
326
|
-
- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
|
337
|
+
- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
|
327
338
|
- **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
|
328
339
|
|
329
340
|
## Getting Started
|
330
|
-
Install SGLang
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
## Backend: SGLang Runtime (SRT)
|
335
|
-
See [https://sgl-project.github.io/backend/backend.html](https://sgl-project.github.io/backend/backend.html)
|
336
|
-
|
337
|
-
## Frontend: Structured Generation Language (SGLang)
|
338
|
-
See [https://sgl-project.github.io/frontend/frontend.html](https://sgl-project.github.io/frontend/frontend.html)
|
341
|
+
- [Install SGLang](https://sgl-project.github.io/start/install.html)
|
342
|
+
- [Send requests](https://sgl-project.github.io/start/send_request.html)
|
343
|
+
- [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
|
344
|
+
- [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
|
339
345
|
|
340
346
|
## Benchmark And Performance
|
341
347
|
Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)
|
@@ -343,6 +349,9 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
|
|
343
349
|
## Roadmap
|
344
350
|
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
|
345
351
|
|
346
|
-
##
|
352
|
+
## Adoption and Sponsorship
|
353
|
+
The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, and xAI.
|
354
|
+
|
355
|
+
## Acknowledgment and Citation
|
356
|
+
We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
|
347
357
|
Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
348
|
-
We also learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
|
@@ -0,0 +1,164 @@
|
|
1
|
+
sglang/__init__.py,sha256=3M0oz0ZA8fULhV5LwQ4hxh-MRdHsOJRD1D63C60pdG4,1616
|
2
|
+
sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
|
3
|
+
sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
|
4
|
+
sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
|
5
|
+
sglang/bench_one_batch.py,sha256=WxrQUkMcxz5GV8OEHj0ckHgpC76HgO6YxmDvJFRDeyU,15670
|
6
|
+
sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
|
7
|
+
sglang/bench_serving.py,sha256=hI7FjaERyqKBrYtKewDU6E4rSufKxqsUPyUgtWtTKSI,52545
|
8
|
+
sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
|
9
|
+
sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
10
|
+
sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
|
11
|
+
sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
|
12
|
+
sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
|
13
|
+
sglang/version.py,sha256=YrfhKDmn6rTAj_qREKEXk2FahHCqSbHd4BNoD7wlIi0,28
|
14
|
+
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
|
16
|
+
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
|
+
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
|
+
sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
|
19
|
+
sglang/lang/ir.py,sha256=zpzzAO1YVldhE95Vwz5hU_TQltu-xt8A6rfFr0PuIDA,18410
|
20
|
+
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
21
|
+
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
+
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
23
|
+
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
24
|
+
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
25
|
+
sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
|
26
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=IWbrAKrUkzNOvwV6V9_y6pkTr2SUYEkKBT-3kirgad0,10514
|
27
|
+
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
28
|
+
sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
|
29
|
+
sglang/srt/hf_transformers_utils.py,sha256=sUUCpjbTHuYDMuwOaz00nH5fataXKjliD8gCxXU64sw,6712
|
30
|
+
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
31
|
+
sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
|
32
|
+
sglang/srt/server.py,sha256=7PSxAUhiS796yQFeiQxiilRhLQ3FpV0wL53CfDgkCIk,30851
|
33
|
+
sglang/srt/server_args.py,sha256=CfmpU6_EDnxJzpJiRx2n6AhOPCtrHPOf-7wEtTF__L0,30834
|
34
|
+
sglang/srt/utils.py,sha256=APZEUancLC0jRI1JMbv7e5bIZy3OEySGyZspxGA60yQ,33509
|
35
|
+
sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
|
36
|
+
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
37
|
+
sglang/srt/configs/model_config.py,sha256=dQ58mYKN3M5IwldFZkwIb4CCBa6dREb5Om4Kg2kffOE,9565
|
38
|
+
sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
|
39
|
+
sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
|
40
|
+
sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
|
41
|
+
sglang/srt/constrained/outlines_backend.py,sha256=IDpyzXJS-ydRXYOHHzx1bO9VjiMRF8E5knn4CLFwPU8,6447
|
42
|
+
sglang/srt/constrained/outlines_jump_forward.py,sha256=IGg6mThDepugfez0jnQ6HfLSHtiUl_Mq7bsPFppb3DA,6196
|
43
|
+
sglang/srt/constrained/xgrammar_backend.py,sha256=4ZCQgcjWEY2Lg4r2V9sAiYJJblkQ_uVbEnvsjqhR1Pc,4548
|
44
|
+
sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwxBQ,5471
|
45
|
+
sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
|
46
|
+
sglang/srt/layers/fused_moe_patch.py,sha256=dxjcBMY_zAqA0pnmy5KDUZZJSd5Q64Xlxhxyb33cdMk,4240
|
47
|
+
sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
|
48
|
+
sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
|
49
|
+
sglang/srt/layers/logits_processor.py,sha256=V8fHxeQK8lzUhGD2Xc7MY1Y9qBhzFyh6hqp31RJVefg,12669
|
50
|
+
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
51
|
+
sglang/srt/layers/radix_attention.py,sha256=C_mK4mfmKlxMRNeKYP9E5R3PRd3eT-OcE_g3mo36dJM,2058
|
52
|
+
sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
|
53
|
+
sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
|
54
|
+
sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
|
55
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
|
56
|
+
sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
|
57
|
+
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
|
58
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=oblYMbmYzK94H3EA9lMhKWaKdi8HLH5NqAiZmjzj4Es,24875
|
59
|
+
sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
|
60
|
+
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=BE63WhKiutSNkhJLsRwvfsRy-ExvuAv7FZyoWv73ul8,18744
|
61
|
+
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
|
62
|
+
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=Gfct-0_l-S2ZrP4F-zkzNiFbmd3C3f7uJovacOuDxaA,11472
|
63
|
+
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
|
64
|
+
sglang/srt/layers/fused_moe_grok/__init__.py,sha256=rj_JBzcP--eaaM6LGQ-u580uQvqLisp5JtGBAs1fVYc,80
|
65
|
+
sglang/srt/layers/fused_moe_grok/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
|
66
|
+
sglang/srt/layers/fused_moe_grok/layer.py,sha256=v-o5YHYEU2HIEZwouyuc3UyfNj7YQrEYOO_BXKELU7Y,23453
|
67
|
+
sglang/srt/layers/fused_moe_triton/__init__.py,sha256=PHKFqd2hPOO-g9kSMseg2g76lpg9OGXQDThWU6bt9vs,902
|
68
|
+
sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=qwfRBOeY5DT48Q6z71Eh9cjFehvs_K6eLIVWNL044Ug,28363
|
69
|
+
sglang/srt/layers/fused_moe_triton/layer.py,sha256=URDkTt8xEqnqpO5tb_3L7JlhlO53VWfqDDNSRYEu-LY,21545
|
70
|
+
sglang/srt/layers/quantization/__init__.py,sha256=f9tCC_9sHjp7JCPvyZIvuoTB4KooIucGA9S2w7ADevw,4849
|
71
|
+
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
72
|
+
sglang/srt/lora/lora.py,sha256=KhhO9aKCyFWvJnhI07lZKANIvNjtt882HrTYFNBZMv0,15065
|
73
|
+
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
74
|
+
sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
|
75
|
+
sglang/srt/managers/data_parallel_controller.py,sha256=JxRtJJTVn1FU2iD292rLZPftAsR4_8j4d3yF8j0dvBc,8327
|
76
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=nWBn54pz3aQ8tzVvViwwL2k0V4WATi0qw11H0Bzua-Q,7389
|
77
|
+
sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
|
78
|
+
sglang/srt/managers/io_struct.py,sha256=WLXz-tyn0jR7zNO9feRBXgyjphVa8qR55OoEOUdzoVI,13751
|
79
|
+
sglang/srt/managers/schedule_batch.py,sha256=-5oYdkStPiYjPWl0tCkUVRjTGB7fjA0wIngK-09da7w,43111
|
80
|
+
sglang/srt/managers/schedule_policy.py,sha256=ayFz4iPLIlG8mx5i1glTCAMHJPGpFedMP9UgRtqkNhA,12526
|
81
|
+
sglang/srt/managers/scheduler.py,sha256=8owHPXG6fxZtsCWSJ6K7EOlFDcPxYinZC1DwKMJcEVM,55930
|
82
|
+
sglang/srt/managers/session_controller.py,sha256=jXoPHxMGh8T1iYWIEjSXoPVwaL6NEjv3QtqlsrvPE1c,2355
|
83
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=zYbKEKNuM1B3PXzA7jnDpxew-0rZXSX-7dHmVLWG3e4,26477
|
84
|
+
sglang/srt/managers/tp_worker.py,sha256=1SQJ60iKS9e5vGY555fT1iZ4OtLumXzeWfB08fSWKbk,6176
|
85
|
+
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7vhPebaOS4JamaS08CGf_hwxnUO7Gy_SXZXEPwNHKoY,7621
|
86
|
+
sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
|
87
|
+
sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
|
88
|
+
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
89
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
|
90
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
|
91
|
+
sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
|
92
|
+
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
93
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=4hbCtE3gt5kvMNHrnxkE8YPRFcgmVo0Bwz3lgbYZw_E,14805
|
94
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=n5yk927COTU0klDAkQuwrFzamMygfkHxmDp1I6bJYD8,12612
|
95
|
+
sglang/srt/model_executor/model_runner.py,sha256=AafFWd_EDWbOe0o5etAyutGum5O8_9tO55KRcaAWDW4,29680
|
96
|
+
sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
|
97
|
+
sglang/srt/models/chatglm.py,sha256=OikygdK8Mi6F2QPPhAr2E_P4l2V0yWQjDJOdnBAApPE,13216
|
98
|
+
sglang/srt/models/commandr.py,sha256=XkzpfsdDPDx-W5oOac8nFIe39JJZvmv65K5GIpgJTz0,14212
|
99
|
+
sglang/srt/models/dbrx.py,sha256=ucn3UJ1s4nx2qa5hUb8VhJmfVrDZ59e9oNetMU5EWq8,14624
|
100
|
+
sglang/srt/models/deepseek.py,sha256=B5OuW--kDIPfZesOhvGGUhHQNWh0pMPNCYmdsv9lv5U,15922
|
101
|
+
sglang/srt/models/deepseek_v2.py,sha256=shdHVtZGmLEZMZwGlIPz8NPoSb1c_n6hQxWKG45WahE,32265
|
102
|
+
sglang/srt/models/exaone.py,sha256=6LJ1Mr9MbHOXdH_nK9Dba3SR28LMCJvdH1k53w9M9Vg,13081
|
103
|
+
sglang/srt/models/gemma.py,sha256=079CfoQqBnrLIbW0LWcLp-nmb1aPVN1Tw6PxMQQ3Lsk,12289
|
104
|
+
sglang/srt/models/gemma2.py,sha256=lbfQhQpUhf1MAEB_00Uo6rp20k4Hr353UbPKKuMsxec,15020
|
105
|
+
sglang/srt/models/gemma2_reward.py,sha256=cQawatbsfBuWQTueivYHl_17ZoQUHEelI1sr1y5pvfY,2556
|
106
|
+
sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
|
107
|
+
sglang/srt/models/gpt_bigcode.py,sha256=lYo4ajy49VvvPkaduaFtOaCRT_ItqyNUE158S-BI5QA,10136
|
108
|
+
sglang/srt/models/grok.py,sha256=rDIH_SFzauuEHcL_vCOSrYLjdBC3i3o_AcceL3amsJw,14927
|
109
|
+
sglang/srt/models/internlm2.py,sha256=DxbA15d9QR0tLOczpC6DkB8QyNHXJRdZatY6Nskwv1k,12170
|
110
|
+
sglang/srt/models/internlm2_reward.py,sha256=Lr-JA0vfTQJt9q5oDMiopGuoXAevyEv5PAoDe2rsTJk,2425
|
111
|
+
sglang/srt/models/llama.py,sha256=FSGuM3BamhuT5h2jedh5cSFwFYduOJwkAZJJ672awRw,16423
|
112
|
+
sglang/srt/models/llama_classification.py,sha256=c8WZ1ADa3f6s2IJVoP10ouVgeCwv_ndns_qMgLrC6QI,3413
|
113
|
+
sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
|
114
|
+
sglang/srt/models/llama_reward.py,sha256=prhHDPpf1k6tlQtGE6zq5gx0uSZAD3W5v7W28bdgy4U,4619
|
115
|
+
sglang/srt/models/llava.py,sha256=72DnZXIwu78zYqU8YIElq_AaSIFO_icYOPTHXE0_-YQ,24941
|
116
|
+
sglang/srt/models/llavavid.py,sha256=DeWqGSmXgIYGuLyy2ZrxjM9WqbRjueP4chNmXt7Bnus,12221
|
117
|
+
sglang/srt/models/minicpm.py,sha256=KbiTf-kaDAJxSo9Z4IGMTrs9WrYYji1KXO1kA2iy-as,13816
|
118
|
+
sglang/srt/models/minicpm3.py,sha256=C43mTr2Qjccj4sXuTDgzbfZhvCNbsEHNggMRXQ7SrWs,25108
|
119
|
+
sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
|
120
|
+
sglang/srt/models/mixtral.py,sha256=E3d8I7V3Dp1nCEHRbhh-PKBG8UaVK5XOHwl9QyIjcX0,14043
|
121
|
+
sglang/srt/models/mixtral_quant.py,sha256=o-oTG8BGtWuNu-o6muHSarMNBQwrjQowyBFOQhuclZ8,14065
|
122
|
+
sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
|
123
|
+
sglang/srt/models/olmo.py,sha256=DEUPNDM0z83N-Qdhkj2WJMtbiz5JNbSBMIjUaYZN9RM,12068
|
124
|
+
sglang/srt/models/olmoe.py,sha256=jVKrjqQQrWLdlkGSGUaMPdT9PHzNH4X-RVwON29eaGw,15412
|
125
|
+
sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
|
126
|
+
sglang/srt/models/qwen.py,sha256=P9zcFnz_Tsz73tVtLRwZ8uWzCtMxWOrzlv2o9Ys_Gck,9947
|
127
|
+
sglang/srt/models/qwen2.py,sha256=ApFFASNwvrkDXi-KkCNA7fTk4uLMuJWoMg15zCaAKdA,12514
|
128
|
+
sglang/srt/models/qwen2_moe.py,sha256=1oxDsKDq3jlHKx9jMi1SfHOqCRVyN5n76uw3M-CUODE,17048
|
129
|
+
sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
|
130
|
+
sglang/srt/models/stablelm.py,sha256=jpmsyWMJo_9JapOESnuV7ObNCh78BRznXY0iFvvIbZE,11354
|
131
|
+
sglang/srt/models/torch_native_llama.py,sha256=vNQxsnbVAY1bdyMCCWDZAtWdbaFIiJXhmVxHjk5BB9Y,19400
|
132
|
+
sglang/srt/models/xverse.py,sha256=LGe0ma0wOir3x-OLBT_cRocw8JEo9d3AYNxgA2OcLrk,13659
|
133
|
+
sglang/srt/models/xverse_moe.py,sha256=YqbzkSsnTFt-8-aI8YobF9qJA70qrBjbS1Kjn1KNqVY,15766
|
134
|
+
sglang/srt/models/yivl.py,sha256=yj4aWsOBVGQBLurSrLmYXVC7zGIPH7EYHHtAaAZ7Liw,4859
|
135
|
+
sglang/srt/openai_api/adapter.py,sha256=MhOcWZjcLv4_OuvLvDMcAu6K_u2joJvhaZxaKm0hi3M,53634
|
136
|
+
sglang/srt/openai_api/protocol.py,sha256=vBgrbTqtECsZ5dG0rgP1FHsTBt4eR9zbDX3FBIN-rz4,10172
|
137
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=YC-KPyDWyLGNPL4YVcst4xwP8Wlz2zcCNJHB_5zljXQ,8470
|
138
|
+
sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
|
139
|
+
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
140
|
+
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
|
141
|
+
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
|
142
|
+
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
|
143
|
+
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
|
144
|
+
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
|
145
|
+
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
146
|
+
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
147
|
+
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
148
|
+
sglang/test/runners.py,sha256=ANzjrHkT_1E0G3UcD47O8XEKst3Si4AOfx-uErbFS7o,15129
|
149
|
+
sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
|
150
|
+
sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
|
151
|
+
sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
|
152
|
+
sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
|
153
|
+
sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
|
154
|
+
sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
|
155
|
+
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
156
|
+
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
157
|
+
sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
|
158
|
+
sglang/test/test_utils.py,sha256=ULF7C3pLXkMevXgE_Dodt29OBfvvXKUnRvwKhaBg1ys,23470
|
159
|
+
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
160
|
+
sglang-0.3.6.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
161
|
+
sglang-0.3.6.post1.dist-info/METADATA,sha256=XwhCEL8SbEVcT7LQLk26g6tzduS6mByBE7dDqZYpQxo,22073
|
162
|
+
sglang-0.3.6.post1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
163
|
+
sglang-0.3.6.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
164
|
+
sglang-0.3.6.post1.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
from sglang.srt.layers.fused_moe.layer import FusedMoE, FusedMoEMethodBase
|