sglang 0.4.1.post4__py3-none-any.whl → 0.4.1.post6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +18 -1
- sglang/lang/interpreter.py +71 -1
- sglang/lang/ir.py +2 -0
- sglang/srt/configs/__init__.py +4 -0
- sglang/srt/configs/chatglm.py +78 -0
- sglang/srt/configs/dbrx.py +279 -0
- sglang/srt/configs/model_config.py +16 -7
- sglang/srt/hf_transformers_utils.py +9 -14
- sglang/srt/layers/attention/__init__.py +8 -1
- sglang/srt/layers/attention/flashinfer_backend.py +21 -5
- sglang/srt/layers/linear.py +89 -47
- sglang/srt/layers/logits_processor.py +6 -6
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +16 -5
- sglang/srt/layers/moe/fused_moe_triton/layer.py +39 -12
- sglang/srt/layers/moe/topk.py +4 -2
- sglang/srt/layers/parameter.py +439 -0
- sglang/srt/layers/quantization/__init__.py +5 -2
- sglang/srt/layers/quantization/fp8.py +107 -53
- sglang/srt/layers/quantization/fp8_utils.py +1 -1
- sglang/srt/layers/quantization/int8_kernel.py +54 -0
- sglang/srt/layers/quantization/modelopt_quant.py +174 -0
- sglang/srt/layers/quantization/w8a8_int8.py +117 -0
- sglang/srt/layers/radix_attention.py +2 -0
- sglang/srt/layers/vocab_parallel_embedding.py +16 -3
- sglang/srt/managers/cache_controller.py +307 -0
- sglang/srt/managers/configure_logging.py +43 -0
- sglang/srt/managers/data_parallel_controller.py +2 -0
- sglang/srt/managers/detokenizer_manager.py +0 -2
- sglang/srt/managers/io_struct.py +29 -13
- sglang/srt/managers/schedule_batch.py +7 -1
- sglang/srt/managers/scheduler.py +58 -15
- sglang/srt/managers/session_controller.py +1 -1
- sglang/srt/managers/tokenizer_manager.py +109 -45
- sglang/srt/mem_cache/memory_pool.py +313 -53
- sglang/srt/metrics/collector.py +32 -35
- sglang/srt/model_executor/cuda_graph_runner.py +14 -7
- sglang/srt/model_executor/forward_batch_info.py +20 -15
- sglang/srt/model_executor/model_runner.py +53 -10
- sglang/srt/models/chatglm.py +1 -1
- sglang/srt/models/dbrx.py +1 -1
- sglang/srt/models/grok.py +25 -16
- sglang/srt/models/llama.py +46 -4
- sglang/srt/models/qwen2.py +11 -0
- sglang/srt/models/qwen2_eagle.py +131 -0
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +15 -5
- sglang/srt/sampling/sampling_batch_info.py +15 -5
- sglang/srt/sampling/sampling_params.py +1 -1
- sglang/srt/server.py +125 -69
- sglang/srt/server_args.py +39 -19
- sglang/srt/speculative/eagle_utils.py +93 -85
- sglang/srt/speculative/eagle_worker.py +48 -33
- sglang/srt/torch_memory_saver_adapter.py +59 -0
- sglang/srt/utils.py +61 -5
- sglang/test/test_programs.py +23 -1
- sglang/test/test_utils.py +36 -7
- sglang/version.py +1 -1
- {sglang-0.4.1.post4.dist-info → sglang-0.4.1.post6.dist-info}/METADATA +16 -15
- {sglang-0.4.1.post4.dist-info → sglang-0.4.1.post6.dist-info}/RECORD +61 -51
- {sglang-0.4.1.post4.dist-info → sglang-0.4.1.post6.dist-info}/WHEEL +1 -1
- {sglang-0.4.1.post4.dist-info → sglang-0.4.1.post6.dist-info}/LICENSE +0 -0
- {sglang-0.4.1.post4.dist-info → sglang-0.4.1.post6.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -36,7 +36,7 @@ DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
|
|
36
36
|
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
|
37
37
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
|
38
38
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it"
|
39
|
-
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct
|
39
|
+
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct"
|
40
40
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8"
|
41
41
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
|
42
42
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
@@ -532,6 +532,8 @@ def run_bench_serving(
|
|
532
532
|
request_rate,
|
533
533
|
other_server_args,
|
534
534
|
dataset_name="random",
|
535
|
+
dataset_path="",
|
536
|
+
tokenizer=None,
|
535
537
|
random_input_len=4096,
|
536
538
|
random_output_len=2048,
|
537
539
|
disable_stream=False,
|
@@ -553,9 +555,9 @@ def run_bench_serving(
|
|
553
555
|
host=None,
|
554
556
|
port=None,
|
555
557
|
dataset_name=dataset_name,
|
556
|
-
dataset_path=
|
558
|
+
dataset_path=dataset_path,
|
557
559
|
model=None,
|
558
|
-
tokenizer=
|
560
|
+
tokenizer=tokenizer,
|
559
561
|
num_prompts=num_prompts,
|
560
562
|
sharegpt_output_len=None,
|
561
563
|
random_input_len=random_input_len,
|
@@ -657,16 +659,16 @@ STDERR_FILENAME = "stderr.txt"
|
|
657
659
|
STDOUT_FILENAME = "stdout.txt"
|
658
660
|
|
659
661
|
|
660
|
-
def read_output(output_lines):
|
662
|
+
def read_output(output_lines: List[str], filename: str = STDERR_FILENAME):
|
661
663
|
"""Print the output in real time with another thread."""
|
662
|
-
while not os.path.exists(
|
664
|
+
while not os.path.exists(filename):
|
663
665
|
time.sleep(1)
|
664
666
|
|
665
667
|
pt = 0
|
666
668
|
while pt >= 0:
|
667
|
-
if pt > 0 and not os.path.exists(
|
669
|
+
if pt > 0 and not os.path.exists(filename):
|
668
670
|
break
|
669
|
-
lines = open(
|
671
|
+
lines = open(filename).readlines()
|
670
672
|
for line in lines[pt:]:
|
671
673
|
print(line, end="", flush=True)
|
672
674
|
output_lines.append(line)
|
@@ -747,6 +749,33 @@ def run_and_check_memory_leak(
|
|
747
749
|
assert has_abort
|
748
750
|
|
749
751
|
|
752
|
+
def run_command_and_capture_output(command, env: Optional[dict] = None):
|
753
|
+
stdout = open(STDOUT_FILENAME, "w")
|
754
|
+
stderr = open(STDERR_FILENAME, "w")
|
755
|
+
process = subprocess.Popen(
|
756
|
+
command, stdout=stdout, stderr=stderr, env=env, text=True
|
757
|
+
)
|
758
|
+
|
759
|
+
# Launch a thread to stream the output
|
760
|
+
output_lines = []
|
761
|
+
t = threading.Thread(target=read_output, args=(output_lines, STDOUT_FILENAME))
|
762
|
+
t.start()
|
763
|
+
|
764
|
+
# Join the process
|
765
|
+
process.wait()
|
766
|
+
|
767
|
+
stdout.close()
|
768
|
+
stderr.close()
|
769
|
+
if os.path.exists(STDOUT_FILENAME):
|
770
|
+
os.remove(STDOUT_FILENAME)
|
771
|
+
if os.path.exists(STDERR_FILENAME):
|
772
|
+
os.remove(STDERR_FILENAME)
|
773
|
+
kill_process_tree(process.pid)
|
774
|
+
t.join()
|
775
|
+
|
776
|
+
return output_lines
|
777
|
+
|
778
|
+
|
750
779
|
def run_mmlu_test(
|
751
780
|
disable_radix_cache=False,
|
752
781
|
enable_mixed_chunk=False,
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.1.
|
1
|
+
__version__ = "0.4.1.post6"
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.1.
|
3
|
+
Version: 0.4.1.post6
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -239,11 +239,11 @@ Requires-Dist: uvloop; extra == "runtime-common"
|
|
239
239
|
Requires-Dist: xgrammar>=0.1.6; extra == "runtime-common"
|
240
240
|
Provides-Extra: srt
|
241
241
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
242
|
+
Requires-Dist: cuda-python; extra == "srt"
|
243
|
+
Requires-Dist: sgl-kernel>=0.0.2.post12; extra == "srt"
|
242
244
|
Requires-Dist: torch; extra == "srt"
|
243
245
|
Requires-Dist: vllm<=0.6.4.post1,>=0.6.3.post1; extra == "srt"
|
244
|
-
Requires-Dist: cuda-python; extra == "srt"
|
245
246
|
Requires-Dist: flashinfer==0.1.6; extra == "srt"
|
246
|
-
Requires-Dist: sgl-kernel>=0.0.2.post11; extra == "srt"
|
247
247
|
Provides-Extra: srt-hip
|
248
248
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
249
249
|
Requires-Dist: torch; extra == "srt-hip"
|
@@ -259,6 +259,8 @@ Provides-Extra: anthropic
|
|
259
259
|
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
260
260
|
Provides-Extra: litellm
|
261
261
|
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
262
|
+
Provides-Extra: torch-memory-saver
|
263
|
+
Requires-Dist: torch_memory_saver; extra == "torch-memory-saver"
|
262
264
|
Provides-Extra: test
|
263
265
|
Requires-Dist: jsonlines; extra == "test"
|
264
266
|
Requires-Dist: matplotlib; extra == "test"
|
@@ -314,9 +316,9 @@ Requires-Dist: sglang[test]; extra == "dev-hpu"
|
|
314
316
|
--------------------------------------------------------------------------------
|
315
317
|
|
316
318
|
| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
|
317
|
-
| [**Documentation**](https://
|
318
|
-
| [**Join Slack**](https://
|
319
|
-
| [**Join Bi-Weekly Development Meeting**](https://
|
319
|
+
| [**Documentation**](https://docs.sglang.ai/)
|
320
|
+
| [**Join Slack**](https://slack.sglang.ai/)
|
321
|
+
| [**Join Bi-Weekly Development Meeting**](https://meeting.sglang.ai/)
|
320
322
|
| [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
|
321
323
|
|
322
324
|
## News
|
@@ -346,14 +348,14 @@ The core features include:
|
|
346
348
|
- **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
|
347
349
|
|
348
350
|
## Getting Started
|
349
|
-
- [Install SGLang](https://
|
350
|
-
- [Quick Start](https://
|
351
|
-
- [Backend Tutorial](https://
|
352
|
-
- [Frontend Tutorial](https://
|
353
|
-
- [Contribution Guide](https://
|
351
|
+
- [Install SGLang](https://docs.sglang.ai/start/install.html)
|
352
|
+
- [Quick Start](https://docs.sglang.ai/start/send_request.html)
|
353
|
+
- [Backend Tutorial](https://docs.sglang.ai/backend/openai_api_completions.html)
|
354
|
+
- [Frontend Tutorial](https://docs.sglang.ai/frontend/frontend.html)
|
355
|
+
- [Contribution Guide](https://docs.sglang.ai/references/contribution_guide.html)
|
354
356
|
|
355
357
|
## Benchmark and Performance
|
356
|
-
Learn more in
|
358
|
+
Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/), [v0.4 blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)
|
357
359
|
|
358
360
|
## Roadmap
|
359
361
|
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
|
@@ -362,5 +364,4 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
|
|
362
364
|
The project is supported by (alphabetically): AMD, Baseten, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
|
363
365
|
|
364
366
|
## Acknowledgment and Citation
|
365
|
-
We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
|
366
|
-
Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
367
|
+
We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql). Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
@@ -4,20 +4,20 @@ sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
|
|
4
4
|
sglang/bench_offline_throughput.py,sha256=r-uBvpnx-30mAnVwQB4WlqiXxy2fn5a1NUARwZcaIo4,12533
|
5
5
|
sglang/bench_one_batch.py,sha256=uw__0H3e3lY_6EDz4IAZUoYxq9kQIOPbbcyguYxttSA,15975
|
6
6
|
sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
|
7
|
-
sglang/bench_serving.py,sha256=
|
7
|
+
sglang/bench_serving.py,sha256=VCF1PW6zy2lhJBr2owiluHnMDgrakyA0Qw-m--mnehk,54253
|
8
8
|
sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
|
9
9
|
sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
10
10
|
sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
|
11
11
|
sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
|
12
12
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
13
13
|
sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
|
14
|
-
sglang/version.py,sha256=
|
14
|
+
sglang/version.py,sha256=67TlBPUpVb158CbDn3v32POQ-USKtg7P1fg71jmrBWc,28
|
15
15
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
|
17
17
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
18
18
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
19
|
-
sglang/lang/interpreter.py,sha256=
|
20
|
-
sglang/lang/ir.py,sha256=
|
19
|
+
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
20
|
+
sglang/lang/ir.py,sha256=dtA6rs5JIN0tMm3jhgRqdpRhH2Sckil-BMyLRMyBEIY,18494
|
21
21
|
sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
|
22
22
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
@@ -29,17 +29,20 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
|
|
29
29
|
sglang/srt/_custom_ops.py,sha256=Y4gyTDGhWz-W2Igq25Ojm8XFiyvkawW9I-79iwYvxJ0,3574
|
30
30
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
31
31
|
sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
|
32
|
-
sglang/srt/hf_transformers_utils.py,sha256=
|
32
|
+
sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
|
33
33
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
34
34
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
35
|
-
sglang/srt/server.py,sha256=
|
36
|
-
sglang/srt/server_args.py,sha256=
|
37
|
-
sglang/srt/
|
38
|
-
sglang/srt/
|
35
|
+
sglang/srt/server.py,sha256=g2Wf1S3tOev0T2Wn98UkaOuDYPMixsy2xUzW2jUrQ3o,37148
|
36
|
+
sglang/srt/server_args.py,sha256=N8ByNO3vlQ-nl_-rgiCsRkiksefKtyKY9W7-24rhQKw,36965
|
37
|
+
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
38
|
+
sglang/srt/utils.py,sha256=8TobQ4TwR22aa4j3W-XMkhJVBsuZ85t0zI8Mupx7L3M,46180
|
39
|
+
sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
|
40
|
+
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
41
|
+
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
39
42
|
sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
|
40
43
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
41
44
|
sglang/srt/configs/load_config.py,sha256=TcPi_HY6xu5SiVZsxPOoB5pGeDUNebOk7muoUH9VBDg,3083
|
42
|
-
sglang/srt/configs/model_config.py,sha256=
|
45
|
+
sglang/srt/configs/model_config.py,sha256=qDTL1oxSlCxptPX8AI-VlEuxMB7m0UCAUDsbwXpUjow,16831
|
43
46
|
sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
|
44
47
|
sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
|
45
48
|
sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
|
@@ -62,17 +65,18 @@ sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=P3WKgddcf
|
|
62
65
|
sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwxBQ,5471
|
63
66
|
sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
|
64
67
|
sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
|
65
|
-
sglang/srt/layers/linear.py,sha256=
|
66
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
68
|
+
sglang/srt/layers/linear.py,sha256=s5hGfdBgYkFMHolTTsSLXQdOay9HZxYyrS6AYFZaeYA,48860
|
69
|
+
sglang/srt/layers/logits_processor.py,sha256=r2yGmNqQTpi1l7qvN2Bvjb7lVKfBsxIBrJ6CpBh-_wg,12993
|
70
|
+
sglang/srt/layers/parameter.py,sha256=pC6hz2Vu9bFKH4Mt5lh-BwNWUNrJO_GsaFY9aNVDsrY,14684
|
67
71
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
68
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
72
|
+
sglang/srt/layers/radix_attention.py,sha256=nVHKPFyr-CWNm6AnMGPhuuTFTtgYwPL8sAVBZ5u3d94,2232
|
69
73
|
sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
|
70
74
|
sglang/srt/layers/sampler.py,sha256=HQWi1zb1gmD9pHMQyEP3WPjnL8vy-ncZDVMENbjQW7c,6944
|
71
75
|
sglang/srt/layers/torchao_utils.py,sha256=8c2vzt106iP_QKbJtfN1GuABW8nCuP5dElQLUeci6qg,3934
|
72
|
-
sglang/srt/layers/vocab_parallel_embedding.py,sha256=
|
73
|
-
sglang/srt/layers/attention/__init__.py,sha256=
|
76
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=8TvdxJZipUy6Ewm8Ovsbho7GzZ_yvDZ-eXjK_8vc_8k,22149
|
77
|
+
sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
|
74
78
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
|
75
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
79
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=1He2KvcPQmLbr-8wkgy20NYjsu_hicW6NlumoVP9-kM,33842
|
76
80
|
sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
|
77
81
|
sglang/srt/layers/attention/triton_backend.py,sha256=44ScKsVs-rFvqsaAZG_mREEpczhGaUBvaflvWqrukVE,6743
|
78
82
|
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
|
@@ -80,13 +84,13 @@ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXf
|
|
80
84
|
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
|
81
85
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
|
82
86
|
sglang/srt/layers/moe/fused_moe_native.py,sha256=8q-LFZMSCGLc2_Gltp2lH0gSb4A1WOuKQW3wo3rpj5g,1601
|
83
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
87
|
+
sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
|
84
88
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
85
89
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
|
86
90
|
sglang/srt/layers/moe/ep_moe/layer.py,sha256=6iQU5ZjQ8IXGoQ8ZlBuJqyQxYTEem9vXI6rbVIWKlZw,22303
|
87
91
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
88
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
89
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
92
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=3at2h0NDC8JF144jH6h5ze_YkBasvjo227bdFLiK0vs,36759
|
93
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=KCYdT1kftwY8V_wRahoW6GbXkrm7lAZ86xvmu1qZK8w,21802
|
90
94
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
91
95
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
92
96
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -178,11 +182,14 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=MZF6BHJVjduz-XerTrHvCP3qS
|
|
178
182
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
|
179
183
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
180
184
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
181
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
185
|
+
sglang/srt/layers/quantization/__init__.py,sha256=vM6Vhlu-Jv4t9DDwywitXGz58psTQ5k7guVuK0o4jTk,4785
|
182
186
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
183
|
-
sglang/srt/layers/quantization/fp8.py,sha256=
|
187
|
+
sglang/srt/layers/quantization/fp8.py,sha256=2k6vk2sTVB6JCtEJLsFFn5bJKR8lWwMRke4tu9nnTP0,34806
|
184
188
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
|
185
|
-
sglang/srt/layers/quantization/fp8_utils.py,sha256=
|
189
|
+
sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
|
190
|
+
sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
|
191
|
+
sglang/srt/layers/quantization/modelopt_quant.py,sha256=64Qec1kzduAcxyDLd_Y47wDHZ4ShS9Vb-Rf57jc1Zmg,6245
|
192
|
+
sglang/srt/layers/quantization/w8a8_int8.py,sha256=RO_s0KPH5wSx2HaI5PbAkdEXVqPS05AS6yo3oyZnIbw,3353
|
186
193
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
187
194
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
188
195
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
@@ -224,35 +231,37 @@ sglang/srt/layers/quantization/fp8_utils.py,sha256=qBVJXxbxqmf8-Juq0t-IXWjlaZoeP
|
|
224
231
|
sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
|
225
232
|
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
226
233
|
sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
|
227
|
-
sglang/srt/managers/
|
228
|
-
sglang/srt/managers/
|
234
|
+
sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
|
235
|
+
sglang/srt/managers/configure_logging.py,sha256=wa1NLWaxC2NGSTJflZvCvUrONH4i6wreNvVHb90bd14,1374
|
236
|
+
sglang/srt/managers/data_parallel_controller.py,sha256=VZSXGsNJ029BJlu56lCugaapMPvzjzE2yFATd8KWLNY,8468
|
237
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=nZkbwt4yty_oy8rvg4T7PbgyVLoBLohvHl25xlQpBoo,8439
|
229
238
|
sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
|
230
|
-
sglang/srt/managers/io_struct.py,sha256=
|
231
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
239
|
+
sglang/srt/managers/io_struct.py,sha256=H1rNLCl2iqDijUGLBafjodTrohaUi1ztJn69XjkhjTk,16207
|
240
|
+
sglang/srt/managers/schedule_batch.py,sha256=jmPTc-XyI-AXktz9Rofs-Fb3OlOgb-bThI142kOy--g,47134
|
232
241
|
sglang/srt/managers/schedule_policy.py,sha256=aHkIL9pZtc4Kdmy8XU9tsjaDzdChVN2dnGKvJkSyqFg,17965
|
233
|
-
sglang/srt/managers/scheduler.py,sha256=
|
234
|
-
sglang/srt/managers/session_controller.py,sha256=
|
235
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
242
|
+
sglang/srt/managers/scheduler.py,sha256=Kn7NyoLwHIeuGKQercV4jKsC5-KVLK4JhRiflNNLu9A,66790
|
243
|
+
sglang/srt/managers/session_controller.py,sha256=0L9_3lhFGU4kLm8b2G1QAeslxvTT_y_Iw8spwrpgr30,5508
|
244
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=p9k7fvFWyKkHO-Am-2JdbR6-VRsuGEiwQO7t1F7_rfs,35956
|
236
245
|
sglang/srt/managers/tp_worker.py,sha256=-bvUFCo544QQSEHqPPjeOvCWMEFn01Bva6AeO39Qe3o,8043
|
237
246
|
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=rdHz2thdGSmceDedrolHOqjNPhrralyDTuNREL56oNI,9067
|
238
247
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
|
239
248
|
sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
|
240
249
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
241
|
-
sglang/srt/mem_cache/memory_pool.py,sha256=
|
250
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=McBKAcV444ewM-idOuCbfeKHoF-lhCL9m5R27M8H9ew,20401
|
242
251
|
sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
|
243
|
-
sglang/srt/metrics/collector.py,sha256=
|
252
|
+
sglang/srt/metrics/collector.py,sha256=sbgruNDzxBmTd-lnRi8mBZGCt2J7qgRVvDk2LQ5HvQU,6936
|
244
253
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
245
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
246
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
247
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
254
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=rGG0ZS673YC_RVaXMlmNTBJln-L7ugsgDz0Q6XmO0Cc,18544
|
255
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=Vu6qlbfm6dMUfvGaSmmLIroi8hBqfDpNVLxl7oECzIs,15001
|
256
|
+
sglang/srt/model_executor/model_runner.py,sha256=AQPN4q-Wuw3yCeFjXwWvN5m07geS07l21SXFKr-FeCk,31955
|
248
257
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
249
258
|
sglang/srt/model_loader/loader.py,sha256=7OG_8-66vFDFZ9kVKGNK1BFBjZ6ql449dlyvdCbMqvE,43876
|
250
259
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
251
260
|
sglang/srt/model_loader/weight_utils.py,sha256=kQo9KPThjH3HAOCfC_tdwdrshdWuWJOVpPR0skSyaRY,24193
|
252
261
|
sglang/srt/models/baichuan.py,sha256=PzBOFcEAixakPEkQSaJwC0Xc1fu-yCsN9T0I67r8QmY,14919
|
253
|
-
sglang/srt/models/chatglm.py,sha256=
|
262
|
+
sglang/srt/models/chatglm.py,sha256=uAScfDA9V1FtSdW0sA-QMnluCQoKkfVcDyQ_X4nh1-A,12900
|
254
263
|
sglang/srt/models/commandr.py,sha256=PNXgfOZF84h-rSH0edEECUmEGW8YLb44V75Z_oDhFiA,14223
|
255
|
-
sglang/srt/models/dbrx.py,sha256=
|
264
|
+
sglang/srt/models/dbrx.py,sha256=KwsiP6Bnz-lJGhksHgfPswnLC35hv2etHRKJgWkmvzs,14567
|
256
265
|
sglang/srt/models/deepseek.py,sha256=_cVOvR6eSEgRf6TUBpTD5uMdijDWFw4sSt4lGzl8tbg,15697
|
257
266
|
sglang/srt/models/deepseek_v2.py,sha256=vbRhgI8yD7EmHUpq5pzI_sVpGLnkeyJ7ew-3Pl6D8F4,38499
|
258
267
|
sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
|
@@ -262,10 +271,10 @@ sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb
|
|
262
271
|
sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
|
263
272
|
sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
|
264
273
|
sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,20409
|
265
|
-
sglang/srt/models/grok.py,sha256=
|
274
|
+
sglang/srt/models/grok.py,sha256=gIr6uFNLv42v-yjAko4w8uugAA7vE0396S23V98Aiu4,18002
|
266
275
|
sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
|
267
276
|
sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
|
268
|
-
sglang/srt/models/llama.py,sha256=
|
277
|
+
sglang/srt/models/llama.py,sha256=r9MwIsKv5SrwpLewdB_gqai1YDfjyG-2dlT_pYPNIac,22087
|
269
278
|
sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
|
270
279
|
sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
|
271
280
|
sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
|
@@ -283,7 +292,8 @@ sglang/srt/models/olmo2.py,sha256=aC7svioN7XT5owRxPrvhvWBNMON9QXGQBWJ1KHMyXeA,13
|
|
283
292
|
sglang/srt/models/olmoe.py,sha256=LiHVGfRaC5c_BU_vVgtV9uLuDH_SC0dw1kEc61posmI,15351
|
284
293
|
sglang/srt/models/phi3_small.py,sha256=44_my3QmgJ2N7SOkGZzEb62DXBeCVHojfmCWgkk2uCI,14802
|
285
294
|
sglang/srt/models/qwen.py,sha256=_FKDbwaS5C07uJyyivZpBrXJVej4Ph9ivzJdzWJPxJ4,9904
|
286
|
-
sglang/srt/models/qwen2.py,sha256=
|
295
|
+
sglang/srt/models/qwen2.py,sha256=aRumlGWYYUntMHR3LoOpeduelnzo9Ls0FXVwVKiL7tY,13332
|
296
|
+
sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
|
287
297
|
sglang/srt/models/qwen2_moe.py,sha256=6xRRJxWWh1M5UFPfvhsCpY477zv-30AeSRJXsvOkgFc,16542
|
288
298
|
sglang/srt/models/qwen2_vl.py,sha256=3EaUlTbyWOTRXA7eViK1WqmVbCFhXLIpnos49zzf-yM,26561
|
289
299
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
@@ -294,17 +304,17 @@ sglang/srt/models/xverse_moe.py,sha256=7E60YIST4ELYwLRgjtHiLRI5Uyc7XqQTM7jQXiWaQ
|
|
294
304
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
295
305
|
sglang/srt/openai_api/adapter.py,sha256=Yv-rEA0Jd54iFlnkVy-OZM4EnPqkW_NLtDPGCiPWVWo,56386
|
296
306
|
sglang/srt/openai_api/protocol.py,sha256=v_YUwH1PF4vIVqSE5rj1ODdSglprTe_vGiXoS99cOV4,11613
|
297
|
-
sglang/srt/sampling/sampling_batch_info.py,sha256=
|
298
|
-
sglang/srt/sampling/sampling_params.py,sha256=
|
307
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=BEcDjMlTQ6wRuvwwCjB-2cy6GMgS3dpmjG4xetBuI4Q,9637
|
308
|
+
sglang/srt/sampling/sampling_params.py,sha256=YdfObBzfkgK9rU2XY6_7kxl7H1wjtDGrinpyIszTGUw,5678
|
299
309
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
300
310
|
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
|
301
311
|
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
|
302
312
|
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
|
303
313
|
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
|
304
|
-
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=
|
314
|
+
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=vmE5muVz_ztRA6glgYOiQnKas_zTvQZ3nxcUEQao-L8,3070
|
305
315
|
sglang/srt/speculative/build_eagle_tree.py,sha256=SIKuOFUOIzMLyanL5vViPmFBEiUHm_ezwiGuIyLmauE,9886
|
306
|
-
sglang/srt/speculative/eagle_utils.py,sha256=
|
307
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
316
|
+
sglang/srt/speculative/eagle_utils.py,sha256=Z51xGuvn-ZIMp0OXENZUhpDOz8kTDkujhHZA-Z2MKbA,23422
|
317
|
+
sglang/srt/speculative/eagle_worker.py,sha256=P__BMJ0eKLaPzCS8jEWylk2POstue5u3RIVZeFtj84I,7843
|
308
318
|
sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
|
309
319
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
310
320
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
@@ -319,11 +329,11 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
319
329
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
320
330
|
sglang/test/test_block_fp8.py,sha256=rhrIun8aW5zq2qvuGRlo7F7aZ_upjVxtQMVlyc2Th_E,11771
|
321
331
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
322
|
-
sglang/test/test_programs.py,sha256=
|
323
|
-
sglang/test/test_utils.py,sha256=
|
332
|
+
sglang/test/test_programs.py,sha256=AABFLu0W9FlK-VN2wb2rLkwFCK6YCkLYrgQClymzpcw,18835
|
333
|
+
sglang/test/test_utils.py,sha256=3xUJpb-HNSwzoRZ_eVO_Q52m5pWlQMU84PXnsSzoD9g,24585
|
324
334
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
325
|
-
sglang-0.4.1.
|
326
|
-
sglang-0.4.1.
|
327
|
-
sglang-0.4.1.
|
328
|
-
sglang-0.4.1.
|
329
|
-
sglang-0.4.1.
|
335
|
+
sglang-0.4.1.post6.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
336
|
+
sglang-0.4.1.post6.dist-info/METADATA,sha256=hls-gahHEVIiMlj9JHUiKHzKkiUiS_J5_JACvVh6riM,22527
|
337
|
+
sglang-0.4.1.post6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
338
|
+
sglang-0.4.1.post6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
339
|
+
sglang-0.4.1.post6.dist-info/RECORD,,
|
File without changes
|
File without changes
|