sglang 0.4.0.post1__py3-none-any.whl → 0.4.0.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_offline_throughput.py +18 -6
- sglang/bench_one_batch.py +13 -0
- sglang/bench_serving.py +8 -1
- sglang/check_env.py +140 -48
- sglang/lang/backend/runtime_endpoint.py +1 -0
- sglang/lang/chat_template.py +32 -0
- sglang/llama3_eval.py +316 -0
- sglang/srt/constrained/xgrammar_backend.py +4 -1
- sglang/srt/layers/attention/flashinfer_backend.py +2 -0
- sglang/srt/layers/attention/triton_backend.py +16 -25
- sglang/srt/layers/attention/triton_ops/decode_attention.py +305 -350
- sglang/srt/layers/ep_moe/layer.py +4 -0
- sglang/srt/layers/fused_moe_triton/fused_moe.py +64 -21
- sglang/srt/layers/fused_moe_triton/layer.py +1 -1
- sglang/srt/layers/logits_processor.py +133 -95
- sglang/srt/layers/quantization/__init__.py +2 -47
- sglang/srt/layers/quantization/fp8.py +58 -10
- sglang/srt/layers/radix_attention.py +8 -1
- sglang/srt/layers/sampler.py +27 -5
- sglang/srt/layers/torchao_utils.py +35 -0
- sglang/srt/managers/detokenizer_manager.py +37 -17
- sglang/srt/managers/io_struct.py +39 -10
- sglang/srt/managers/schedule_batch.py +38 -24
- sglang/srt/managers/schedule_policy.py +64 -5
- sglang/srt/managers/scheduler.py +169 -134
- sglang/srt/managers/tokenizer_manager.py +99 -58
- sglang/srt/mem_cache/base_prefix_cache.py +2 -2
- sglang/srt/mem_cache/chunk_cache.py +2 -2
- sglang/srt/mem_cache/radix_cache.py +12 -2
- sglang/srt/model_executor/cuda_graph_runner.py +24 -10
- sglang/srt/model_executor/model_runner.py +22 -14
- sglang/srt/model_parallel.py +66 -5
- sglang/srt/models/gemma2.py +34 -0
- sglang/srt/models/gemma2_reward.py +0 -1
- sglang/srt/models/granite.py +517 -0
- sglang/srt/models/grok.py +72 -8
- sglang/srt/models/llama.py +22 -0
- sglang/srt/models/llama_classification.py +11 -23
- sglang/srt/models/llama_reward.py +0 -2
- sglang/srt/models/llava.py +37 -14
- sglang/srt/models/qwen2.py +20 -0
- sglang/srt/openai_api/adapter.py +4 -0
- sglang/srt/openai_api/protocol.py +9 -4
- sglang/srt/server.py +1 -1
- sglang/srt/server_args.py +19 -9
- sglang/srt/utils.py +7 -10
- sglang/test/test_utils.py +3 -2
- sglang/utils.py +10 -3
- sglang/version.py +1 -1
- {sglang-0.4.0.post1.dist-info → sglang-0.4.0.post2.dist-info}/METADATA +11 -6
- {sglang-0.4.0.post1.dist-info → sglang-0.4.0.post2.dist-info}/RECORD +54 -52
- {sglang-0.4.0.post1.dist-info → sglang-0.4.0.post2.dist-info}/LICENSE +0 -0
- {sglang-0.4.0.post1.dist-info → sglang-0.4.0.post2.dist-info}/WHEEL +0 -0
- {sglang-0.4.0.post1.dist-info → sglang-0.4.0.post2.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -568,6 +568,7 @@ def run_bench_serving(
|
|
568
568
|
disable_tqdm=False,
|
569
569
|
disable_stream=disable_stream,
|
570
570
|
disable_ignore_eos=False,
|
571
|
+
return_logprob=False,
|
571
572
|
lora_name=None,
|
572
573
|
extra_request_body=None,
|
573
574
|
profile=None,
|
@@ -719,13 +720,13 @@ def run_and_check_memory_leak(
|
|
719
720
|
|
720
721
|
# Clean up everything
|
721
722
|
kill_process_tree(process.pid)
|
722
|
-
kill_process_tree(process.pid)
|
723
723
|
stdout.close()
|
724
724
|
stderr.close()
|
725
725
|
if os.path.exists(STDOUT_FILENAME):
|
726
726
|
os.remove(STDOUT_FILENAME)
|
727
727
|
if os.path.exists(STDERR_FILENAME):
|
728
728
|
os.remove(STDERR_FILENAME)
|
729
|
+
kill_process_tree(process.pid)
|
729
730
|
t.join()
|
730
731
|
|
731
732
|
# Assert success
|
@@ -733,7 +734,7 @@ def run_and_check_memory_leak(
|
|
733
734
|
has_leak = False
|
734
735
|
has_abort = False
|
735
736
|
for line in output_lines:
|
736
|
-
if "
|
737
|
+
if "Uvicorn running" in line:
|
737
738
|
has_new_server = True
|
738
739
|
if "leak" in line:
|
739
740
|
has_leak = True
|
sglang/utils.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""Common utilities
|
1
|
+
"""Common utilities"""
|
2
2
|
|
3
3
|
import base64
|
4
4
|
import gc
|
@@ -79,7 +79,14 @@ class HttpResponse:
|
|
79
79
|
return self.resp.status
|
80
80
|
|
81
81
|
|
82
|
-
def http_request(url, json=None, stream=False, api_key=None, verify=None):
|
82
|
+
def http_request(
|
83
|
+
url,
|
84
|
+
json=None,
|
85
|
+
stream=False,
|
86
|
+
api_key=None,
|
87
|
+
verify=None,
|
88
|
+
method: Optional[str] = None,
|
89
|
+
):
|
83
90
|
"""A faster version of requests.post with low-level urllib API."""
|
84
91
|
headers = {"Content-Type": "application/json; charset=utf-8"}
|
85
92
|
|
@@ -90,7 +97,7 @@ def http_request(url, json=None, stream=False, api_key=None, verify=None):
|
|
90
97
|
if stream:
|
91
98
|
return requests.post(url, json=json, stream=True, headers=headers)
|
92
99
|
else:
|
93
|
-
req = urllib.request.Request(url, headers=headers)
|
100
|
+
req = urllib.request.Request(url, headers=headers, method=method)
|
94
101
|
if json is None:
|
95
102
|
data = None
|
96
103
|
else:
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.0.post1"
|
1
|
+
__version__ = "0.4.0.post2"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.0.post1
|
3
|
+
Version: 0.4.0.post2
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -215,6 +215,7 @@ Requires-Dist: requests
|
|
215
215
|
Requires-Dist: tqdm
|
216
216
|
Requires-Dist: numpy
|
217
217
|
Requires-Dist: IPython
|
218
|
+
Requires-Dist: setproctitle
|
218
219
|
Provides-Extra: runtime-common
|
219
220
|
Requires-Dist: aiohttp; extra == "runtime-common"
|
220
221
|
Requires-Dist: decord; extra == "runtime-common"
|
@@ -232,16 +233,17 @@ Requires-Dist: psutil; extra == "runtime-common"
|
|
232
233
|
Requires-Dist: pydantic; extra == "runtime-common"
|
233
234
|
Requires-Dist: python-multipart; extra == "runtime-common"
|
234
235
|
Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
|
235
|
-
Requires-Dist: torchao; extra == "runtime-common"
|
236
|
+
Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
|
237
|
+
Requires-Dist: gemlite; extra == "runtime-common"
|
236
238
|
Requires-Dist: uvicorn; extra == "runtime-common"
|
237
239
|
Requires-Dist: uvloop; extra == "runtime-common"
|
238
|
-
Requires-Dist: xgrammar>=0.1.
|
240
|
+
Requires-Dist: xgrammar>=0.1.6; extra == "runtime-common"
|
239
241
|
Provides-Extra: srt
|
240
242
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
241
243
|
Requires-Dist: torch; extra == "srt"
|
242
244
|
Requires-Dist: vllm<=0.6.4.post1,>=0.6.3.post1; extra == "srt"
|
243
245
|
Requires-Dist: cuda-python; extra == "srt"
|
244
|
-
Requires-Dist: flashinfer
|
246
|
+
Requires-Dist: flashinfer==0.1.6; extra == "srt"
|
245
247
|
Provides-Extra: srt-hip
|
246
248
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
247
249
|
Requires-Dist: torch; extra == "srt-hip"
|
@@ -311,8 +313,11 @@ Requires-Dist: sglang[test]; extra == "dev-hpu"
|
|
311
313
|
|
312
314
|
--------------------------------------------------------------------------------
|
313
315
|
|
314
|
-
| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
|
315
|
-
|
316
|
+
| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
|
317
|
+
| [**Documentation**](https://sgl-project.github.io/)
|
318
|
+
| [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2tmmp6flg-89dOlJW2TjnBrTRk1I_~GA)
|
319
|
+
| [**Join Bi-Weekly Development Meeting**](https://docs.google.com/document/d/1xEow4eIM152xNcRxqZz9VEcOiTQo8-CEuuQ5qTmkt-E/edit?usp=sharing)
|
320
|
+
| [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
|
316
321
|
|
317
322
|
## News
|
318
323
|
- [2024/12] 🔥 SGLang v0.4: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
|
@@ -1,18 +1,19 @@
|
|
1
1
|
sglang/__init__.py,sha256=b2oIdWzp5P8SzieeOs2TzJoN3Do3tfJbV8gZS_imVcs,1619
|
2
2
|
sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
|
3
3
|
sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
|
4
|
-
sglang/bench_offline_throughput.py,sha256=
|
5
|
-
sglang/bench_one_batch.py,sha256=
|
4
|
+
sglang/bench_offline_throughput.py,sha256=rgMWDhA1Hai0gKBzxc0dzTWfI8l39Cyw2VOCyMt1YyY,12771
|
5
|
+
sglang/bench_one_batch.py,sha256=aF0onHeRjy7AYVjsq1IA3rZEhUuYXuslg1fAhuvJ2yo,16120
|
6
6
|
sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
|
7
|
-
sglang/bench_serving.py,sha256=
|
8
|
-
sglang/check_env.py,sha256=
|
7
|
+
sglang/bench_serving.py,sha256=zv_EcbWno79j7WYFL2m6BfCLT6iSOfGV4uwGbDg9KQA,53141
|
8
|
+
sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
|
9
9
|
sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
10
10
|
sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
|
11
11
|
sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
|
12
|
-
sglang/
|
13
|
-
sglang/
|
12
|
+
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
13
|
+
sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
|
14
|
+
sglang/version.py,sha256=OUNovuQ1RrdJFYetl0e0U0556H_wiyjhVks9-l-zF94,28
|
14
15
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
sglang/lang/chat_template.py,sha256=
|
16
|
+
sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
|
16
17
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
18
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
19
|
sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
|
@@ -23,16 +24,16 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
|
|
23
24
|
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
24
25
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
25
26
|
sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
|
26
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=
|
27
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=dfs-yZ1ekKmnbpZLluQHWPmMeZJKbaaZRRGYRa9eBE8,10541
|
27
28
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
28
29
|
sglang/srt/_custom_ops.py,sha256=Y4gyTDGhWz-W2Igq25Ojm8XFiyvkawW9I-79iwYvxJ0,3574
|
29
30
|
sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21201
|
30
31
|
sglang/srt/hf_transformers_utils.py,sha256=38Ms0H2-VMerOS6jnczcFtZMS6lhw9B5rSWKAfxVUfQ,7945
|
31
32
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
32
|
-
sglang/srt/model_parallel.py,sha256=
|
33
|
-
sglang/srt/server.py,sha256=
|
34
|
-
sglang/srt/server_args.py,sha256=
|
35
|
-
sglang/srt/utils.py,sha256=
|
33
|
+
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
34
|
+
sglang/srt/server.py,sha256=tEciMH_U6WIZYPUGDDM0c4BQ16cvgVdA4II-ksPZoMo,34621
|
35
|
+
sglang/srt/server_args.py,sha256=LgnQ-kBJZ3E7hMMZj9bSK0mn7Bhjk1nJHxLcxl-lGTM,34572
|
36
|
+
sglang/srt/utils.py,sha256=WWEcMJHmvlOjiqE9UicT0ZYwa2PUKDZorAk2Y8PPRBI,42039
|
36
37
|
sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
|
37
38
|
sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
|
38
39
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
@@ -43,7 +44,7 @@ sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO
|
|
43
44
|
sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
|
44
45
|
sglang/srt/constrained/outlines_backend.py,sha256=CipNHNNXs8xtnJNVNe6FCwZUlSbIXbGmWVlZz3hUpFQ,6820
|
45
46
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
46
|
-
sglang/srt/constrained/xgrammar_backend.py,sha256=
|
47
|
+
sglang/srt/constrained/xgrammar_backend.py,sha256=4It9_GqU4UZFhxIw_7hkzpXaMPUtksk6Xfe0Agsfw7A,4620
|
47
48
|
sglang/srt/distributed/__init__.py,sha256=__tl9Frrf3PFrSyNYcn5i-y2rL-J4-Qn6RJwrsZ4xgc,83
|
48
49
|
sglang/srt/distributed/communication_op.py,sha256=ZoIhboZyefiAwr-1K-wF3rAFSQ4Wt-RxXpsX443Gbt4,1157
|
49
50
|
sglang/srt/distributed/parallel_state.py,sha256=HplRH5S0AWdwSdhoHYX9_UWQZlFjh2Z1LHaz68EXlpE,47555
|
@@ -62,56 +63,56 @@ sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDv
|
|
62
63
|
sglang/srt/layers/fused_moe_patch.py,sha256=DMIyrwOON7OSidKZdreL5HzMhP0AD5Ues0xdY-ADOQw,4471
|
63
64
|
sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
|
64
65
|
sglang/srt/layers/linear.py,sha256=dF2HvqiMbhWlCjvkLFRCcgUFGhG-B0keM_CIpjvgTtg,46154
|
65
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
66
|
+
sglang/srt/layers/logits_processor.py,sha256=JlOU0x8vBGIuTwHSdjR6Kly9_uzilBMv0NE_rvUx0W4,14747
|
66
67
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
67
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
68
|
+
sglang/srt/layers/radix_attention.py,sha256=E4cmvkcCdCtb6VyLNrCKy1D6VwHQ063oH3JQXPaRy6w,2178
|
68
69
|
sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
|
69
|
-
sglang/srt/layers/sampler.py,sha256=
|
70
|
-
sglang/srt/layers/torchao_utils.py,sha256=
|
70
|
+
sglang/srt/layers/sampler.py,sha256=k4Op_HMkQfT7t9wgQwBVotfTUXEocrzRyQqEFnff1pc,5511
|
71
|
+
sglang/srt/layers/torchao_utils.py,sha256=07Fe2Csdh1JiQKPGGHWkbq0-a6bV7Cq136ygdtVAhgI,3708
|
71
72
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=slGwLiWjuFLCUdRe-GTlfumyZpqVX9VF6No_UGOT-hA,21624
|
72
73
|
sglang/srt/layers/attention/__init__.py,sha256=KIJhzOJWYioQE7Va4D83-V-ZUZVMZcczuNgDC3dlSRo,2583
|
73
74
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=RQdEKRykSLf9ilnaHmR6T7RFqh4emH_adfB3aJN2BUU,10920
|
74
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
75
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=NgeigL1WiPOuOry0Gbxv-6HEcERB8Du0mBJgYcTVIAA,24943
|
75
76
|
sglang/srt/layers/attention/torch_native_backend.py,sha256=nQdeqWEMMH_wrod5wssDCJG-uPKm0uslvkALKqPRPQ8,10509
|
76
|
-
sglang/srt/layers/attention/triton_backend.py,sha256
|
77
|
-
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=
|
77
|
+
sglang/srt/layers/attention/triton_backend.py,sha256=-TobyZHwlbJ5HhbFg-jgCqVOw4Y-opgEuFo-EusASQc,6264
|
78
|
+
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=oJ_UK1t229zF3hbTDiQe7t-X-IbM2dOxx4U2ch-vmjA,17847
|
78
79
|
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
|
79
80
|
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=tZJhzqcf1KKMT8z7_32eVk_D1NHP71c-S3UNxemfAHM,11542
|
80
81
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
|
81
82
|
sglang/srt/layers/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
82
83
|
sglang/srt/layers/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
|
83
|
-
sglang/srt/layers/ep_moe/layer.py,sha256=
|
84
|
+
sglang/srt/layers/ep_moe/layer.py,sha256=uMropMhU-MaycoxSLxcfD0jZC_cuL_boRbIu86mbZjY,23034
|
84
85
|
sglang/srt/layers/fused_moe_triton/__init__.py,sha256=PHKFqd2hPOO-g9kSMseg2g76lpg9OGXQDThWU6bt9vs,902
|
85
|
-
sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=
|
86
|
-
sglang/srt/layers/fused_moe_triton/layer.py,sha256=
|
87
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
86
|
+
sglang/srt/layers/fused_moe_triton/fused_moe.py,sha256=fLGmkY6imJYjEw9-3-jJthkMcFGMBcu9HCNIuxAzMhE,29625
|
87
|
+
sglang/srt/layers/fused_moe_triton/layer.py,sha256=eMpbZlP3FAQxbHochis7ybZ-fsNBP0PzKF1PN0Xo7so,21517
|
88
|
+
sglang/srt/layers/quantization/__init__.py,sha256=FgNy_zNWMWnq3lEGyCSyfLSQtcZtWlq99JilkmEDW7I,4594
|
88
89
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
89
|
-
sglang/srt/layers/quantization/fp8.py,sha256=
|
90
|
+
sglang/srt/layers/quantization/fp8.py,sha256=3oIUPaD0PBXQyTKr44I0YJ8XXDdwyoS_-ZA97XdSxXE,24143
|
90
91
|
sglang/srt/layers/quantization/fp8_utils.py,sha256=eJDLLDu8ZbrbE3BfFIf89JlIMPOP-14DesbeVsajW0Q,1035
|
91
92
|
sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
|
92
93
|
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
93
94
|
sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
|
94
95
|
sglang/srt/managers/data_parallel_controller.py,sha256=psI4FAuBGjtdnEuwagnGdtRqvqSSxOROfNKQqVDqlVA,8382
|
95
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=
|
96
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=nZkbwt4yty_oy8rvg4T7PbgyVLoBLohvHl25xlQpBoo,8439
|
96
97
|
sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
|
97
|
-
sglang/srt/managers/io_struct.py,sha256=
|
98
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
99
|
-
sglang/srt/managers/schedule_policy.py,sha256=
|
100
|
-
sglang/srt/managers/scheduler.py,sha256=
|
98
|
+
sglang/srt/managers/io_struct.py,sha256=_LWWqT3LNwZGaWhg2d3kTg1V2MTHKzRasCvxF9Nfpi4,15429
|
99
|
+
sglang/srt/managers/schedule_batch.py,sha256=SAd7sxhoC3Bp8_xd-TEcXEFZBlGZPbn8-wMvBcjU55Q,45607
|
100
|
+
sglang/srt/managers/schedule_policy.py,sha256=cLNi__smbg02keWgUMfB_nEM3vllocPB0XyG1P5qO7I,15469
|
101
|
+
sglang/srt/managers/scheduler.py,sha256=QlcVMtrLlNcBOkVISdO556jrK8a4LE4ULskC0oCH2IQ,61776
|
101
102
|
sglang/srt/managers/session_controller.py,sha256=Yp-IV3rXczACZxZXmF-QxW9CWICGy8KHQ9ttBGJ8WXA,2800
|
102
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
103
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=gnCCdB5XDobOoBKptwv-o0yYqDkMUxL78s0zBno5lM4,31219
|
103
104
|
sglang/srt/managers/tp_worker.py,sha256=X1EwFX3FSsmXx7jeeX2tjZRocaujabQYWm-M-0CFEBE,7363
|
104
105
|
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=-QNBJRKxraa9Xt2WI1AFzZYdneIJ1eXv0GjFzDqXoE0,8926
|
105
|
-
sglang/srt/mem_cache/base_prefix_cache.py,sha256=
|
106
|
-
sglang/srt/mem_cache/chunk_cache.py,sha256=
|
106
|
+
sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
|
107
|
+
sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
|
107
108
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
108
109
|
sglang/srt/mem_cache/memory_pool.py,sha256=l9_srwXEfIIDF46nxykbHIOo1VSvU5_Ew3H0r5EC7Fo,11072
|
109
|
-
sglang/srt/mem_cache/radix_cache.py,sha256=
|
110
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
|
110
111
|
sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
|
111
112
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
112
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
113
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=kZ3nV03MD8EQYQB38u4_88_wyW4unECxAdMVICpPyuk,16241
|
113
114
|
sglang/srt/model_executor/forward_batch_info.py,sha256=L5mVoW5SaO6To-7nGk0TZM-FFB5_78cARpJ-aC2rwD0,12883
|
114
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
115
|
+
sglang/srt/model_executor/model_runner.py,sha256=MLYBcYIQihu2I3PBTUghiU2mSWsDMzlKzcnX7yHa9JU,29837
|
115
116
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
116
117
|
sglang/srt/model_loader/loader.py,sha256=VBrY4W9CiVvS_D8yXhdkW9jReV9rSMSkJplabz0Fxgk,43528
|
117
118
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
@@ -124,18 +125,19 @@ sglang/srt/models/deepseek.py,sha256=BVNICGoLjQoHmR5lc31YrZ6YbxSRTBilHqlLsALr2u8
|
|
124
125
|
sglang/srt/models/deepseek_v2.py,sha256=YKSrqagVcSUwCAi-rwIph-Xu12GrNETMNKxgnffWod8,35349
|
125
126
|
sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
|
126
127
|
sglang/srt/models/gemma.py,sha256=ydRqsG-7004r1fAiz01LHUmcj_6XN0Tn4xO1keJnMQk,12126
|
127
|
-
sglang/srt/models/gemma2.py,sha256=
|
128
|
-
sglang/srt/models/gemma2_reward.py,sha256=
|
128
|
+
sglang/srt/models/gemma2.py,sha256=41PlW8pMb4rMETdAni_JWDhZeIn_QsTQireAyUjsURA,15848
|
129
|
+
sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb-_Hq8,2494
|
129
130
|
sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
|
130
131
|
sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
|
131
|
-
sglang/srt/models/
|
132
|
+
sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,20409
|
133
|
+
sglang/srt/models/grok.py,sha256=UWvVEYfEoH0jGNFSbXpO66OGW5pzmIHlNKcn9gRZEoQ,15664
|
132
134
|
sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
|
133
135
|
sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
|
134
|
-
sglang/srt/models/llama.py,sha256=
|
135
|
-
sglang/srt/models/llama_classification.py,sha256=
|
136
|
+
sglang/srt/models/llama.py,sha256=S7nS05hhFGghXu0v-w9RZyBTY6OCEVF5Aaw4GX_E_9g,19929
|
137
|
+
sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
|
136
138
|
sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
|
137
|
-
sglang/srt/models/llama_reward.py,sha256=
|
138
|
-
sglang/srt/models/llava.py,sha256=
|
139
|
+
sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
|
140
|
+
sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
|
139
141
|
sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
|
140
142
|
sglang/srt/models/minicpm.py,sha256=ws4AqhOfAvYHGd04QuXCZel-Oxy9_vN4p4rTjs9RSz0,13723
|
141
143
|
sglang/srt/models/minicpm3.py,sha256=YIKJDTpwjmpLlv1sNT93k2yZMvGQlI_H87czjf6QYyo,24707
|
@@ -148,7 +150,7 @@ sglang/srt/models/olmo2.py,sha256=aC7svioN7XT5owRxPrvhvWBNMON9QXGQBWJ1KHMyXeA,13
|
|
148
150
|
sglang/srt/models/olmoe.py,sha256=Rw-3YrHWd90MZQFnmcfUQ-3wAaI0PCFKb0DIrCDND3s,15347
|
149
151
|
sglang/srt/models/phi3_small.py,sha256=44_my3QmgJ2N7SOkGZzEb62DXBeCVHojfmCWgkk2uCI,14802
|
150
152
|
sglang/srt/models/qwen.py,sha256=_FKDbwaS5C07uJyyivZpBrXJVej4Ph9ivzJdzWJPxJ4,9904
|
151
|
-
sglang/srt/models/qwen2.py,sha256=
|
153
|
+
sglang/srt/models/qwen2.py,sha256=be4xgcuqNa9kBdaL7x3PjsnUky6fh5K33c_khAWSi04,12959
|
152
154
|
sglang/srt/models/qwen2_moe.py,sha256=rYUk_vZW3ftKIIlqPvJZ1K-6oZ_PfGspixh1zm2Y8C8,16538
|
153
155
|
sglang/srt/models/qwen2_vl.py,sha256=3EaUlTbyWOTRXA7eViK1WqmVbCFhXLIpnos49zzf-yM,26561
|
154
156
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
@@ -157,8 +159,8 @@ sglang/srt/models/torch_native_llama.py,sha256=YeXHorFm6QfnczLXwPb5TG9a-He0uiA9R
|
|
157
159
|
sglang/srt/models/xverse.py,sha256=Oq--KqvbYu2H4TMVGEHpSnJLEwXBpxlncR9ilsQeckc,13579
|
158
160
|
sglang/srt/models/xverse_moe.py,sha256=AawKEQw--oAl-yzwCjoaZRG7q3rdkyDiam3FS0zjf_c,15537
|
159
161
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
160
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
161
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
162
|
+
sglang/srt/openai_api/adapter.py,sha256=dvKq4O3Rhd77ad6iCtPNykgnk9PVJE-E8wHVsBAfCQQ,53927
|
163
|
+
sglang/srt/openai_api/protocol.py,sha256=ecRNNqkhwwKZaIoJlPhtp2VTcHxBJDbNN8lrKS7uBx8,10406
|
162
164
|
sglang/srt/sampling/sampling_batch_info.py,sha256=s--zNjk-LErZ5lMqnZ7KiuJltaziKRbQAU5qYpKIxAc,8564
|
163
165
|
sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
|
164
166
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
@@ -180,10 +182,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
180
182
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
181
183
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
182
184
|
sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
|
183
|
-
sglang/test/test_utils.py,sha256=
|
185
|
+
sglang/test/test_utils.py,sha256=HJG7kUQOk6n9FBbH89PDtQ41C3kt1cfJODhAEcFT0AQ,23823
|
184
186
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
185
|
-
sglang-0.4.0.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
186
|
-
sglang-0.4.0.
|
187
|
-
sglang-0.4.0.post1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
188
|
-
sglang-0.4.0.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
189
|
-
sglang-0.4.0.post1.dist-info/RECORD,,
|
187
|
+
sglang-0.4.0.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
188
|
+
sglang-0.4.0.post2.dist-info/METADATA,sha256=maHXecD3U1DdhzfU2aBMhN96MQRqCBPsIA1KlO7t7dg,22512
|
189
|
+
sglang-0.4.0.post2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
190
|
+
sglang-0.4.0.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
191
|
+
sglang-0.4.0.post2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|