sglang 0.3.5.post2__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_latency.py +1 -553
- sglang/bench_offline_throughput.py +48 -20
- sglang/bench_one_batch.py +474 -0
- sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
- sglang/bench_serving.py +71 -1
- sglang/check_env.py +3 -6
- sglang/srt/constrained/outlines_backend.py +15 -2
- sglang/srt/constrained/xgrammar_backend.py +22 -14
- sglang/srt/layers/activation.py +3 -0
- sglang/srt/layers/attention/flashinfer_backend.py +93 -48
- sglang/srt/layers/attention/triton_backend.py +9 -7
- sglang/srt/layers/custom_op_util.py +26 -0
- sglang/srt/layers/fused_moe/fused_moe.py +11 -4
- sglang/srt/layers/layernorm.py +4 -0
- sglang/srt/layers/logits_processor.py +10 -10
- sglang/srt/layers/sampler.py +4 -8
- sglang/srt/layers/torchao_utils.py +2 -0
- sglang/srt/managers/data_parallel_controller.py +74 -9
- sglang/srt/managers/detokenizer_manager.py +1 -0
- sglang/srt/managers/io_struct.py +27 -0
- sglang/srt/managers/schedule_batch.py +104 -38
- sglang/srt/managers/schedule_policy.py +5 -1
- sglang/srt/managers/scheduler.py +204 -54
- sglang/srt/managers/session_controller.py +62 -0
- sglang/srt/managers/tokenizer_manager.py +38 -0
- sglang/srt/managers/tp_worker.py +12 -1
- sglang/srt/managers/tp_worker_overlap_thread.py +49 -52
- sglang/srt/model_executor/cuda_graph_runner.py +43 -6
- sglang/srt/model_executor/forward_batch_info.py +109 -15
- sglang/srt/model_executor/model_runner.py +99 -43
- sglang/srt/model_parallel.py +98 -0
- sglang/srt/models/deepseek_v2.py +147 -44
- sglang/srt/models/gemma2.py +9 -8
- sglang/srt/models/llava.py +1 -1
- sglang/srt/models/llavavid.py +1 -1
- sglang/srt/models/olmo.py +3 -3
- sglang/srt/models/phi3_small.py +447 -0
- sglang/srt/models/qwen2_vl.py +13 -6
- sglang/srt/models/torch_native_llama.py +94 -78
- sglang/srt/openai_api/adapter.py +6 -2
- sglang/srt/openai_api/protocol.py +1 -1
- sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
- sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
- sglang/srt/sampling/sampling_batch_info.py +58 -57
- sglang/srt/sampling/sampling_params.py +1 -1
- sglang/srt/server.py +27 -1
- sglang/srt/server_args.py +78 -62
- sglang/srt/utils.py +71 -52
- sglang/test/runners.py +25 -6
- sglang/test/srt/sampling/penaltylib/utils.py +23 -21
- sglang/test/test_utils.py +30 -19
- sglang/version.py +1 -1
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/METADATA +43 -43
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/RECORD +60 -55
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/WHEEL +1 -1
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/LICENSE +0 -0
- {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/top_level.txt +0 -0
sglang/test/runners.py
CHANGED
@@ -58,6 +58,28 @@ def get_top_logprobs(logits, k):
     return logprobs


+def _get_sentence_transformer_embedding_model(model_path, torch_dtype):
+    from sentence_transformers import SentenceTransformer
+    from sentence_transformers.util import is_sentence_transformer_model
+
+    if is_sentence_transformer_model(model_path):
+        model = SentenceTransformer(
+            model_path,
+            model_kwargs={"torch_dtype": torch_dtype},
+        )
+    else:  # if no pre-trained sentence-transformers model
+        from sentence_transformers import models
+
+        word_embedding_model = models.Transformer(model_path).to(dtype=torch_dtype)
+        pooling_model = models.Pooling(
+            word_embedding_model.get_word_embedding_dimension(),
+            pooling_mode="lasttoken",
+        )
+        model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
+
+    return model.cuda()
+
+
 @dataclass
 class ModelOutput:
     output_strs: List[str] = None
@@ -114,12 +136,9 @@ class HFRunner:
                 low_cpu_mem_usage=True,
             ).cuda()
         elif self.model_type == "embedding":
-
-
-
-                model_path,
-                model_kwargs={"torch_dtype": torch_dtype},
-            ).cuda()
+            self.model = _get_sentence_transformer_embedding_model(
+                model_path, torch_dtype
+            )
         elif self.model_type == "reward":
             from transformers import AutoModelForSequenceClassification

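Note on the runners.py hunk above: the new _get_sentence_transformer_embedding_model helper loads native sentence-transformers checkpoints directly and otherwise wraps a plain Transformer module with last-token pooling. Below is a minimal standalone sketch of that same fallback logic for reference; the function name load_embedding_model, the placeholder model path, and the encode() usage are illustrative assumptions, not part of the diff.

import torch
from sentence_transformers import SentenceTransformer, models
from sentence_transformers.util import is_sentence_transformer_model


def load_embedding_model(model_path: str, torch_dtype=torch.float16):
    # Native sentence-transformers checkpoints can be loaded directly.
    if is_sentence_transformer_model(model_path):
        return SentenceTransformer(
            model_path, model_kwargs={"torch_dtype": torch_dtype}
        ).cuda()
    # Otherwise wrap a plain HF transformer with last-token pooling,
    # mirroring the fallback branch in the diff.
    word_embedding_model = models.Transformer(model_path).to(dtype=torch_dtype)
    pooling_model = models.Pooling(
        word_embedding_model.get_word_embedding_dimension(),
        pooling_mode="lasttoken",
    )
    return SentenceTransformer(modules=[word_embedding_model, pooling_model]).cuda()


# Hypothetical usage in a test harness:
# model = load_embedding_model("/path/to/checkpoint", torch.float16)
# embeddings = model.encode(["hello world"], convert_to_tensor=True)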
sglang/test/srt/sampling/penaltylib/utils.py
CHANGED
@@ -1,7 +1,7 @@
 import dataclasses
 import enum
-import typing
 import unittest
+from typing import Dict, List, Optional, Set, Tuple, Type

 import torch

@@ -16,7 +16,7 @@ from sglang.srt.sampling.penaltylib.orchestrator import (
 class MockSamplingParams:
     frequency_penalty: float = 0.0
     min_new_tokens: int = 0
-    stop_token_ids:
+    stop_token_ids: List[int] = None
     presence_penalty: float = 0.0
     repetition_penalty: float = 1.0

@@ -24,12 +24,12 @@ class MockSamplingParams:
 @dataclasses.dataclass
 class MockTokenizer:
     eos_token_id: int
-    additional_stop_token_ids:
+    additional_stop_token_ids: Optional[List[int]] = None


 @dataclasses.dataclass
 class MockReq:
-    origin_input_ids:
+    origin_input_ids: List[int]
     sampling_params: MockSamplingParams
     tokenizer: MockTokenizer

@@ -42,8 +42,8 @@ class StepType(enum.Enum):
 @dataclasses.dataclass
 class Step:
     type: StepType
-    token_ids:
-    expected_tensors:
+    token_ids: List[int]
+    expected_tensors: Dict[str, torch.Tensor]
     # assume initial logits are all 1
     expected_logits: torch.Tensor

@@ -52,7 +52,7 @@ class Step:
 class Subject:
     sampling_params: MockSamplingParams
     # first step must be input, which will be converted to Req
-    steps:
+    steps: List[Step]
     eos_token_id: int = -1

     def __post_init__(self):
@@ -66,7 +66,7 @@ class Subject:
                 f"Expected tensors keys must be the same for all steps. Got {self.steps[i].expected_tensors.keys()} for key={i} and {self.steps[0].expected_tensors.keys()}"
             )

-    def tensor_keys(self, i: int = 0) ->
+    def tensor_keys(self, i: int = 0) -> Set[str]:
         return set(self.steps[i].expected_tensors.keys())

     def to_req(self) -> MockReq:
@@ -80,7 +80,7 @@ class Subject:
 @dataclasses.dataclass
 class Case:
     enabled: bool
-    test_subjects:
+    test_subjects: List[Subject]

     def __post_init__(self):
         # each test_subjects.steps should have the same expected_tensors.keys()
@@ -90,12 +90,12 @@ class Case:
                 f"Expected tensors keys must be the same for all test_subjects. Got {self.test_subjects[i].tensor_keys()} for key={i} and {self.test_subjects[0].tensor_keys()}"
             )

-    def tensor_keys(self, i: int = 0) ->
+    def tensor_keys(self, i: int = 0) -> List[str]:
         return set(self.test_subjects[i].tensor_keys())


 class BaseBatchedPenalizerTest(unittest.TestCase):
-    Penalizer:
+    Penalizer: Type[_BatchedPenalizer]
     device = "cuda"
     vocab_size = 5

@@ -115,7 +115,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
         """
         return torch.tensor(data, **kwargs, device=self.device)

-    def create_test_subjects(self) ->
+    def create_test_subjects(self) -> List[Subject]:
         raise NotImplementedError()

     def create_test_cases(self):
@@ -127,7 +127,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):

     def _create_penalizer(
         self, case: Case
-    ) ->
+    ) -> Tuple[BatchedPenalizerOrchestrator, _BatchedPenalizer]:
         orchestrator = BatchedPenalizerOrchestrator(
             vocab_size=self.vocab_size,
             batch=_BatchLike(reqs=[subject.to_req() for subject in case.test_subjects]),
@@ -287,22 +287,24 @@
                 if i < len(subject.steps)
             ]

-            inputs:
-            outputs:
+            inputs: List[List[int]] = []
+            outputs: List[List[int]] = []
             for subject in filtered_subjects:
                 step = subject.steps[i]
                 if step.type == StepType.INPUT:
-
-                    outputs.append([])
+                    raise NotImplementedError()
                 else:
                     inputs.append([])
                     outputs.append(step.token_ids)

-            if any(inputs):
-                orchestrator.cumulate_input_tokens(inputs)
-
             if any(outputs):
-
+                for j in range(max(len(x) for x in outputs)):
+                    tmp_outputs = torch.tensor(
+                        [x[j] for x in outputs],
+                        dtype=torch.int32,
+                        device=orchestrator.device,
+                    )
+                    orchestrator.cumulate_output_tokens(tmp_outputs)

             if penalizer.is_required():
                 self.assertTrue(penalizer.is_prepared())
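Note on the penaltylib test-utility hunks above: the harness no longer calls cumulate_input_tokens, and output tokens are now fed to the orchestrator one decode step at a time as a flat int32 tensor (one entry per request) rather than as nested lists. A small self-contained sketch of that per-step conversion follows; the helper name cumulate_outputs_stepwise and the print-based usage are illustrative assumptions, and the real orchestrator call is only indicated in a comment.

from typing import Iterator, List

import torch


def cumulate_outputs_stepwise(
    outputs: List[List[int]], device: str = "cpu"
) -> Iterator[torch.Tensor]:
    """Yield one int32 tensor per decode step; element k of the tensor for
    step j is the j-th output token of request k, matching the shape the
    updated test passes to orchestrator.cumulate_output_tokens()."""
    # Assumes every request produced the same number of output tokens,
    # as in the test fixtures.
    for j in range(max(len(x) for x in outputs)):
        yield torch.tensor([x[j] for x in outputs], dtype=torch.int32, device=device)


# Two requests, three decode steps each.
outputs = [[5, 7, 9], [2, 4, 6]]
for step_tensor in cumulate_outputs_stepwise(outputs):
    # In the real test this tensor is passed to orchestrator.cumulate_output_tokens(...)
    print(step_tensor)  # tensor([5, 2]), then tensor([7, 4]), then tensor([9, 6])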
sglang/test/test_utils.py
CHANGED
@@ -2,6 +2,7 @@

 import argparse
 import asyncio
+import copy
 import os
 import random
 import subprocess
@@ -438,18 +439,22 @@ def popen_launch_server(
     process = subprocess.Popen(command, stdout=None, stderr=None, env=env)

     start_time = time.time()
-
-
-
-
-
-
-
-
-
-
-
-
+    with requests.Session() as session:
+        while time.time() - start_time < timeout:
+            try:
+                headers = {
+                    "Content-Type": "application/json; charset=utf-8",
+                    "Authorization": f"Bearer {api_key}",
+                }
+                response = session.get(
+                    f"{base_url}/health_generate",
+                    headers=headers,
+                )
+                if response.status_code == 200:
+                    return process
+            except requests.RequestException:
+                pass
+            time.sleep(10)
     raise TimeoutError("Server failed to start within the timeout period.")


@@ -529,6 +534,7 @@ def run_bench_serving(
     random_input_len=4096,
     random_output_len=2048,
     disable_stream=False,
+    need_warmup=False,
 ):
     # Launch the server
     base_url = DEFAULT_URL_FOR_TEST
@@ -562,9 +568,14 @@
         disable_stream=disable_stream,
         disable_ignore_eos=False,
         extra_request_body=None,
+        profile=None,
     )

     try:
+        if need_warmup:
+            warmup_args = copy.deepcopy(args)
+            warmup_args.num_prompts = 16
+            run_benchmark(warmup_args)
         res = run_benchmark(args)
     finally:
         kill_child_process(process.pid, include_self=True)
@@ -573,11 +584,11 @@
     return res


-def
+def run_bench_one_batch(model, other_args):
     command = [
         "python3",
         "-m",
-        "sglang.
+        "sglang.bench_one_batch",
         "--model-path",
         model,
         "--batch-size",
@@ -664,7 +675,7 @@ def run_and_check_memory_leak(
     workload_func,
     disable_radix_cache,
     enable_mixed_chunk,
-
+    disable_overlap,
     chunked_prefill_size,
 ):
     other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
@@ -672,8 +683,8 @@ def run_and_check_memory_leak(
         other_args += ["--disable-radix-cache"]
     if enable_mixed_chunk:
         other_args += ["--enable-mixed-chunk"]
-    if
-        other_args += ["--
+    if disable_overlap:
+        other_args += ["--disable-overlap-schedule"]

     model = DEFAULT_MODEL_NAME_FOR_TEST
     port = random.randint(4000, 5000)
@@ -725,7 +736,7 @@ def run_and_check_memory_leak(
 def run_mmlu_test(
     disable_radix_cache=False,
     enable_mixed_chunk=False,
-
+    disable_overlap=False,
     chunked_prefill_size=32,
 ):
     def workload_func(base_url, model):
@@ -748,7 +759,7 @@ def run_mmlu_test(
         workload_func,
         disable_radix_cache,
         enable_mixed_chunk,
-
+        disable_overlap,
         chunked_prefill_size,
     )

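Note on the test_utils.py hunks above: popen_launch_server now waits for readiness by polling the /health_generate endpoint with the API key as a bearer token, sleeping 10 seconds between attempts, and run_bench_serving gained a need_warmup flag that runs a 16-prompt warmup pass before the measured benchmark. A standalone sketch of the polling pattern follows; the function name wait_until_healthy and the example URL, key, and timeout values are illustrative assumptions.

import time

import requests


def wait_until_healthy(base_url: str, api_key: str, timeout: float) -> bool:
    """Return True once GET {base_url}/health_generate answers 200, False on timeout."""
    start_time = time.time()
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"Bearer {api_key}",
    }
    with requests.Session() as session:
        while time.time() - start_time < timeout:
            try:
                response = session.get(f"{base_url}/health_generate", headers=headers)
                if response.status_code == 200:
                    return True
            except requests.RequestException:
                pass  # server not up yet; retry
            time.sleep(10)
    return False


# Hypothetical usage:
# if not wait_until_healthy("http://127.0.0.1:30000", "sk-test", timeout=300):
#     raise TimeoutError("Server failed to start within the timeout period.")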
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.3.5.post2"
+__version__ = "0.3.6"
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.5.post2
+Version: 0.3.6
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
-License:
+License: Apache License
 Version 2.0, January 2004
 http://www.apache.org/licenses/

@@ -215,74 +215,74 @@ Requires-Dist: requests
 Requires-Dist: tqdm
 Requires-Dist: numpy
 Requires-Dist: IPython
-Provides-Extra:
-Requires-Dist: sglang[srt]; extra == "all"
-Requires-Dist: sglang[openai]; extra == "all"
-Requires-Dist: sglang[anthropic]; extra == "all"
-Requires-Dist: sglang[litellm]; extra == "all"
-Provides-Extra: all_hip
-Requires-Dist: sglang[srt_hip]; extra == "all-hip"
-Requires-Dist: sglang[openai]; extra == "all-hip"
-Requires-Dist: sglang[anthropic]; extra == "all-hip"
-Requires-Dist: sglang[litellm]; extra == "all-hip"
-Provides-Extra: all_xpu
-Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
-Requires-Dist: sglang[openai]; extra == "all-xpu"
-Requires-Dist: sglang[anthropic]; extra == "all-xpu"
-Requires-Dist: sglang[litellm]; extra == "all-xpu"
-Provides-Extra: anthropic
-Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
-Provides-Extra: dev
-Requires-Dist: sglang[all]; extra == "dev"
-Requires-Dist: sglang[test]; extra == "dev"
-Provides-Extra: dev_hip
-Requires-Dist: sglang[all_hip]; extra == "dev-hip"
-Requires-Dist: sglang[test]; extra == "dev-hip"
-Provides-Extra: dev_xpu
-Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
-Requires-Dist: sglang[test]; extra == "dev-xpu"
-Provides-Extra: litellm
-Requires-Dist: litellm>=1.0.0; extra == "litellm"
-Provides-Extra: openai
-Requires-Dist: openai>=1.0; extra == "openai"
-Requires-Dist: tiktoken; extra == "openai"
-Provides-Extra: runtime_common
+Provides-Extra: runtime-common
 Requires-Dist: aiohttp; extra == "runtime-common"
 Requires-Dist: decord; extra == "runtime-common"
 Requires-Dist: fastapi; extra == "runtime-common"
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: hf_transfer; extra == "runtime-common"
+Requires-Dist: huggingface_hub; extra == "runtime-common"
 Requires-Dist: interegular; extra == "runtime-common"
 Requires-Dist: orjson; extra == "runtime-common"
+Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
 Requires-Dist: packaging; extra == "runtime-common"
 Requires-Dist: pillow; extra == "runtime-common"
 Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
 Requires-Dist: psutil; extra == "runtime-common"
 Requires-Dist: pydantic; extra == "runtime-common"
 Requires-Dist: python-multipart; extra == "runtime-common"
+Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
 Requires-Dist: torchao; extra == "runtime-common"
 Requires-Dist: uvicorn; extra == "runtime-common"
 Requires-Dist: uvloop; extra == "runtime-common"
-Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
-Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
 Requires-Dist: modelscope; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
 Requires-Dist: torch; extra == "srt"
-Requires-Dist: vllm
-Provides-Extra:
+Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
+Provides-Extra: srt-hip
 Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
 Requires-Dist: torch; extra == "srt-hip"
 Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
-Provides-Extra:
+Provides-Extra: srt-xpu
 Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
+Provides-Extra: litellm
+Requires-Dist: litellm>=1.0.0; extra == "litellm"
 Provides-Extra: test
 Requires-Dist: jsonlines; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
 Requires-Dist: pandas; extra == "test"
-Requires-Dist:
+Requires-Dist: sentence_transformers; extra == "test"
 Requires-Dist: accelerate; extra == "test"
 Requires-Dist: peft; extra == "test"
+Provides-Extra: all
+Requires-Dist: sglang[srt]; extra == "all"
+Requires-Dist: sglang[openai]; extra == "all"
+Requires-Dist: sglang[anthropic]; extra == "all"
+Requires-Dist: sglang[litellm]; extra == "all"
+Provides-Extra: all-hip
+Requires-Dist: sglang[srt_hip]; extra == "all-hip"
+Requires-Dist: sglang[openai]; extra == "all-hip"
+Requires-Dist: sglang[anthropic]; extra == "all-hip"
+Requires-Dist: sglang[litellm]; extra == "all-hip"
+Provides-Extra: all-xpu
+Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
+Requires-Dist: sglang[openai]; extra == "all-xpu"
+Requires-Dist: sglang[anthropic]; extra == "all-xpu"
+Requires-Dist: sglang[litellm]; extra == "all-xpu"
+Provides-Extra: dev
+Requires-Dist: sglang[all]; extra == "dev"
+Requires-Dist: sglang[test]; extra == "dev"
+Provides-Extra: dev-hip
+Requires-Dist: sglang[all_hip]; extra == "dev-hip"
+Requires-Dist: sglang[test]; extra == "dev-hip"
+Provides-Extra: dev-xpu
+Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
+Requires-Dist: sglang[test]; extra == "dev-xpu"

 <div align="center" id="sglangtop">
 <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
@@ -323,7 +323,7 @@ The core features include:

 - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
 - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
-- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
+- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
 - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.

 ## Getting Started
{sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/RECORD
CHANGED
@@ -1,15 +1,16 @@
 sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
 sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
-sglang/bench_latency.py,sha256=
-sglang/bench_offline_throughput.py,sha256=
-sglang/
-sglang/
-sglang/
+sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
+sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
+sglang/bench_one_batch.py,sha256=Ww5Qd1ATaY8zw0mDEGoTYjwxMtxPKmpaHrIdjvS9iVE,15706
+sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
+sglang/bench_serving.py,sha256=hn5mihMey8Cik2nvwV30DUQ8C4Goxyt6BWm4YtyjIrI,50511
+sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
 sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
 sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
 sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
 sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
-sglang/version.py,sha256=
+sglang/version.py,sha256=W_9dCm49nLvZulVAvvsafxLJjVBSKDBHz9K7szFZllo,22
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -27,38 +28,40 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
 sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
 sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
 sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
-sglang/srt/
-sglang/srt/
-sglang/srt/
+sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
+sglang/srt/server.py,sha256=caZPEoP3zdbEnQJnGzOEqvSdzSjsVUX8opSc-SplH2A,29709
+sglang/srt/server_args.py,sha256=1VhWGvMOtr7ozW2BJV8KInPyptzfh2UiBN4jqdDJYS8,30714
+sglang/srt/utils.py,sha256=5YIElk7hP1Zr7ff-jFXBUfM-acurnh5HR1ofC18FOTU,27540
 sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
 sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
 sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
 sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
 sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
 sglang/srt/constrained/base_grammar_backend.py,sha256=OPuBSd_F_fRwjVj6YFWBQuGeikj7UQtkTvc-JgEYt4I,2259
-sglang/srt/constrained/outlines_backend.py,sha256=
+sglang/srt/constrained/outlines_backend.py,sha256=i4dhg3hP406YHzEyP8x2FQmLlGEn8Uby51KNLAcdhak,6353
 sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
-sglang/srt/constrained/xgrammar_backend.py,sha256=
-sglang/srt/layers/activation.py,sha256=
-sglang/srt/layers/
+sglang/srt/constrained/xgrammar_backend.py,sha256=r11pWwtctbaBJGdjhQbaD_SN8n9qw902CUDh1I3ZPqo,4738
+sglang/srt/layers/activation.py,sha256=Yi2xdh7jmHUlRgERQFmStz9JwWvzT-kDmZbuf8yqy2I,5375
+sglang/srt/layers/custom_op_util.py,sha256=sE0dTU00Mkzu7RiWS0h1OvPzFey_m-StbkeR6grpY7o,827
+sglang/srt/layers/layernorm.py,sha256=1ceN6DLenmmKdxiif2uecplSUhc58qfd6s-6KWmXS9A,3943
 sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
-sglang/srt/layers/logits_processor.py,sha256=
+sglang/srt/layers/logits_processor.py,sha256=FFW8gVvEFxhUqDFaUPRYf3I5wA9HKsSa2IbDk7TjZZU,12575
 sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
 sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
 sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
-sglang/srt/layers/sampler.py,sha256=
-sglang/srt/layers/torchao_utils.py,sha256=
+sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
+sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
 sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
 sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
 sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
-sglang/srt/layers/attention/flashinfer_backend.py,sha256=
-sglang/srt/layers/attention/triton_backend.py,sha256=
+sglang/srt/layers/attention/flashinfer_backend.py,sha256=9V5xVyx4CnT_vN8MPBOfREePgYonwzGa_PesdZClVuI,24619
+sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
 sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
 sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
 sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
 sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
 sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
-sglang/srt/layers/fused_moe/fused_moe.py,sha256=
+sglang/srt/layers/fused_moe/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
 sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
 sglang/srt/layers/fused_moe/patch.py,sha256=K5CNLnFVxRPd8_jlY4hW6bj7pAACeCFZQA8y5loqqM4,4029
 sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
@@ -66,16 +69,17 @@ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87M
 sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
 sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
 sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
-sglang/srt/managers/data_parallel_controller.py,sha256=
-sglang/srt/managers/detokenizer_manager.py,sha256=
+sglang/srt/managers/data_parallel_controller.py,sha256=7Y3YOYJDe2GUyBBHJXUxDdoz24fuaO-5IGM0TwKxzFw,7895
+sglang/srt/managers/detokenizer_manager.py,sha256=ovux4AwPPTQ-JpPof7ClSTiA1sphY7IkAxPocCa1ZIs,7349
 sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
-sglang/srt/managers/io_struct.py,sha256=
-sglang/srt/managers/schedule_batch.py,sha256=
-sglang/srt/managers/schedule_policy.py,sha256=
-sglang/srt/managers/scheduler.py,sha256=
-sglang/srt/managers/
-sglang/srt/managers/
-sglang/srt/managers/
+sglang/srt/managers/io_struct.py,sha256=tp7RckbDklXW8YW03xXTX3Nv0DpZGjviGPx_iljoQdI,12885
+sglang/srt/managers/schedule_batch.py,sha256=kJvzb75Jmlo1iJvw1IWmLvKnBRuaUxok3MNOv-t5w18,41928
+sglang/srt/managers/schedule_policy.py,sha256=zPk5Um5-E65p0cLZ_ZwCCk7DO8dE6pWJAX9_SyfPUvw,12432
+sglang/srt/managers/scheduler.py,sha256=djbeXw7cfZBEu0uBOsQ-Wz4RCyvSWJ8ulpgaO6cSFyU,54711
+sglang/srt/managers/session_controller.py,sha256=vf2nQrxIu_14PO5xqVBhcw3WdqbdmufBOcIwnFpuyrc,2308
+sglang/srt/managers/tokenizer_manager.py,sha256=v1iCmFPhkT5IzK_LMJ-O0UPcov7pwjT49StRflBBK7Y,25882
+sglang/srt/managers/tp_worker.py,sha256=P8QQ9kAqPi7RYXkXVjFIWaZW2F5ezxQtYTJA6gJleBE,6082
+sglang/srt/managers/tp_worker_overlap_thread.py,sha256=f-zsbb6FcDrxNhLoRp2jjqSJE-tyAzZo0HAKVnx1PUY,7527
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
 sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
 sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -83,18 +87,18 @@ sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47
 sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
 sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
 sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=
-sglang/srt/model_executor/forward_batch_info.py,sha256=
-sglang/srt/model_executor/model_runner.py,sha256=
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=Rm4yt4RSbFf2Dee4gI5UrbJKWgGk4quomRlVJ90TaH4,14521
+sglang/srt/model_executor/forward_batch_info.py,sha256=4PGHIQM-ZckRosIFF987xhTlotEHkt9dTMKrZQUUKqU,12397
+sglang/srt/model_executor/model_runner.py,sha256=iUKjnn0oaa2KMJgeRm4rUYrDYhg35Eg7DlBnB8OUPSw,29116
 sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
 sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
 sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
 sglang/srt/models/dbrx.py,sha256=IiVIk_rVd0RlvfIJGIThPOPkoYT3U649PrduThiKRzg,14545
 sglang/srt/models/deepseek.py,sha256=DjW2B21isWE6A2C8A3VGZ-G0k1DkhWHO3dZZjcOVG50,15828
-sglang/srt/models/deepseek_v2.py,sha256=
+sglang/srt/models/deepseek_v2.py,sha256=irh-2TE5PpwjsCojxpdDQCmBTuF016BTNKD673Gf4dY,32171
 sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
 sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
-sglang/srt/models/gemma2.py,sha256=
+sglang/srt/models/gemma2.py,sha256=6B999ZZBMl5twr_DMK9lnSmxwZAvVavpFHaOat71ANg,14783
 sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
 sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
 sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
@@ -105,39 +109,40 @@ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16
 sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
 sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
 sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
-sglang/srt/models/llava.py,sha256=
-sglang/srt/models/llavavid.py,sha256=
+sglang/srt/models/llava.py,sha256=URAPE0xB878s_pNacA4Z2t4lAxMuzzMjLZu5gf5MseA,24847
+sglang/srt/models/llavavid.py,sha256=bqFZ0qIBlOqp-mDsBFB-QGVSemYmN6wftUKcff3r3MM,12127
 sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
 sglang/srt/models/minicpm3.py,sha256=O6092exfoq8iHLmyfpVCubyQEzcfp4SmqtZJs7x4A8s,25014
 sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
 sglang/srt/models/mixtral.py,sha256=b6AqEgL4y9wQpKKAGnhgzBtUypYo3dca5UOpGNLEt_A,13949
 sglang/srt/models/mixtral_quant.py,sha256=2ND-aOHjcyeQMUvqLLqhXwOdlR_bEftMFk3hc3lnpvc,13969
 sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
-sglang/srt/models/olmo.py,sha256=
+sglang/srt/models/olmo.py,sha256=OPEZCpFrwy47IGiwLZFYxX7UXpE5PP3KdC7UKxRhngE,11884
 sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15242
+sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
 sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
 sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
 sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
-sglang/srt/models/qwen2_vl.py,sha256=
+sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
 sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
-sglang/srt/models/torch_native_llama.py,sha256=
+sglang/srt/models/torch_native_llama.py,sha256=RTIO2qp1SitOwNZNVzMBz8i0Gbud3t1nxTCImTguVQg,19362
 sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
 sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
 sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
-sglang/srt/openai_api/adapter.py,sha256=
-sglang/srt/openai_api/protocol.py,sha256=
-sglang/srt/sampling/sampling_batch_info.py,sha256=
-sglang/srt/sampling/sampling_params.py,sha256=
+sglang/srt/openai_api/adapter.py,sha256=10jD3QLOAlbxTUO4-PnhgoaiNtWxbadUfb9bWyqN6gw,53540
+sglang/srt/openai_api/protocol.py,sha256=dRundxpM2kutsz-03u2nPfd3jVA0zJKmPYGAEY93t8c,10078
+sglang/srt/sampling/sampling_batch_info.py,sha256=8bQ1UvsJooPEBq_t6BXSocDAcm8OqivSUYXm4mBtnUQ,8379
+sglang/srt/sampling/sampling_params.py,sha256=u9RL8yTXYSPD6OZPvGdKvD1hmmRDY2_dg6cs2CaJhbg,5192
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
-sglang/srt/sampling/penaltylib/orchestrator.py,sha256=
-sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=
-sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=
-sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=
-sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=
+sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
+sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
+sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
+sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
+sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
 sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
 sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
 sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
-sglang/test/runners.py,sha256=
+sglang/test/runners.py,sha256=31tkr6ZZ4WksLXZglAil05E1JiO71kftlg9dBiHq_u0,15034
 sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
 sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
 sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
@@ -147,10 +152,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
 sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
 sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
 sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
-sglang/test/test_utils.py,sha256=
-sglang/test/srt/sampling/penaltylib/utils.py,sha256=
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
+sglang/test/test_utils.py,sha256=lBwINKlekJx03zJbnjEcO_KIkCMcBnfFa22LNt5Mwy4,23462
+sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
+sglang-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.3.6.dist-info/METADATA,sha256=Xqs3Fv5BkPx7ROZyCxhEBfIJzESsYz4PzjihzkA-ZZ8,21602
+sglang-0.3.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+sglang-0.3.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.6.dist-info/RECORD,,