sglang 0.3.5.post2__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. sglang/bench_latency.py +1 -553
  2. sglang/bench_offline_throughput.py +48 -20
  3. sglang/bench_one_batch.py +474 -0
  4. sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
  5. sglang/bench_serving.py +71 -1
  6. sglang/check_env.py +3 -6
  7. sglang/srt/constrained/outlines_backend.py +15 -2
  8. sglang/srt/constrained/xgrammar_backend.py +22 -14
  9. sglang/srt/layers/activation.py +3 -0
  10. sglang/srt/layers/attention/flashinfer_backend.py +93 -48
  11. sglang/srt/layers/attention/triton_backend.py +9 -7
  12. sglang/srt/layers/custom_op_util.py +26 -0
  13. sglang/srt/layers/fused_moe/fused_moe.py +11 -4
  14. sglang/srt/layers/layernorm.py +4 -0
  15. sglang/srt/layers/logits_processor.py +10 -10
  16. sglang/srt/layers/sampler.py +4 -8
  17. sglang/srt/layers/torchao_utils.py +2 -0
  18. sglang/srt/managers/data_parallel_controller.py +74 -9
  19. sglang/srt/managers/detokenizer_manager.py +1 -0
  20. sglang/srt/managers/io_struct.py +27 -0
  21. sglang/srt/managers/schedule_batch.py +104 -38
  22. sglang/srt/managers/schedule_policy.py +5 -1
  23. sglang/srt/managers/scheduler.py +204 -54
  24. sglang/srt/managers/session_controller.py +62 -0
  25. sglang/srt/managers/tokenizer_manager.py +38 -0
  26. sglang/srt/managers/tp_worker.py +12 -1
  27. sglang/srt/managers/tp_worker_overlap_thread.py +49 -52
  28. sglang/srt/model_executor/cuda_graph_runner.py +43 -6
  29. sglang/srt/model_executor/forward_batch_info.py +109 -15
  30. sglang/srt/model_executor/model_runner.py +99 -43
  31. sglang/srt/model_parallel.py +98 -0
  32. sglang/srt/models/deepseek_v2.py +147 -44
  33. sglang/srt/models/gemma2.py +9 -8
  34. sglang/srt/models/llava.py +1 -1
  35. sglang/srt/models/llavavid.py +1 -1
  36. sglang/srt/models/olmo.py +3 -3
  37. sglang/srt/models/phi3_small.py +447 -0
  38. sglang/srt/models/qwen2_vl.py +13 -6
  39. sglang/srt/models/torch_native_llama.py +94 -78
  40. sglang/srt/openai_api/adapter.py +6 -2
  41. sglang/srt/openai_api/protocol.py +1 -1
  42. sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
  43. sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
  44. sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
  45. sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
  46. sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
  47. sglang/srt/sampling/sampling_batch_info.py +58 -57
  48. sglang/srt/sampling/sampling_params.py +1 -1
  49. sglang/srt/server.py +27 -1
  50. sglang/srt/server_args.py +78 -62
  51. sglang/srt/utils.py +71 -52
  52. sglang/test/runners.py +25 -6
  53. sglang/test/srt/sampling/penaltylib/utils.py +23 -21
  54. sglang/test/test_utils.py +30 -19
  55. sglang/version.py +1 -1
  56. {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/METADATA +43 -43
  57. {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/RECORD +60 -55
  58. {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/WHEEL +1 -1
  59. {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/LICENSE +0 -0
  60. {sglang-0.3.5.post2.dist-info → sglang-0.3.6.dist-info}/top_level.txt +0 -0
sglang/test/runners.py CHANGED
@@ -58,6 +58,28 @@ def get_top_logprobs(logits, k):
58
58
  return logprobs
59
59
 
60
60
 
61
+ def _get_sentence_transformer_embedding_model(model_path, torch_dtype):
62
+ from sentence_transformers import SentenceTransformer
63
+ from sentence_transformers.util import is_sentence_transformer_model
64
+
65
+ if is_sentence_transformer_model(model_path):
66
+ model = SentenceTransformer(
67
+ model_path,
68
+ model_kwargs={"torch_dtype": torch_dtype},
69
+ )
70
+ else: # if no pre-trained sentence-transformers model
71
+ from sentence_transformers import models
72
+
73
+ word_embedding_model = models.Transformer(model_path).to(dtype=torch_dtype)
74
+ pooling_model = models.Pooling(
75
+ word_embedding_model.get_word_embedding_dimension(),
76
+ pooling_mode="lasttoken",
77
+ )
78
+ model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
79
+
80
+ return model.cuda()
81
+
82
+
61
83
  @dataclass
62
84
  class ModelOutput:
63
85
  output_strs: List[str] = None
@@ -114,12 +136,9 @@ class HFRunner:
114
136
  low_cpu_mem_usage=True,
115
137
  ).cuda()
116
138
  elif self.model_type == "embedding":
117
- from sentence_transformers import SentenceTransformer
118
-
119
- self.model = SentenceTransformer(
120
- model_path,
121
- model_kwargs={"torch_dtype": torch_dtype},
122
- ).cuda()
139
+ self.model = _get_sentence_transformer_embedding_model(
140
+ model_path, torch_dtype
141
+ )
123
142
  elif self.model_type == "reward":
124
143
  from transformers import AutoModelForSequenceClassification
125
144
 
@@ -1,7 +1,7 @@
1
1
  import dataclasses
2
2
  import enum
3
- import typing
4
3
  import unittest
4
+ from typing import Dict, List, Optional, Set, Tuple, Type
5
5
 
6
6
  import torch
7
7
 
@@ -16,7 +16,7 @@ from sglang.srt.sampling.penaltylib.orchestrator import (
16
16
  class MockSamplingParams:
17
17
  frequency_penalty: float = 0.0
18
18
  min_new_tokens: int = 0
19
- stop_token_ids: typing.List[int] = None
19
+ stop_token_ids: List[int] = None
20
20
  presence_penalty: float = 0.0
21
21
  repetition_penalty: float = 1.0
22
22
 
@@ -24,12 +24,12 @@ class MockSamplingParams:
24
24
  @dataclasses.dataclass
25
25
  class MockTokenizer:
26
26
  eos_token_id: int
27
- additional_stop_token_ids: typing.Optional[typing.List[int]] = None
27
+ additional_stop_token_ids: Optional[List[int]] = None
28
28
 
29
29
 
30
30
  @dataclasses.dataclass
31
31
  class MockReq:
32
- origin_input_ids: typing.List[int]
32
+ origin_input_ids: List[int]
33
33
  sampling_params: MockSamplingParams
34
34
  tokenizer: MockTokenizer
35
35
 
@@ -42,8 +42,8 @@ class StepType(enum.Enum):
42
42
  @dataclasses.dataclass
43
43
  class Step:
44
44
  type: StepType
45
- token_ids: typing.List[int]
46
- expected_tensors: typing.Dict[str, torch.Tensor]
45
+ token_ids: List[int]
46
+ expected_tensors: Dict[str, torch.Tensor]
47
47
  # assume initial logits are all 1
48
48
  expected_logits: torch.Tensor
49
49
 
@@ -52,7 +52,7 @@ class Step:
52
52
  class Subject:
53
53
  sampling_params: MockSamplingParams
54
54
  # first step must be input, which will be converted to Req
55
- steps: typing.List[Step]
55
+ steps: List[Step]
56
56
  eos_token_id: int = -1
57
57
 
58
58
  def __post_init__(self):
@@ -66,7 +66,7 @@ class Subject:
66
66
  f"Expected tensors keys must be the same for all steps. Got {self.steps[i].expected_tensors.keys()} for key={i} and {self.steps[0].expected_tensors.keys()}"
67
67
  )
68
68
 
69
- def tensor_keys(self, i: int = 0) -> typing.Set[str]:
69
+ def tensor_keys(self, i: int = 0) -> Set[str]:
70
70
  return set(self.steps[i].expected_tensors.keys())
71
71
 
72
72
  def to_req(self) -> MockReq:
@@ -80,7 +80,7 @@ class Subject:
80
80
  @dataclasses.dataclass
81
81
  class Case:
82
82
  enabled: bool
83
- test_subjects: typing.List[Subject]
83
+ test_subjects: List[Subject]
84
84
 
85
85
  def __post_init__(self):
86
86
  # each test_subjects.steps should have the same expected_tensors.keys()
@@ -90,12 +90,12 @@ class Case:
90
90
  f"Expected tensors keys must be the same for all test_subjects. Got {self.test_subjects[i].tensor_keys()} for key={i} and {self.test_subjects[0].tensor_keys()}"
91
91
  )
92
92
 
93
- def tensor_keys(self, i: int = 0) -> typing.List[str]:
93
+ def tensor_keys(self, i: int = 0) -> List[str]:
94
94
  return set(self.test_subjects[i].tensor_keys())
95
95
 
96
96
 
97
97
  class BaseBatchedPenalizerTest(unittest.TestCase):
98
- Penalizer: typing.Type[_BatchedPenalizer]
98
+ Penalizer: Type[_BatchedPenalizer]
99
99
  device = "cuda"
100
100
  vocab_size = 5
101
101
 
@@ -115,7 +115,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
115
115
  """
116
116
  return torch.tensor(data, **kwargs, device=self.device)
117
117
 
118
- def create_test_subjects(self) -> typing.List[Subject]:
118
+ def create_test_subjects(self) -> List[Subject]:
119
119
  raise NotImplementedError()
120
120
 
121
121
  def create_test_cases(self):
@@ -127,7 +127,7 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
127
127
 
128
128
  def _create_penalizer(
129
129
  self, case: Case
130
- ) -> typing.Tuple[BatchedPenalizerOrchestrator, _BatchedPenalizer]:
130
+ ) -> Tuple[BatchedPenalizerOrchestrator, _BatchedPenalizer]:
131
131
  orchestrator = BatchedPenalizerOrchestrator(
132
132
  vocab_size=self.vocab_size,
133
133
  batch=_BatchLike(reqs=[subject.to_req() for subject in case.test_subjects]),
@@ -287,22 +287,24 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
287
287
  if i < len(subject.steps)
288
288
  ]
289
289
 
290
- inputs: typing.List[typing.List[int]] = []
291
- outputs: typing.List[typing.List[int]] = []
290
+ inputs: List[List[int]] = []
291
+ outputs: List[List[int]] = []
292
292
  for subject in filtered_subjects:
293
293
  step = subject.steps[i]
294
294
  if step.type == StepType.INPUT:
295
- inputs.append(step.token_ids)
296
- outputs.append([])
295
+ raise NotImplementedError()
297
296
  else:
298
297
  inputs.append([])
299
298
  outputs.append(step.token_ids)
300
299
 
301
- if any(inputs):
302
- orchestrator.cumulate_input_tokens(inputs)
303
-
304
300
  if any(outputs):
305
- orchestrator.cumulate_output_tokens(outputs)
301
+ for j in range(max(len(x) for x in outputs)):
302
+ tmp_outputs = torch.tensor(
303
+ [x[j] for x in outputs],
304
+ dtype=torch.int32,
305
+ device=orchestrator.device,
306
+ )
307
+ orchestrator.cumulate_output_tokens(tmp_outputs)
306
308
 
307
309
  if penalizer.is_required():
308
310
  self.assertTrue(penalizer.is_prepared())
sglang/test/test_utils.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  import argparse
4
4
  import asyncio
5
+ import copy
5
6
  import os
6
7
  import random
7
8
  import subprocess
@@ -438,18 +439,22 @@ def popen_launch_server(
438
439
  process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
439
440
 
440
441
  start_time = time.time()
441
- while time.time() - start_time < timeout:
442
- try:
443
- headers = {
444
- "Content-Type": "application/json; charset=utf-8",
445
- "Authorization": f"Bearer {api_key}",
446
- }
447
- response = requests.get(f"{base_url}/health_generate", headers=headers)
448
- if response.status_code == 200:
449
- return process
450
- except requests.RequestException:
451
- pass
452
- time.sleep(10)
442
+ with requests.Session() as session:
443
+ while time.time() - start_time < timeout:
444
+ try:
445
+ headers = {
446
+ "Content-Type": "application/json; charset=utf-8",
447
+ "Authorization": f"Bearer {api_key}",
448
+ }
449
+ response = session.get(
450
+ f"{base_url}/health_generate",
451
+ headers=headers,
452
+ )
453
+ if response.status_code == 200:
454
+ return process
455
+ except requests.RequestException:
456
+ pass
457
+ time.sleep(10)
453
458
  raise TimeoutError("Server failed to start within the timeout period.")
454
459
 
455
460
 
@@ -529,6 +534,7 @@ def run_bench_serving(
529
534
  random_input_len=4096,
530
535
  random_output_len=2048,
531
536
  disable_stream=False,
537
+ need_warmup=False,
532
538
  ):
533
539
  # Launch the server
534
540
  base_url = DEFAULT_URL_FOR_TEST
@@ -562,9 +568,14 @@ def run_bench_serving(
562
568
  disable_stream=disable_stream,
563
569
  disable_ignore_eos=False,
564
570
  extra_request_body=None,
571
+ profile=None,
565
572
  )
566
573
 
567
574
  try:
575
+ if need_warmup:
576
+ warmup_args = copy.deepcopy(args)
577
+ warmup_args.num_prompts = 16
578
+ run_benchmark(warmup_args)
568
579
  res = run_benchmark(args)
569
580
  finally:
570
581
  kill_child_process(process.pid, include_self=True)
@@ -573,11 +584,11 @@ def run_bench_serving(
573
584
  return res
574
585
 
575
586
 
576
- def run_bench_latency(model, other_args):
587
+ def run_bench_one_batch(model, other_args):
577
588
  command = [
578
589
  "python3",
579
590
  "-m",
580
- "sglang.bench_latency",
591
+ "sglang.bench_one_batch",
581
592
  "--model-path",
582
593
  model,
583
594
  "--batch-size",
@@ -664,7 +675,7 @@ def run_and_check_memory_leak(
664
675
  workload_func,
665
676
  disable_radix_cache,
666
677
  enable_mixed_chunk,
667
- enable_overlap,
678
+ disable_overlap,
668
679
  chunked_prefill_size,
669
680
  ):
670
681
  other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
@@ -672,8 +683,8 @@ def run_and_check_memory_leak(
672
683
  other_args += ["--disable-radix-cache"]
673
684
  if enable_mixed_chunk:
674
685
  other_args += ["--enable-mixed-chunk"]
675
- if enable_overlap:
676
- other_args += ["--enable-overlap-schedule"]
686
+ if disable_overlap:
687
+ other_args += ["--disable-overlap-schedule"]
677
688
 
678
689
  model = DEFAULT_MODEL_NAME_FOR_TEST
679
690
  port = random.randint(4000, 5000)
@@ -725,7 +736,7 @@ def run_and_check_memory_leak(
725
736
  def run_mmlu_test(
726
737
  disable_radix_cache=False,
727
738
  enable_mixed_chunk=False,
728
- enable_overlap=False,
739
+ disable_overlap=False,
729
740
  chunked_prefill_size=32,
730
741
  ):
731
742
  def workload_func(base_url, model):
@@ -748,7 +759,7 @@ def run_mmlu_test(
748
759
  workload_func,
749
760
  disable_radix_cache,
750
761
  enable_mixed_chunk,
751
- enable_overlap,
762
+ disable_overlap,
752
763
  chunked_prefill_size,
753
764
  )
754
765
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.5.post2"
1
+ __version__ = "0.3.6"
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.5.post2
3
+ Version: 0.3.6
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
- License: Apache License
5
+ License: Apache License
6
6
  Version 2.0, January 2004
7
7
  http://www.apache.org/licenses/
8
8
 
@@ -215,74 +215,74 @@ Requires-Dist: requests
215
215
  Requires-Dist: tqdm
216
216
  Requires-Dist: numpy
217
217
  Requires-Dist: IPython
218
- Provides-Extra: all
219
- Requires-Dist: sglang[srt]; extra == "all"
220
- Requires-Dist: sglang[openai]; extra == "all"
221
- Requires-Dist: sglang[anthropic]; extra == "all"
222
- Requires-Dist: sglang[litellm]; extra == "all"
223
- Provides-Extra: all_hip
224
- Requires-Dist: sglang[srt_hip]; extra == "all-hip"
225
- Requires-Dist: sglang[openai]; extra == "all-hip"
226
- Requires-Dist: sglang[anthropic]; extra == "all-hip"
227
- Requires-Dist: sglang[litellm]; extra == "all-hip"
228
- Provides-Extra: all_xpu
229
- Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
230
- Requires-Dist: sglang[openai]; extra == "all-xpu"
231
- Requires-Dist: sglang[anthropic]; extra == "all-xpu"
232
- Requires-Dist: sglang[litellm]; extra == "all-xpu"
233
- Provides-Extra: anthropic
234
- Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
235
- Provides-Extra: dev
236
- Requires-Dist: sglang[all]; extra == "dev"
237
- Requires-Dist: sglang[test]; extra == "dev"
238
- Provides-Extra: dev_hip
239
- Requires-Dist: sglang[all_hip]; extra == "dev-hip"
240
- Requires-Dist: sglang[test]; extra == "dev-hip"
241
- Provides-Extra: dev_xpu
242
- Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
243
- Requires-Dist: sglang[test]; extra == "dev-xpu"
244
- Provides-Extra: litellm
245
- Requires-Dist: litellm>=1.0.0; extra == "litellm"
246
- Provides-Extra: openai
247
- Requires-Dist: openai>=1.0; extra == "openai"
248
- Requires-Dist: tiktoken; extra == "openai"
249
- Provides-Extra: runtime_common
218
+ Provides-Extra: runtime-common
250
219
  Requires-Dist: aiohttp; extra == "runtime-common"
251
220
  Requires-Dist: decord; extra == "runtime-common"
252
221
  Requires-Dist: fastapi; extra == "runtime-common"
253
- Requires-Dist: hf-transfer; extra == "runtime-common"
254
- Requires-Dist: huggingface-hub; extra == "runtime-common"
222
+ Requires-Dist: hf_transfer; extra == "runtime-common"
223
+ Requires-Dist: huggingface_hub; extra == "runtime-common"
255
224
  Requires-Dist: interegular; extra == "runtime-common"
256
225
  Requires-Dist: orjson; extra == "runtime-common"
226
+ Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
257
227
  Requires-Dist: packaging; extra == "runtime-common"
258
228
  Requires-Dist: pillow; extra == "runtime-common"
259
229
  Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
260
230
  Requires-Dist: psutil; extra == "runtime-common"
261
231
  Requires-Dist: pydantic; extra == "runtime-common"
262
232
  Requires-Dist: python-multipart; extra == "runtime-common"
233
+ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
263
234
  Requires-Dist: torchao; extra == "runtime-common"
264
235
  Requires-Dist: uvicorn; extra == "runtime-common"
265
236
  Requires-Dist: uvloop; extra == "runtime-common"
266
- Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
267
- Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
268
237
  Requires-Dist: modelscope; extra == "runtime-common"
269
238
  Provides-Extra: srt
270
239
  Requires-Dist: sglang[runtime_common]; extra == "srt"
271
240
  Requires-Dist: torch; extra == "srt"
272
- Requires-Dist: vllm==0.6.3.post1; extra == "srt"
273
- Provides-Extra: srt_hip
241
+ Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
242
+ Provides-Extra: srt-hip
274
243
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
275
244
  Requires-Dist: torch; extra == "srt-hip"
276
245
  Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
277
- Provides-Extra: srt_xpu
246
+ Provides-Extra: srt-xpu
278
247
  Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
248
+ Provides-Extra: openai
249
+ Requires-Dist: openai>=1.0; extra == "openai"
250
+ Requires-Dist: tiktoken; extra == "openai"
251
+ Provides-Extra: anthropic
252
+ Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
253
+ Provides-Extra: litellm
254
+ Requires-Dist: litellm>=1.0.0; extra == "litellm"
279
255
  Provides-Extra: test
280
256
  Requires-Dist: jsonlines; extra == "test"
281
257
  Requires-Dist: matplotlib; extra == "test"
282
258
  Requires-Dist: pandas; extra == "test"
283
- Requires-Dist: sentence-transformers; extra == "test"
259
+ Requires-Dist: sentence_transformers; extra == "test"
284
260
  Requires-Dist: accelerate; extra == "test"
285
261
  Requires-Dist: peft; extra == "test"
262
+ Provides-Extra: all
263
+ Requires-Dist: sglang[srt]; extra == "all"
264
+ Requires-Dist: sglang[openai]; extra == "all"
265
+ Requires-Dist: sglang[anthropic]; extra == "all"
266
+ Requires-Dist: sglang[litellm]; extra == "all"
267
+ Provides-Extra: all-hip
268
+ Requires-Dist: sglang[srt_hip]; extra == "all-hip"
269
+ Requires-Dist: sglang[openai]; extra == "all-hip"
270
+ Requires-Dist: sglang[anthropic]; extra == "all-hip"
271
+ Requires-Dist: sglang[litellm]; extra == "all-hip"
272
+ Provides-Extra: all-xpu
273
+ Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
274
+ Requires-Dist: sglang[openai]; extra == "all-xpu"
275
+ Requires-Dist: sglang[anthropic]; extra == "all-xpu"
276
+ Requires-Dist: sglang[litellm]; extra == "all-xpu"
277
+ Provides-Extra: dev
278
+ Requires-Dist: sglang[all]; extra == "dev"
279
+ Requires-Dist: sglang[test]; extra == "dev"
280
+ Provides-Extra: dev-hip
281
+ Requires-Dist: sglang[all_hip]; extra == "dev-hip"
282
+ Requires-Dist: sglang[test]; extra == "dev-hip"
283
+ Provides-Extra: dev-xpu
284
+ Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
285
+ Requires-Dist: sglang[test]; extra == "dev-xpu"
286
286
 
287
287
  <div align="center" id="sglangtop">
288
288
  <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
@@ -323,7 +323,7 @@ The core features include:
323
323
 
324
324
  - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
325
325
  - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
326
- - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
326
+ - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
327
327
  - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
328
328
 
329
329
  ## Getting Started
@@ -1,15 +1,16 @@
1
1
  sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
2
2
  sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
3
- sglang/bench_latency.py,sha256=SSqZjcCNO88ExpT94qBZ5CmuA5o0T8wMTBnxLsNMqik,18259
4
- sglang/bench_offline_throughput.py,sha256=xBr7gI_ZbrpXXD72Nzu1F228oNyz1jggcblZCeUWJgw,9975
5
- sglang/bench_server_latency.py,sha256=N1MODIzcMk74yOWmY19d36aih3ewtHOemLxoieKtdhw,5866
6
- sglang/bench_serving.py,sha256=ytef89P9bqKRaMGXAqq69SmLTlNXWyHyhEraISLKYME,47975
7
- sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
3
+ sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
+ sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
5
+ sglang/bench_one_batch.py,sha256=Ww5Qd1ATaY8zw0mDEGoTYjwxMtxPKmpaHrIdjvS9iVE,15706
6
+ sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
7
+ sglang/bench_serving.py,sha256=hn5mihMey8Cik2nvwV30DUQ8C4Goxyt6BWm4YtyjIrI,50511
8
+ sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
8
9
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
9
10
  sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
10
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
11
12
  sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
12
- sglang/version.py,sha256=NlX-QUNR7ogIH-GcgzllsyHox7ItJoycFEUM_EYuhW4,28
13
+ sglang/version.py,sha256=W_9dCm49nLvZulVAvvsafxLJjVBSKDBHz9K7szFZllo,22
13
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
15
  sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
15
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -27,38 +28,40 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
27
28
  sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
28
29
  sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
29
30
  sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
30
- sglang/srt/server.py,sha256=JUYAE8MDGYou_HbmuR10QFZfg319fGt9VamskvBkpFo,28776
31
- sglang/srt/server_args.py,sha256=V8sx2oY0yphHC_uATwv4UTiLUFnvMQl85o6y5AyaoXM,30086
32
- sglang/srt/utils.py,sha256=jGSlxbvI50xEybdupDQNHpsCaF1U_5buADrD149766g,27013
31
+ sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
32
+ sglang/srt/server.py,sha256=caZPEoP3zdbEnQJnGzOEqvSdzSjsVUX8opSc-SplH2A,29709
33
+ sglang/srt/server_args.py,sha256=1VhWGvMOtr7ozW2BJV8KInPyptzfh2UiBN4jqdDJYS8,30714
34
+ sglang/srt/utils.py,sha256=5YIElk7hP1Zr7ff-jFXBUfM-acurnh5HR1ofC18FOTU,27540
33
35
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
34
36
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
35
37
  sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
36
38
  sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
37
39
  sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
38
40
  sglang/srt/constrained/base_grammar_backend.py,sha256=OPuBSd_F_fRwjVj6YFWBQuGeikj7UQtkTvc-JgEYt4I,2259
39
- sglang/srt/constrained/outlines_backend.py,sha256=J03QQiT9pkdXyoYGw3Rj6taEyWlIr4VCBvxQ3aMiB8A,5786
41
+ sglang/srt/constrained/outlines_backend.py,sha256=i4dhg3hP406YHzEyP8x2FQmLlGEn8Uby51KNLAcdhak,6353
40
42
  sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
41
- sglang/srt/constrained/xgrammar_backend.py,sha256=wMWqkLN5KhnJXL6GBqbcrhxvAAMx60nG88KIBU1bFSc,4505
42
- sglang/srt/layers/activation.py,sha256=7VEkCrx2dvl629Lz0fkJcJfVoZA-ykEdkpTzKEc_drQ,5225
43
- sglang/srt/layers/layernorm.py,sha256=HCj8Y_X6MNNdtQU2sWKgyjIqVERxl9dqrmjbBbyJjpE,3796
43
+ sglang/srt/constrained/xgrammar_backend.py,sha256=r11pWwtctbaBJGdjhQbaD_SN8n9qw902CUDh1I3ZPqo,4738
44
+ sglang/srt/layers/activation.py,sha256=Yi2xdh7jmHUlRgERQFmStz9JwWvzT-kDmZbuf8yqy2I,5375
45
+ sglang/srt/layers/custom_op_util.py,sha256=sE0dTU00Mkzu7RiWS0h1OvPzFey_m-StbkeR6grpY7o,827
46
+ sglang/srt/layers/layernorm.py,sha256=1ceN6DLenmmKdxiif2uecplSUhc58qfd6s-6KWmXS9A,3943
44
47
  sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
45
- sglang/srt/layers/logits_processor.py,sha256=1l-hJoeZUfrPPmCWcyscl0ThgKWpprUELiL1mVDfbPE,12556
48
+ sglang/srt/layers/logits_processor.py,sha256=FFW8gVvEFxhUqDFaUPRYf3I5wA9HKsSa2IbDk7TjZZU,12575
46
49
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
47
50
  sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
48
51
  sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
49
- sglang/srt/layers/sampler.py,sha256=3zfth1Kz24X4sUq7Z_cjZwHgPVivI-rgPtIeUbsiiWU,4589
50
- sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
52
+ sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
53
+ sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
51
54
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
52
55
  sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
53
56
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
54
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=843CbZsRfzWp5FTusNXXL1o4N3jd0hoCNpsoUR6Qjxk,23306
55
- sglang/srt/layers/attention/triton_backend.py,sha256=DKUEzxQE8iBvJPNHmQwP1pyx2wXmSsLqzBhLjJznIUk,6482
57
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=9V5xVyx4CnT_vN8MPBOfREePgYonwzGa_PesdZClVuI,24619
58
+ sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
56
59
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
57
60
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
58
61
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
59
62
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
60
63
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
61
- sglang/srt/layers/fused_moe/fused_moe.py,sha256=N15tWTm2SGuesJxDIJAdV5FsDUpE-15sb_AIgr4swlw,23656
64
+ sglang/srt/layers/fused_moe/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
62
65
  sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
63
66
  sglang/srt/layers/fused_moe/patch.py,sha256=K5CNLnFVxRPd8_jlY4hW6bj7pAACeCFZQA8y5loqqM4,4029
64
67
  sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
@@ -66,16 +69,17 @@ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87M
66
69
  sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
67
70
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
68
71
  sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
69
- sglang/srt/managers/data_parallel_controller.py,sha256=_XB6Ianc8TiqwLTW-7DH6gGjVYBeBU_6WjjaDk0snIY,5686
70
- sglang/srt/managers/detokenizer_manager.py,sha256=erRgf8RijFrGnYjZawu9an1u2mFPRY3tnxzF9PbKc80,7295
72
+ sglang/srt/managers/data_parallel_controller.py,sha256=7Y3YOYJDe2GUyBBHJXUxDdoz24fuaO-5IGM0TwKxzFw,7895
73
+ sglang/srt/managers/detokenizer_manager.py,sha256=ovux4AwPPTQ-JpPof7ClSTiA1sphY7IkAxPocCa1ZIs,7349
71
74
  sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
72
- sglang/srt/managers/io_struct.py,sha256=O_oHnikwmOexNqH4HP6bwAI5d_jG_C96JGapkLg8B7c,12289
73
- sglang/srt/managers/schedule_batch.py,sha256=4BgocYdKFTDCrrBkSXCT75EALBx-3RYnoN3SgtdsHlU,39595
74
- sglang/srt/managers/schedule_policy.py,sha256=LH0rh1PiI5LK-dSd3dar8_po6FidiBUuj0Xcp_yNQAA,12295
75
- sglang/srt/managers/scheduler.py,sha256=ty1sJ9U6JxifIGF4uzZX6CANMJtbjNWPe2k8aRPS6aI,48133
76
- sglang/srt/managers/tokenizer_manager.py,sha256=n_XCsCOwLZWCLv1ZJLGjyKgrAWCAQDyEhjnkxOptSa8,24436
77
- sglang/srt/managers/tp_worker.py,sha256=S5oim5xrkg1j68hYq6LfC8T533JYmQX9Kabt6U8ZXn4,5726
78
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=j5J4yHyR7w2HgAbN7S__299ADvsoyap5HK63SWMNavQ,7546
75
+ sglang/srt/managers/io_struct.py,sha256=tp7RckbDklXW8YW03xXTX3Nv0DpZGjviGPx_iljoQdI,12885
76
+ sglang/srt/managers/schedule_batch.py,sha256=kJvzb75Jmlo1iJvw1IWmLvKnBRuaUxok3MNOv-t5w18,41928
77
+ sglang/srt/managers/schedule_policy.py,sha256=zPk5Um5-E65p0cLZ_ZwCCk7DO8dE6pWJAX9_SyfPUvw,12432
78
+ sglang/srt/managers/scheduler.py,sha256=djbeXw7cfZBEu0uBOsQ-Wz4RCyvSWJ8ulpgaO6cSFyU,54711
79
+ sglang/srt/managers/session_controller.py,sha256=vf2nQrxIu_14PO5xqVBhcw3WdqbdmufBOcIwnFpuyrc,2308
80
+ sglang/srt/managers/tokenizer_manager.py,sha256=v1iCmFPhkT5IzK_LMJ-O0UPcov7pwjT49StRflBBK7Y,25882
81
+ sglang/srt/managers/tp_worker.py,sha256=P8QQ9kAqPi7RYXkXVjFIWaZW2F5ezxQtYTJA6gJleBE,6082
82
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=f-zsbb6FcDrxNhLoRp2jjqSJE-tyAzZo0HAKVnx1PUY,7527
79
83
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
80
84
  sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
81
85
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -83,18 +87,18 @@ sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47
83
87
  sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
84
88
  sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
85
89
  sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
86
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZMkyfZpWgDXfBpJ4cenh1TxXtt1O2xqeiXhDkq6E5pU,12936
87
- sglang/srt/model_executor/forward_batch_info.py,sha256=61TVExbiXDQRvZ6oevNz9AIxG7e-KVddgj4I6MTivLg,9426
88
- sglang/srt/model_executor/model_runner.py,sha256=QdFjQRnxZU8r7-MP-NdsnFnPWMRfxa-zTUmKOYmM8HE,26879
90
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=Rm4yt4RSbFf2Dee4gI5UrbJKWgGk4quomRlVJ90TaH4,14521
91
+ sglang/srt/model_executor/forward_batch_info.py,sha256=4PGHIQM-ZckRosIFF987xhTlotEHkt9dTMKrZQUUKqU,12397
92
+ sglang/srt/model_executor/model_runner.py,sha256=iUKjnn0oaa2KMJgeRm4rUYrDYhg35Eg7DlBnB8OUPSw,29116
89
93
  sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
90
94
  sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
91
95
  sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
92
96
  sglang/srt/models/dbrx.py,sha256=IiVIk_rVd0RlvfIJGIThPOPkoYT3U649PrduThiKRzg,14545
93
97
  sglang/srt/models/deepseek.py,sha256=DjW2B21isWE6A2C8A3VGZ-G0k1DkhWHO3dZZjcOVG50,15828
94
- sglang/srt/models/deepseek_v2.py,sha256=z6532MRN1tBltFNteFJfimnaGpyNmK6g_sdNmTzsVmk,28230
98
+ sglang/srt/models/deepseek_v2.py,sha256=irh-2TE5PpwjsCojxpdDQCmBTuF016BTNKD673Gf4dY,32171
95
99
  sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
96
100
  sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
97
- sglang/srt/models/gemma2.py,sha256=iE56CYzPn-QCis4kcU7Yi0jvJ04KeU2deuZH2DaS2lM,14768
101
+ sglang/srt/models/gemma2.py,sha256=6B999ZZBMl5twr_DMK9lnSmxwZAvVavpFHaOat71ANg,14783
98
102
  sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
99
103
  sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
100
104
  sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
@@ -105,39 +109,40 @@ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16
105
109
  sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
106
110
  sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
107
111
  sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
108
- sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
109
- sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
112
+ sglang/srt/models/llava.py,sha256=URAPE0xB878s_pNacA4Z2t4lAxMuzzMjLZu5gf5MseA,24847
113
+ sglang/srt/models/llavavid.py,sha256=bqFZ0qIBlOqp-mDsBFB-QGVSemYmN6wftUKcff3r3MM,12127
110
114
  sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
111
115
  sglang/srt/models/minicpm3.py,sha256=O6092exfoq8iHLmyfpVCubyQEzcfp4SmqtZJs7x4A8s,25014
112
116
  sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
113
117
  sglang/srt/models/mixtral.py,sha256=b6AqEgL4y9wQpKKAGnhgzBtUypYo3dca5UOpGNLEt_A,13949
114
118
  sglang/srt/models/mixtral_quant.py,sha256=2ND-aOHjcyeQMUvqLLqhXwOdlR_bEftMFk3hc3lnpvc,13969
115
119
  sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
116
- sglang/srt/models/olmo.py,sha256=eWPmo5AAnBhNGdMwklh1of3JnRzAszgQp4opeiiYidI,11887
120
+ sglang/srt/models/olmo.py,sha256=OPEZCpFrwy47IGiwLZFYxX7UXpE5PP3KdC7UKxRhngE,11884
117
121
  sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15242
122
+ sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
118
123
  sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
119
124
  sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
120
125
  sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
121
- sglang/srt/models/qwen2_vl.py,sha256=jb0RYMo0ShPIt4NtPCEcFGciZKstM-gYwVKND_LK7Ls,26052
126
+ sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
122
127
  sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
123
- sglang/srt/models/torch_native_llama.py,sha256=d8gVNurlVVZ-tD3Uc_aHyGCVUUp1gR8awOH4fLRZHDE,19145
128
+ sglang/srt/models/torch_native_llama.py,sha256=RTIO2qp1SitOwNZNVzMBz8i0Gbud3t1nxTCImTguVQg,19362
124
129
  sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
125
130
  sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
126
131
  sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
127
- sglang/srt/openai_api/adapter.py,sha256=xYBmBLZ_JxfMt_m8LtVe_OB70GV4S9zBOL8e5g_VRvs,53432
128
- sglang/srt/openai_api/protocol.py,sha256=Mou5JUMKJkxVxoj4n8R4_sgnYY3OcwniiAi2TEM3hfY,10070
129
- sglang/srt/sampling/sampling_batch_info.py,sha256=7uoHypbbp4o71DfPmF22R_LeyM_Q9BTxBFg8O4lkd9w,7648
130
- sglang/srt/sampling/sampling_params.py,sha256=zzWVm8DxcUDdPwV1MIh5q76mmLwtkun0E08T6U3ZyWA,5192
132
+ sglang/srt/openai_api/adapter.py,sha256=10jD3QLOAlbxTUO4-PnhgoaiNtWxbadUfb9bWyqN6gw,53540
133
+ sglang/srt/openai_api/protocol.py,sha256=dRundxpM2kutsz-03u2nPfd3jVA0zJKmPYGAEY93t8c,10078
134
+ sglang/srt/sampling/sampling_batch_info.py,sha256=8bQ1UvsJooPEBq_t6BXSocDAcm8OqivSUYXm4mBtnUQ,8379
135
+ sglang/srt/sampling/sampling_params.py,sha256=u9RL8yTXYSPD6OZPvGdKvD1hmmRDY2_dg6cs2CaJhbg,5192
131
136
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
132
- sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
133
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
134
- sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=MmfqRqJ-leSoY9iO5Hg_ILlX-M0M0tObYrxrb_quStg,3717
135
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
136
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
137
+ sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
138
+ sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
139
+ sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
140
+ sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
141
+ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
137
142
  sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
138
143
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
139
144
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
140
- sglang/test/runners.py,sha256=JxfsGEW9L3cz87fHYmWqb3Vnbk6K1csLLLftR3LogxU,14297
145
+ sglang/test/runners.py,sha256=31tkr6ZZ4WksLXZglAil05E1JiO71kftlg9dBiHq_u0,15034
141
146
  sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
142
147
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
143
148
  sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
@@ -147,10 +152,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
147
152
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
148
153
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
149
154
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
150
- sglang/test/test_utils.py,sha256=XvIAMeLXr4D7uLxCUSLTKP5Upc1EJd0JX2egL897Jfo,23100
151
- sglang/test/srt/sampling/penaltylib/utils.py,sha256=q98pQDikkmvvvvAG-AXMYaYte1iHHW2TFhKGtAeGvdE,12802
152
- sglang-0.3.5.post2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
153
- sglang-0.3.5.post2.dist-info/METADATA,sha256=ajoktPOWOAmE37TcZw562A22FmxntBUWO4zLOShVKpQ,21568
154
- sglang-0.3.5.post2.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
155
- sglang-0.3.5.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
156
- sglang-0.3.5.post2.dist-info/RECORD,,
155
+ sglang/test/test_utils.py,sha256=lBwINKlekJx03zJbnjEcO_KIkCMcBnfFa22LNt5Mwy4,23462
156
+ sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
157
+ sglang-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
158
+ sglang-0.3.6.dist-info/METADATA,sha256=Xqs3Fv5BkPx7ROZyCxhEBfIJzESsYz4PzjihzkA-ZZ8,21602
159
+ sglang-0.3.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
160
+ sglang-0.3.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
161
+ sglang-0.3.6.dist-info/RECORD,,