sglang 0.3.5.post1__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. sglang/bench_latency.py +1 -553
  2. sglang/bench_offline_throughput.py +337 -0
  3. sglang/bench_one_batch.py +474 -0
  4. sglang/{bench_server_latency.py → bench_one_batch_server.py} +3 -3
  5. sglang/bench_serving.py +115 -31
  6. sglang/check_env.py +3 -6
  7. sglang/srt/constrained/base_grammar_backend.py +4 -3
  8. sglang/srt/constrained/outlines_backend.py +39 -26
  9. sglang/srt/constrained/xgrammar_backend.py +58 -14
  10. sglang/srt/layers/activation.py +3 -0
  11. sglang/srt/layers/attention/flashinfer_backend.py +93 -48
  12. sglang/srt/layers/attention/triton_backend.py +9 -7
  13. sglang/srt/layers/custom_op_util.py +26 -0
  14. sglang/srt/layers/fused_moe/fused_moe.py +11 -4
  15. sglang/srt/layers/fused_moe/patch.py +4 -2
  16. sglang/srt/layers/layernorm.py +4 -0
  17. sglang/srt/layers/logits_processor.py +10 -10
  18. sglang/srt/layers/sampler.py +4 -8
  19. sglang/srt/layers/torchao_utils.py +2 -0
  20. sglang/srt/managers/data_parallel_controller.py +74 -9
  21. sglang/srt/managers/detokenizer_manager.py +1 -14
  22. sglang/srt/managers/io_struct.py +27 -0
  23. sglang/srt/managers/schedule_batch.py +104 -38
  24. sglang/srt/managers/schedule_policy.py +5 -1
  25. sglang/srt/managers/scheduler.py +210 -56
  26. sglang/srt/managers/session_controller.py +62 -0
  27. sglang/srt/managers/tokenizer_manager.py +38 -0
  28. sglang/srt/managers/tp_worker.py +12 -1
  29. sglang/srt/managers/tp_worker_overlap_thread.py +49 -52
  30. sglang/srt/model_executor/cuda_graph_runner.py +43 -6
  31. sglang/srt/model_executor/forward_batch_info.py +109 -15
  32. sglang/srt/model_executor/model_runner.py +102 -43
  33. sglang/srt/model_parallel.py +98 -0
  34. sglang/srt/models/deepseek_v2.py +147 -44
  35. sglang/srt/models/gemma2.py +9 -8
  36. sglang/srt/models/llava.py +1 -1
  37. sglang/srt/models/llavavid.py +1 -1
  38. sglang/srt/models/olmo.py +3 -3
  39. sglang/srt/models/phi3_small.py +447 -0
  40. sglang/srt/models/qwen2_vl.py +13 -6
  41. sglang/srt/models/torch_native_llama.py +94 -78
  42. sglang/srt/openai_api/adapter.py +11 -4
  43. sglang/srt/openai_api/protocol.py +30 -27
  44. sglang/srt/sampling/penaltylib/orchestrator.py +49 -79
  45. sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +3 -8
  46. sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +3 -9
  47. sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +3 -8
  48. sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +3 -8
  49. sglang/srt/sampling/sampling_batch_info.py +58 -57
  50. sglang/srt/sampling/sampling_params.py +3 -3
  51. sglang/srt/server.py +29 -2
  52. sglang/srt/server_args.py +97 -60
  53. sglang/srt/utils.py +103 -51
  54. sglang/test/runners.py +25 -6
  55. sglang/test/srt/sampling/penaltylib/utils.py +23 -21
  56. sglang/test/test_utils.py +33 -22
  57. sglang/version.py +1 -1
  58. {sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/METADATA +43 -43
  59. {sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/RECORD +62 -56
  60. {sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/WHEEL +1 -1
  61. {sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/LICENSE +0 -0
  62. {sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  import argparse
4
4
  import asyncio
5
+ import copy
5
6
  import os
6
7
  import random
7
8
  import subprocess
@@ -28,8 +29,9 @@ from sglang.utils import get_exception_traceback
28
29
  DEFAULT_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/Meta-Llama-3.1-8B-FP8"
29
30
  DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
30
31
  DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
31
- DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
32
32
  DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
33
+ DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = "Qwen/Qwen1.5-MoE-A2.7B"
34
+ DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
33
35
  DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
34
36
  DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
35
37
  DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
@@ -437,18 +439,22 @@ def popen_launch_server(
437
439
  process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
438
440
 
439
441
  start_time = time.time()
440
- while time.time() - start_time < timeout:
441
- try:
442
- headers = {
443
- "Content-Type": "application/json; charset=utf-8",
444
- "Authorization": f"Bearer {api_key}",
445
- }
446
- response = requests.get(f"{base_url}/health_generate", headers=headers)
447
- if response.status_code == 200:
448
- return process
449
- except requests.RequestException:
450
- pass
451
- time.sleep(10)
442
+ with requests.Session() as session:
443
+ while time.time() - start_time < timeout:
444
+ try:
445
+ headers = {
446
+ "Content-Type": "application/json; charset=utf-8",
447
+ "Authorization": f"Bearer {api_key}",
448
+ }
449
+ response = session.get(
450
+ f"{base_url}/health_generate",
451
+ headers=headers,
452
+ )
453
+ if response.status_code == 200:
454
+ return process
455
+ except requests.RequestException:
456
+ pass
457
+ time.sleep(10)
452
458
  raise TimeoutError("Server failed to start within the timeout period.")
453
459
 
454
460
 
@@ -528,6 +534,7 @@ def run_bench_serving(
528
534
  random_input_len=4096,
529
535
  random_output_len=2048,
530
536
  disable_stream=False,
537
+ need_warmup=False,
531
538
  ):
532
539
  # Launch the server
533
540
  base_url = DEFAULT_URL_FOR_TEST
@@ -561,9 +568,14 @@ def run_bench_serving(
561
568
  disable_stream=disable_stream,
562
569
  disable_ignore_eos=False,
563
570
  extra_request_body=None,
571
+ profile=None,
564
572
  )
565
573
 
566
574
  try:
575
+ if need_warmup:
576
+ warmup_args = copy.deepcopy(args)
577
+ warmup_args.num_prompts = 16
578
+ run_benchmark(warmup_args)
567
579
  res = run_benchmark(args)
568
580
  finally:
569
581
  kill_child_process(process.pid, include_self=True)
@@ -572,11 +584,11 @@ def run_bench_serving(
572
584
  return res
573
585
 
574
586
 
575
- def run_bench_latency(model, other_args):
587
+ def run_bench_one_batch(model, other_args):
576
588
  command = [
577
589
  "python3",
578
590
  "-m",
579
- "sglang.bench_latency",
591
+ "sglang.bench_one_batch",
580
592
  "--model-path",
581
593
  model,
582
594
  "--batch-size",
@@ -663,7 +675,7 @@ def run_and_check_memory_leak(
663
675
  workload_func,
664
676
  disable_radix_cache,
665
677
  enable_mixed_chunk,
666
- enable_overlap,
678
+ disable_overlap,
667
679
  chunked_prefill_size,
668
680
  ):
669
681
  other_args = ["--chunked-prefill-size", str(chunked_prefill_size)]
@@ -671,8 +683,8 @@ def run_and_check_memory_leak(
671
683
  other_args += ["--disable-radix-cache"]
672
684
  if enable_mixed_chunk:
673
685
  other_args += ["--enable-mixed-chunk"]
674
- if enable_overlap:
675
- other_args += ["--enable-overlap-scheduler"]
686
+ if disable_overlap:
687
+ other_args += ["--disable-overlap-schedule"]
676
688
 
677
689
  model = DEFAULT_MODEL_NAME_FOR_TEST
678
690
  port = random.randint(4000, 5000)
@@ -724,7 +736,7 @@ def run_and_check_memory_leak(
724
736
  def run_mmlu_test(
725
737
  disable_radix_cache=False,
726
738
  enable_mixed_chunk=False,
727
- enable_overlap=False,
739
+ disable_overlap=False,
728
740
  chunked_prefill_size=32,
729
741
  ):
730
742
  def workload_func(base_url, model):
@@ -739,8 +751,7 @@ def run_mmlu_test(
739
751
 
740
752
  try:
741
753
  metrics = run_eval(args)
742
- print(f"{metrics=}")
743
- assert metrics["score"] >= 0.65
754
+ assert metrics["score"] >= 0.65, f"{metrics=}"
744
755
  finally:
745
756
  pass
746
757
 
@@ -748,7 +759,7 @@ def run_mmlu_test(
748
759
  workload_func,
749
760
  disable_radix_cache,
750
761
  enable_mixed_chunk,
751
- enable_overlap,
762
+ disable_overlap,
752
763
  chunked_prefill_size,
753
764
  )
754
765
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.5.post1"
1
+ __version__ = "0.3.6"
{sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/METADATA CHANGED
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.5.post1
3
+ Version: 0.3.6
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
- License: Apache License
5
+ License: Apache License
6
6
  Version 2.0, January 2004
7
7
  http://www.apache.org/licenses/
8
8
 
@@ -215,74 +215,74 @@ Requires-Dist: requests
215
215
  Requires-Dist: tqdm
216
216
  Requires-Dist: numpy
217
217
  Requires-Dist: IPython
218
- Provides-Extra: all
219
- Requires-Dist: sglang[srt]; extra == "all"
220
- Requires-Dist: sglang[openai]; extra == "all"
221
- Requires-Dist: sglang[anthropic]; extra == "all"
222
- Requires-Dist: sglang[litellm]; extra == "all"
223
- Provides-Extra: all_hip
224
- Requires-Dist: sglang[srt_hip]; extra == "all-hip"
225
- Requires-Dist: sglang[openai]; extra == "all-hip"
226
- Requires-Dist: sglang[anthropic]; extra == "all-hip"
227
- Requires-Dist: sglang[litellm]; extra == "all-hip"
228
- Provides-Extra: all_xpu
229
- Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
230
- Requires-Dist: sglang[openai]; extra == "all-xpu"
231
- Requires-Dist: sglang[anthropic]; extra == "all-xpu"
232
- Requires-Dist: sglang[litellm]; extra == "all-xpu"
233
- Provides-Extra: anthropic
234
- Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
235
- Provides-Extra: dev
236
- Requires-Dist: sglang[all]; extra == "dev"
237
- Requires-Dist: sglang[test]; extra == "dev"
238
- Provides-Extra: dev_hip
239
- Requires-Dist: sglang[all_hip]; extra == "dev-hip"
240
- Requires-Dist: sglang[test]; extra == "dev-hip"
241
- Provides-Extra: dev_xpu
242
- Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
243
- Requires-Dist: sglang[test]; extra == "dev-xpu"
244
- Provides-Extra: litellm
245
- Requires-Dist: litellm>=1.0.0; extra == "litellm"
246
- Provides-Extra: openai
247
- Requires-Dist: openai>=1.0; extra == "openai"
248
- Requires-Dist: tiktoken; extra == "openai"
249
- Provides-Extra: runtime_common
218
+ Provides-Extra: runtime-common
250
219
  Requires-Dist: aiohttp; extra == "runtime-common"
251
220
  Requires-Dist: decord; extra == "runtime-common"
252
221
  Requires-Dist: fastapi; extra == "runtime-common"
253
- Requires-Dist: hf-transfer; extra == "runtime-common"
254
- Requires-Dist: huggingface-hub; extra == "runtime-common"
222
+ Requires-Dist: hf_transfer; extra == "runtime-common"
223
+ Requires-Dist: huggingface_hub; extra == "runtime-common"
255
224
  Requires-Dist: interegular; extra == "runtime-common"
256
225
  Requires-Dist: orjson; extra == "runtime-common"
226
+ Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
257
227
  Requires-Dist: packaging; extra == "runtime-common"
258
228
  Requires-Dist: pillow; extra == "runtime-common"
259
229
  Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
260
230
  Requires-Dist: psutil; extra == "runtime-common"
261
231
  Requires-Dist: pydantic; extra == "runtime-common"
262
232
  Requires-Dist: python-multipart; extra == "runtime-common"
233
+ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
263
234
  Requires-Dist: torchao; extra == "runtime-common"
264
235
  Requires-Dist: uvicorn; extra == "runtime-common"
265
236
  Requires-Dist: uvloop; extra == "runtime-common"
266
- Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
267
- Requires-Dist: outlines>=0.0.44; extra == "runtime-common"
268
237
  Requires-Dist: modelscope; extra == "runtime-common"
269
238
  Provides-Extra: srt
270
239
  Requires-Dist: sglang[runtime_common]; extra == "srt"
271
240
  Requires-Dist: torch; extra == "srt"
272
- Requires-Dist: vllm==0.6.3.post1; extra == "srt"
273
- Provides-Extra: srt_hip
241
+ Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
242
+ Provides-Extra: srt-hip
274
243
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
275
244
  Requires-Dist: torch; extra == "srt-hip"
276
245
  Requires-Dist: vllm==0.6.3.dev13; extra == "srt-hip"
277
- Provides-Extra: srt_xpu
246
+ Provides-Extra: srt-xpu
278
247
  Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
248
+ Provides-Extra: openai
249
+ Requires-Dist: openai>=1.0; extra == "openai"
250
+ Requires-Dist: tiktoken; extra == "openai"
251
+ Provides-Extra: anthropic
252
+ Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
253
+ Provides-Extra: litellm
254
+ Requires-Dist: litellm>=1.0.0; extra == "litellm"
279
255
  Provides-Extra: test
280
256
  Requires-Dist: jsonlines; extra == "test"
281
257
  Requires-Dist: matplotlib; extra == "test"
282
258
  Requires-Dist: pandas; extra == "test"
283
- Requires-Dist: sentence-transformers; extra == "test"
259
+ Requires-Dist: sentence_transformers; extra == "test"
284
260
  Requires-Dist: accelerate; extra == "test"
285
261
  Requires-Dist: peft; extra == "test"
262
+ Provides-Extra: all
263
+ Requires-Dist: sglang[srt]; extra == "all"
264
+ Requires-Dist: sglang[openai]; extra == "all"
265
+ Requires-Dist: sglang[anthropic]; extra == "all"
266
+ Requires-Dist: sglang[litellm]; extra == "all"
267
+ Provides-Extra: all-hip
268
+ Requires-Dist: sglang[srt_hip]; extra == "all-hip"
269
+ Requires-Dist: sglang[openai]; extra == "all-hip"
270
+ Requires-Dist: sglang[anthropic]; extra == "all-hip"
271
+ Requires-Dist: sglang[litellm]; extra == "all-hip"
272
+ Provides-Extra: all-xpu
273
+ Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
274
+ Requires-Dist: sglang[openai]; extra == "all-xpu"
275
+ Requires-Dist: sglang[anthropic]; extra == "all-xpu"
276
+ Requires-Dist: sglang[litellm]; extra == "all-xpu"
277
+ Provides-Extra: dev
278
+ Requires-Dist: sglang[all]; extra == "dev"
279
+ Requires-Dist: sglang[test]; extra == "dev"
280
+ Provides-Extra: dev-hip
281
+ Requires-Dist: sglang[all_hip]; extra == "dev-hip"
282
+ Requires-Dist: sglang[test]; extra == "dev-hip"
283
+ Provides-Extra: dev-xpu
284
+ Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
285
+ Requires-Dist: sglang[test]; extra == "dev-xpu"
286
286
 
287
287
  <div align="center" id="sglangtop">
288
288
  <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
@@ -323,7 +323,7 @@ The core features include:
323
323
 
324
324
  - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
325
325
  - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
326
- - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
326
+ - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
327
327
  - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
328
328
 
329
329
  ## Getting Started
{sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/RECORD CHANGED
@@ -1,14 +1,16 @@
1
1
  sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
2
2
  sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
3
- sglang/bench_latency.py,sha256=SSqZjcCNO88ExpT94qBZ5CmuA5o0T8wMTBnxLsNMqik,18259
4
- sglang/bench_server_latency.py,sha256=N1MODIzcMk74yOWmY19d36aih3ewtHOemLxoieKtdhw,5866
5
- sglang/bench_serving.py,sha256=vYlXSXnAeUuF6oCW7r07pkQgnK9UR42B-XHyDu22erM,47620
6
- sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
3
+ sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
+ sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
5
+ sglang/bench_one_batch.py,sha256=Ww5Qd1ATaY8zw0mDEGoTYjwxMtxPKmpaHrIdjvS9iVE,15706
6
+ sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
7
+ sglang/bench_serving.py,sha256=hn5mihMey8Cik2nvwV30DUQ8C4Goxyt6BWm4YtyjIrI,50511
8
+ sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
7
9
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
8
10
  sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
9
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
10
12
  sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
11
- sglang/version.py,sha256=zPnEkP8KmACe4vaOxE-TiO3Jo-alnSUGAjnKThcNdBg,28
13
+ sglang/version.py,sha256=W_9dCm49nLvZulVAvvsafxLJjVBSKDBHz9K7szFZllo,22
12
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
15
  sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
14
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -26,55 +28,58 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
26
28
  sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
27
29
  sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
28
30
  sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
29
- sglang/srt/server.py,sha256=mpZmCVNSN_Go-mEKaYYhRNDFJHbmsK8WCc786oSCf5c,28685
30
- sglang/srt/server_args.py,sha256=9sosvHumMtf5L6jKnFNQ0_MMIg3BkaRCPmnGY2niQps,29472
31
- sglang/srt/utils.py,sha256=WtUZafw6WjAbjtRn_rTW5i2HgYJ65rrtZGpob3ngeuA,26016
31
+ sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
32
+ sglang/srt/server.py,sha256=caZPEoP3zdbEnQJnGzOEqvSdzSjsVUX8opSc-SplH2A,29709
33
+ sglang/srt/server_args.py,sha256=1VhWGvMOtr7ozW2BJV8KInPyptzfh2UiBN4jqdDJYS8,30714
34
+ sglang/srt/utils.py,sha256=5YIElk7hP1Zr7ff-jFXBUfM-acurnh5HR1ofC18FOTU,27540
32
35
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
33
36
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
34
37
  sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
35
38
  sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
36
39
  sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
37
- sglang/srt/constrained/base_grammar_backend.py,sha256=jRLKExPzMiM6GjryunJNEVrRMmHV-aJ21VhtB9c6bDw,2194
38
- sglang/srt/constrained/outlines_backend.py,sha256=mrubHYHdalbsgHgeu9Ct5OFUd7RnMok5jLXjdKHv-PE,5857
40
+ sglang/srt/constrained/base_grammar_backend.py,sha256=OPuBSd_F_fRwjVj6YFWBQuGeikj7UQtkTvc-JgEYt4I,2259
41
+ sglang/srt/constrained/outlines_backend.py,sha256=i4dhg3hP406YHzEyP8x2FQmLlGEn8Uby51KNLAcdhak,6353
39
42
  sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
40
- sglang/srt/constrained/xgrammar_backend.py,sha256=ZvEDDI_huTn2OjOfQQhqfxJU2w4R1tR1v7PwV98A0u4,3640
41
- sglang/srt/layers/activation.py,sha256=7VEkCrx2dvl629Lz0fkJcJfVoZA-ykEdkpTzKEc_drQ,5225
42
- sglang/srt/layers/layernorm.py,sha256=HCj8Y_X6MNNdtQU2sWKgyjIqVERxl9dqrmjbBbyJjpE,3796
43
+ sglang/srt/constrained/xgrammar_backend.py,sha256=r11pWwtctbaBJGdjhQbaD_SN8n9qw902CUDh1I3ZPqo,4738
44
+ sglang/srt/layers/activation.py,sha256=Yi2xdh7jmHUlRgERQFmStz9JwWvzT-kDmZbuf8yqy2I,5375
45
+ sglang/srt/layers/custom_op_util.py,sha256=sE0dTU00Mkzu7RiWS0h1OvPzFey_m-StbkeR6grpY7o,827
46
+ sglang/srt/layers/layernorm.py,sha256=1ceN6DLenmmKdxiif2uecplSUhc58qfd6s-6KWmXS9A,3943
43
47
  sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
44
- sglang/srt/layers/logits_processor.py,sha256=1l-hJoeZUfrPPmCWcyscl0ThgKWpprUELiL1mVDfbPE,12556
48
+ sglang/srt/layers/logits_processor.py,sha256=FFW8gVvEFxhUqDFaUPRYf3I5wA9HKsSa2IbDk7TjZZU,12575
45
49
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
46
50
  sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
47
51
  sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
48
- sglang/srt/layers/sampler.py,sha256=3zfth1Kz24X4sUq7Z_cjZwHgPVivI-rgPtIeUbsiiWU,4589
49
- sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
52
+ sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
53
+ sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
50
54
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
51
55
  sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
52
56
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
53
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=843CbZsRfzWp5FTusNXXL1o4N3jd0hoCNpsoUR6Qjxk,23306
54
- sglang/srt/layers/attention/triton_backend.py,sha256=DKUEzxQE8iBvJPNHmQwP1pyx2wXmSsLqzBhLjJznIUk,6482
57
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=9V5xVyx4CnT_vN8MPBOfREePgYonwzGa_PesdZClVuI,24619
58
+ sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
55
59
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
56
60
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
57
61
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
58
62
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
59
63
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
60
- sglang/srt/layers/fused_moe/fused_moe.py,sha256=N15tWTm2SGuesJxDIJAdV5FsDUpE-15sb_AIgr4swlw,23656
64
+ sglang/srt/layers/fused_moe/fused_moe.py,sha256=bxRcjdALxeY3FDnKivGOoNr6Er1kh6CCPtlAp7pjz50,23844
61
65
  sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
62
- sglang/srt/layers/fused_moe/patch.py,sha256=B9cDtHqHfnWE0QqZAffvUi6cVRKcMBMKDGJWGIaKh3U,3898
66
+ sglang/srt/layers/fused_moe/patch.py,sha256=K5CNLnFVxRPd8_jlY4hW6bj7pAACeCFZQA8y5loqqM4,4029
63
67
  sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
64
68
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
65
69
  sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
66
70
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
67
71
  sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
68
- sglang/srt/managers/data_parallel_controller.py,sha256=_XB6Ianc8TiqwLTW-7DH6gGjVYBeBU_6WjjaDk0snIY,5686
69
- sglang/srt/managers/detokenizer_manager.py,sha256=pBCcK-wKgPk4Ty-vQFSGovEZEE_yKK1f7YVDW8vDcYw,7962
72
+ sglang/srt/managers/data_parallel_controller.py,sha256=7Y3YOYJDe2GUyBBHJXUxDdoz24fuaO-5IGM0TwKxzFw,7895
73
+ sglang/srt/managers/detokenizer_manager.py,sha256=ovux4AwPPTQ-JpPof7ClSTiA1sphY7IkAxPocCa1ZIs,7349
70
74
  sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
71
- sglang/srt/managers/io_struct.py,sha256=O_oHnikwmOexNqH4HP6bwAI5d_jG_C96JGapkLg8B7c,12289
72
- sglang/srt/managers/schedule_batch.py,sha256=4BgocYdKFTDCrrBkSXCT75EALBx-3RYnoN3SgtdsHlU,39595
73
- sglang/srt/managers/schedule_policy.py,sha256=LH0rh1PiI5LK-dSd3dar8_po6FidiBUuj0Xcp_yNQAA,12295
74
- sglang/srt/managers/scheduler.py,sha256=6vqsrZu2roxzXJpNeFQRbDvERTxqbDmbvrGDp1E7FRA,47926
75
- sglang/srt/managers/tokenizer_manager.py,sha256=n_XCsCOwLZWCLv1ZJLGjyKgrAWCAQDyEhjnkxOptSa8,24436
76
- sglang/srt/managers/tp_worker.py,sha256=S5oim5xrkg1j68hYq6LfC8T533JYmQX9Kabt6U8ZXn4,5726
77
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=j5J4yHyR7w2HgAbN7S__299ADvsoyap5HK63SWMNavQ,7546
75
+ sglang/srt/managers/io_struct.py,sha256=tp7RckbDklXW8YW03xXTX3Nv0DpZGjviGPx_iljoQdI,12885
76
+ sglang/srt/managers/schedule_batch.py,sha256=kJvzb75Jmlo1iJvw1IWmLvKnBRuaUxok3MNOv-t5w18,41928
77
+ sglang/srt/managers/schedule_policy.py,sha256=zPk5Um5-E65p0cLZ_ZwCCk7DO8dE6pWJAX9_SyfPUvw,12432
78
+ sglang/srt/managers/scheduler.py,sha256=djbeXw7cfZBEu0uBOsQ-Wz4RCyvSWJ8ulpgaO6cSFyU,54711
79
+ sglang/srt/managers/session_controller.py,sha256=vf2nQrxIu_14PO5xqVBhcw3WdqbdmufBOcIwnFpuyrc,2308
80
+ sglang/srt/managers/tokenizer_manager.py,sha256=v1iCmFPhkT5IzK_LMJ-O0UPcov7pwjT49StRflBBK7Y,25882
81
+ sglang/srt/managers/tp_worker.py,sha256=P8QQ9kAqPi7RYXkXVjFIWaZW2F5ezxQtYTJA6gJleBE,6082
82
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=f-zsbb6FcDrxNhLoRp2jjqSJE-tyAzZo0HAKVnx1PUY,7527
78
83
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
79
84
  sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
80
85
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -82,18 +87,18 @@ sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47
82
87
  sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
83
88
  sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
84
89
  sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
85
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZMkyfZpWgDXfBpJ4cenh1TxXtt1O2xqeiXhDkq6E5pU,12936
86
- sglang/srt/model_executor/forward_batch_info.py,sha256=61TVExbiXDQRvZ6oevNz9AIxG7e-KVddgj4I6MTivLg,9426
87
- sglang/srt/model_executor/model_runner.py,sha256=AYMLc5Rd32ZyWnI6rERPuIASv6D-uA3ztoj9bh0VpcM,26800
90
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=Rm4yt4RSbFf2Dee4gI5UrbJKWgGk4quomRlVJ90TaH4,14521
91
+ sglang/srt/model_executor/forward_batch_info.py,sha256=4PGHIQM-ZckRosIFF987xhTlotEHkt9dTMKrZQUUKqU,12397
92
+ sglang/srt/model_executor/model_runner.py,sha256=iUKjnn0oaa2KMJgeRm4rUYrDYhg35Eg7DlBnB8OUPSw,29116
88
93
  sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
89
94
  sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
90
95
  sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
91
96
  sglang/srt/models/dbrx.py,sha256=IiVIk_rVd0RlvfIJGIThPOPkoYT3U649PrduThiKRzg,14545
92
97
  sglang/srt/models/deepseek.py,sha256=DjW2B21isWE6A2C8A3VGZ-G0k1DkhWHO3dZZjcOVG50,15828
93
- sglang/srt/models/deepseek_v2.py,sha256=z6532MRN1tBltFNteFJfimnaGpyNmK6g_sdNmTzsVmk,28230
98
+ sglang/srt/models/deepseek_v2.py,sha256=irh-2TE5PpwjsCojxpdDQCmBTuF016BTNKD673Gf4dY,32171
94
99
  sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
95
100
  sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
96
- sglang/srt/models/gemma2.py,sha256=iE56CYzPn-QCis4kcU7Yi0jvJ04KeU2deuZH2DaS2lM,14768
101
+ sglang/srt/models/gemma2.py,sha256=6B999ZZBMl5twr_DMK9lnSmxwZAvVavpFHaOat71ANg,14783
97
102
  sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
98
103
  sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
99
104
  sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
@@ -104,39 +109,40 @@ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16
104
109
  sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
105
110
  sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
106
111
  sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
107
- sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
108
- sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
112
+ sglang/srt/models/llava.py,sha256=URAPE0xB878s_pNacA4Z2t4lAxMuzzMjLZu5gf5MseA,24847
113
+ sglang/srt/models/llavavid.py,sha256=bqFZ0qIBlOqp-mDsBFB-QGVSemYmN6wftUKcff3r3MM,12127
109
114
  sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
110
115
  sglang/srt/models/minicpm3.py,sha256=O6092exfoq8iHLmyfpVCubyQEzcfp4SmqtZJs7x4A8s,25014
111
116
  sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
112
117
  sglang/srt/models/mixtral.py,sha256=b6AqEgL4y9wQpKKAGnhgzBtUypYo3dca5UOpGNLEt_A,13949
113
118
  sglang/srt/models/mixtral_quant.py,sha256=2ND-aOHjcyeQMUvqLLqhXwOdlR_bEftMFk3hc3lnpvc,13969
114
119
  sglang/srt/models/mllama.py,sha256=pET1x8wY04yoS8HMCncKx0tFPqGp78K8rlA7Eq7XioE,37889
115
- sglang/srt/models/olmo.py,sha256=eWPmo5AAnBhNGdMwklh1of3JnRzAszgQp4opeiiYidI,11887
120
+ sglang/srt/models/olmo.py,sha256=OPEZCpFrwy47IGiwLZFYxX7UXpE5PP3KdC7UKxRhngE,11884
116
121
  sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15242
122
+ sglang/srt/models/phi3_small.py,sha256=fxqGU0xphJzTeuBW38SRRYpRb2rcsg53JxuObK0pZig,15141
117
123
  sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
118
124
  sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
119
125
  sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
120
- sglang/srt/models/qwen2_vl.py,sha256=jb0RYMo0ShPIt4NtPCEcFGciZKstM-gYwVKND_LK7Ls,26052
126
+ sglang/srt/models/qwen2_vl.py,sha256=G3FNa_N2-CzB56LVrukwBtJazxMrDC_GPNjK6Wqxc4s,26415
121
127
  sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
122
- sglang/srt/models/torch_native_llama.py,sha256=d8gVNurlVVZ-tD3Uc_aHyGCVUUp1gR8awOH4fLRZHDE,19145
128
+ sglang/srt/models/torch_native_llama.py,sha256=RTIO2qp1SitOwNZNVzMBz8i0Gbud3t1nxTCImTguVQg,19362
123
129
  sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
124
130
  sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
125
131
  sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
126
- sglang/srt/openai_api/adapter.py,sha256=TFRafrvLvxGx93AZ8OByVwW7Y3ozBdAXg6gX5KU6hK8,53238
127
- sglang/srt/openai_api/protocol.py,sha256=EZ6G209rBEDP7cepO2kAYqE8wMe1ksYdN7to1iT97Lw,10248
128
- sglang/srt/sampling/sampling_batch_info.py,sha256=7uoHypbbp4o71DfPmF22R_LeyM_Q9BTxBFg8O4lkd9w,7648
129
- sglang/srt/sampling/sampling_params.py,sha256=O8w5yTLP1dwuCdb8kMBBhMSdMWvWxSv3fz2Eq07Tm88,5192
132
+ sglang/srt/openai_api/adapter.py,sha256=10jD3QLOAlbxTUO4-PnhgoaiNtWxbadUfb9bWyqN6gw,53540
133
+ sglang/srt/openai_api/protocol.py,sha256=dRundxpM2kutsz-03u2nPfd3jVA0zJKmPYGAEY93t8c,10078
134
+ sglang/srt/sampling/sampling_batch_info.py,sha256=8bQ1UvsJooPEBq_t6BXSocDAcm8OqivSUYXm4mBtnUQ,8379
135
+ sglang/srt/sampling/sampling_params.py,sha256=u9RL8yTXYSPD6OZPvGdKvD1hmmRDY2_dg6cs2CaJhbg,5192
130
136
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
131
- sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
132
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
133
- sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=MmfqRqJ-leSoY9iO5Hg_ILlX-M0M0tObYrxrb_quStg,3717
134
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
135
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
137
+ sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
138
+ sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
139
+ sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
140
+ sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
141
+ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
136
142
  sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
137
143
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
138
144
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
139
- sglang/test/runners.py,sha256=JxfsGEW9L3cz87fHYmWqb3Vnbk6K1csLLLftR3LogxU,14297
145
+ sglang/test/runners.py,sha256=31tkr6ZZ4WksLXZglAil05E1JiO71kftlg9dBiHq_u0,15034
140
146
  sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
141
147
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
142
148
  sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
@@ -146,10 +152,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
146
152
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
147
153
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
148
154
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
149
- sglang/test/test_utils.py,sha256=lgLPp27xQ1NfSdeJ1YUZeOer8I6G8UDce7YPyG637gY,23054
150
- sglang/test/srt/sampling/penaltylib/utils.py,sha256=q98pQDikkmvvvvAG-AXMYaYte1iHHW2TFhKGtAeGvdE,12802
151
- sglang-0.3.5.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
152
- sglang-0.3.5.post1.dist-info/METADATA,sha256=bTPgfYz1f3ZJPNiIxNPLOoTIGKACad-XLIZ8DOlszu0,21561
153
- sglang-0.3.5.post1.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
154
- sglang-0.3.5.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
155
- sglang-0.3.5.post1.dist-info/RECORD,,
155
+ sglang/test/test_utils.py,sha256=lBwINKlekJx03zJbnjEcO_KIkCMcBnfFa22LNt5Mwy4,23462
156
+ sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
157
+ sglang-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
158
+ sglang-0.3.6.dist-info/METADATA,sha256=Xqs3Fv5BkPx7ROZyCxhEBfIJzESsYz4PzjihzkA-ZZ8,21602
159
+ sglang-0.3.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
160
+ sglang-0.3.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
161
+ sglang-0.3.6.dist-info/RECORD,,
{sglang-0.3.5.post1.dist-info → sglang-0.3.6.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5