sglang 0.3.5__py3-none-any.whl → 0.3.5.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. sglang/bench_offline_throughput.py +309 -0
  2. sglang/bench_serving.py +148 -24
  3. sglang/srt/configs/model_config.py +5 -2
  4. sglang/srt/constrained/__init__.py +2 -66
  5. sglang/srt/constrained/base_grammar_backend.py +73 -0
  6. sglang/srt/constrained/outlines_backend.py +165 -0
  7. sglang/srt/constrained/outlines_jump_forward.py +182 -0
  8. sglang/srt/constrained/xgrammar_backend.py +150 -0
  9. sglang/srt/layers/attention/triton_ops/decode_attention.py +7 -0
  10. sglang/srt/layers/attention/triton_ops/extend_attention.py +6 -0
  11. sglang/srt/layers/fused_moe/fused_moe.py +23 -7
  12. sglang/srt/layers/fused_moe/patch.py +4 -2
  13. sglang/srt/layers/quantization/base_config.py +4 -6
  14. sglang/srt/layers/vocab_parallel_embedding.py +216 -150
  15. sglang/srt/managers/detokenizer_manager.py +0 -14
  16. sglang/srt/managers/io_struct.py +5 -3
  17. sglang/srt/managers/schedule_batch.py +14 -20
  18. sglang/srt/managers/scheduler.py +159 -96
  19. sglang/srt/managers/tokenizer_manager.py +81 -17
  20. sglang/srt/metrics/collector.py +211 -0
  21. sglang/srt/metrics/func_timer.py +108 -0
  22. sglang/srt/mm_utils.py +1 -1
  23. sglang/srt/model_executor/cuda_graph_runner.py +2 -2
  24. sglang/srt/model_executor/forward_batch_info.py +7 -3
  25. sglang/srt/model_executor/model_runner.py +6 -2
  26. sglang/srt/models/gemma2_reward.py +69 -0
  27. sglang/srt/models/gpt2.py +31 -37
  28. sglang/srt/models/internlm2_reward.py +62 -0
  29. sglang/srt/models/llama.py +11 -6
  30. sglang/srt/models/llama_reward.py +5 -26
  31. sglang/srt/models/qwen2_vl.py +5 -7
  32. sglang/srt/openai_api/adapter.py +11 -4
  33. sglang/srt/openai_api/protocol.py +29 -26
  34. sglang/srt/sampling/sampling_batch_info.py +2 -3
  35. sglang/srt/sampling/sampling_params.py +2 -16
  36. sglang/srt/server.py +60 -17
  37. sglang/srt/server_args.py +66 -25
  38. sglang/srt/utils.py +120 -0
  39. sglang/test/simple_eval_common.py +1 -1
  40. sglang/test/simple_eval_humaneval.py +2 -2
  41. sglang/test/simple_eval_mgsm.py +2 -2
  42. sglang/test/test_utils.py +21 -7
  43. sglang/utils.py +1 -0
  44. sglang/version.py +1 -1
  45. {sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/METADATA +12 -8
  46. {sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/RECORD +49 -45
  47. {sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/WHEEL +1 -1
  48. sglang/srt/constrained/base_tool_cache.py +0 -65
  49. sglang/srt/constrained/bnf_cache.py +0 -61
  50. sglang/srt/constrained/fsm_cache.py +0 -95
  51. sglang/srt/constrained/grammar.py +0 -190
  52. sglang/srt/constrained/jump_forward.py +0 -203
  53. {sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/LICENSE +0 -0
  54. {sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py CHANGED
@@ -27,7 +27,10 @@ from sglang.utils import get_exception_traceback
27
27
 
28
28
  DEFAULT_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/Meta-Llama-3.1-8B-FP8"
29
29
  DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
30
+ DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
30
31
  DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
32
+ DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = "Qwen/Qwen1.5-MoE-A2.7B"
33
+ DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
31
34
  DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
32
35
  DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
33
36
  DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
@@ -441,7 +444,7 @@ def popen_launch_server(
441
444
  "Content-Type": "application/json; charset=utf-8",
442
445
  "Authorization": f"Bearer {api_key}",
443
446
  }
444
- response = requests.get(f"{base_url}/v1/models", headers=headers)
447
+ response = requests.get(f"{base_url}/health_generate", headers=headers)
445
448
  if response.status_code == 200:
446
449
  return process
447
450
  except requests.RequestException:
@@ -636,8 +639,8 @@ def calculate_rouge_l(output_strs_list1, output_strs_list2):
636
639
  return rouge_l_scores
637
640
 
638
641
 
639
- STDOUT_FILENAME = "stdout.txt"
640
642
  STDERR_FILENAME = "stderr.txt"
643
+ STDOUT_FILENAME = "stdout.txt"
641
644
 
642
645
 
643
646
  def read_output(output_lines):
@@ -670,7 +673,7 @@ def run_and_check_memory_leak(
670
673
  if enable_mixed_chunk:
671
674
  other_args += ["--enable-mixed-chunk"]
672
675
  if enable_overlap:
673
- other_args += ["--enable-overlap-scheduler"]
676
+ other_args += ["--enable-overlap-schedule"]
674
677
 
675
678
  model = DEFAULT_MODEL_NAME_FOR_TEST
676
679
  port = random.randint(4000, 5000)
@@ -737,12 +740,17 @@ def run_mmlu_test(
737
740
 
738
741
  try:
739
742
  metrics = run_eval(args)
740
- print(f"{metrics=}")
741
- assert metrics["score"] >= 0.65
743
+ assert metrics["score"] >= 0.65, f"{metrics=}"
742
744
  finally:
743
745
  pass
744
746
 
745
- run_and_check_memory_leak(workload_func, disable_radix_cache, enable_mixed_chunk, enable_overlap, chunked_prefill_size)
747
+ run_and_check_memory_leak(
748
+ workload_func,
749
+ disable_radix_cache,
750
+ enable_mixed_chunk,
751
+ enable_overlap,
752
+ chunked_prefill_size,
753
+ )
746
754
 
747
755
 
748
756
  def run_mulit_request_test(
@@ -775,4 +783,10 @@ def run_mulit_request_test(
775
783
  with ThreadPoolExecutor(2) as executor:
776
784
  list(executor.map(run_one, list(range(4))))
777
785
 
778
- run_and_check_memory_leak(workload_func, disable_radix_cache, enable_mixed_chunk, enable_overlap, chunked_prefill_size)
786
+ run_and_check_memory_leak(
787
+ workload_func,
788
+ disable_radix_cache,
789
+ enable_mixed_chunk,
790
+ enable_overlap,
791
+ chunked_prefill_size,
792
+ )
sglang/utils.py CHANGED
@@ -349,6 +349,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
349
349
 
350
350
  def terminate_process(process):
351
351
  from sglang.srt.utils import kill_child_process
352
+
352
353
  kill_child_process(process.pid, include_self=True)
353
354
 
354
355
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.5"
1
+ __version__ = "0.3.5.post2"
{sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.5
3
+ Version: 0.3.5.post2
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -256,14 +256,15 @@ Requires-Dist: interegular; extra == "runtime-common"
256
256
  Requires-Dist: orjson; extra == "runtime-common"
257
257
  Requires-Dist: packaging; extra == "runtime-common"
258
258
  Requires-Dist: pillow; extra == "runtime-common"
259
+ Requires-Dist: prometheus-client>=0.20.0; extra == "runtime-common"
259
260
  Requires-Dist: psutil; extra == "runtime-common"
260
261
  Requires-Dist: pydantic; extra == "runtime-common"
261
262
  Requires-Dist: python-multipart; extra == "runtime-common"
262
263
  Requires-Dist: torchao; extra == "runtime-common"
263
264
  Requires-Dist: uvicorn; extra == "runtime-common"
264
265
  Requires-Dist: uvloop; extra == "runtime-common"
265
- Requires-Dist: zmq; extra == "runtime-common"
266
- Requires-Dist: outlines>=0.0.44; extra == "runtime-common"
266
+ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
267
+ Requires-Dist: outlines<0.1.0,>=0.0.44; extra == "runtime-common"
267
268
  Requires-Dist: modelscope; extra == "runtime-common"
268
269
  Provides-Extra: srt
269
270
  Requires-Dist: sglang[runtime_common]; extra == "srt"
@@ -291,13 +292,14 @@ Requires-Dist: peft; extra == "test"
291
292
  [![license](https://img.shields.io/github/license/sgl-project/sglang.svg)](https://github.com/sgl-project/sglang/tree/main/LICENSE)
292
293
  [![issue resolution](https://img.shields.io/github/issues-closed-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
293
294
  [![open issues](https://img.shields.io/github/issues-raw/sgl-project/sglang)](https://github.com/sgl-project/sglang/issues)
295
+ [![](https://img.shields.io/badge/Gurubase-(experimental)-006BFF)](https://gurubase.io/g/sglang)
294
296
 
295
297
  </div>
296
298
 
297
299
  --------------------------------------------------------------------------------
298
300
 
299
- | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Documentation**](https://sgl-project.github.io/) | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) |
300
- [**Join Bi-Weekly Development Meeting**](https://docs.google.com/document/d/1xEow4eIM152xNcRxqZz9VEcOiTQo8-CEuuQ5qTmkt-E/edit?usp=sharing) |
301
+ | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Documentation**](https://sgl-project.github.io/) | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2tmmp6flg-89dOlJW2TjnBrTRk1I_~GA) |
302
+ [**Join Bi-Weekly Development Meeting**](https://docs.google.com/document/d/1xEow4eIM152xNcRxqZz9VEcOiTQo8-CEuuQ5qTmkt-E/edit?usp=sharing) | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
301
303
 
302
304
  ## News
303
305
  - [2024/10] 🔥 The First SGLang Online Meetup ([slides](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#the-first-sglang-online-meetup)).
@@ -321,11 +323,13 @@ The core features include:
321
323
 
322
324
  - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
323
325
  - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
324
- - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.) and embedding models (e5-mistral), with easy extensibility for integrating new models.
326
+ - **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
325
327
  - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
326
328
 
327
- ## Install
328
- See [https://sgl-project.github.io/start/install.html](https://sgl-project.github.io/start/install.html)
329
+ ## Getting Started
330
+ Install SGLang: See [https://sgl-project.github.io/start/install.html](https://sgl-project.github.io/start/install.html)
331
+
332
+ Send requests: See [https://sgl-project.github.io/start/send_request.html](https://sgl-project.github.io/start/send_request.html)
329
333
 
330
334
  ## Backend: SGLang Runtime (SRT)
331
335
  See [https://sgl-project.github.io/backend/backend.html](https://sgl-project.github.io/backend/backend.html)
{sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/RECORD CHANGED
@@ -1,14 +1,15 @@
1
1
  sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
2
2
  sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
3
3
  sglang/bench_latency.py,sha256=SSqZjcCNO88ExpT94qBZ5CmuA5o0T8wMTBnxLsNMqik,18259
4
+ sglang/bench_offline_throughput.py,sha256=xBr7gI_ZbrpXXD72Nzu1F228oNyz1jggcblZCeUWJgw,9975
4
5
  sglang/bench_server_latency.py,sha256=N1MODIzcMk74yOWmY19d36aih3ewtHOemLxoieKtdhw,5866
5
- sglang/bench_serving.py,sha256=0RR0RsrQqLWqcIPENfrS97F9HJiVXIZvGOWy4R2GvDA,43680
6
+ sglang/bench_serving.py,sha256=ytef89P9bqKRaMGXAqq69SmLTlNXWyHyhEraISLKYME,47975
6
7
  sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
7
8
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
8
9
  sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
9
10
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
10
- sglang/utils.py,sha256=73tkeT4gDzmVkWO4nVXQHS9XlzH7CSL-I_uRpEDsCPg,11546
11
- sglang/version.py,sha256=ThnCuF3X7rsQSd5PAea_jfYA70ZmhLvkFcLBxBPwZnY,22
11
+ sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
12
+ sglang/version.py,sha256=NlX-QUNR7ogIH-GcgzllsyHox7ItJoycFEUM_EYuhW4,28
12
13
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
14
  sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
14
15
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -25,20 +26,19 @@ sglang/lang/backend/runtime_endpoint.py,sha256=iVb7SlrpJ1ic92QG5kQUphZUb2EaVWY43
25
26
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
26
27
  sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
27
28
  sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
28
- sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
29
- sglang/srt/server.py,sha256=4yKD85OlhhkneF7VOzWZMro0P8n1xdKgnZfCWdjrXao,27502
30
- sglang/srt/server_args.py,sha256=AfbBXcrC_XpTWOoZcace0iRksKwyh8-NS1E7RMTWM5A,28912
31
- sglang/srt/utils.py,sha256=zdoZlo0_R18mAWFc4tYnkxVb7qhqcCTKovaEn2dAHLw,23121
29
+ sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
30
+ sglang/srt/server.py,sha256=JUYAE8MDGYou_HbmuR10QFZfg319fGt9VamskvBkpFo,28776
31
+ sglang/srt/server_args.py,sha256=V8sx2oY0yphHC_uATwv4UTiLUFnvMQl85o6y5AyaoXM,30086
32
+ sglang/srt/utils.py,sha256=jGSlxbvI50xEybdupDQNHpsCaF1U_5buADrD149766g,27013
32
33
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
33
34
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
34
- sglang/srt/configs/model_config.py,sha256=bxG-vD8ZmXbypKW6Hvz8AS7rcwjTxt0TzG7p59m3t2E,9387
35
+ sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
35
36
  sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
36
- sglang/srt/constrained/__init__.py,sha256=VXEY9K8HrEBv6QHe3X7J5ingiDugSF9_cpEbEcpBId4,2466
37
- sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
38
- sglang/srt/constrained/bnf_cache.py,sha256=c8msJ57Gj7aMy1ccTRERLgVuovEeDJx-wVPOhYF0w9k,2057
39
- sglang/srt/constrained/fsm_cache.py,sha256=CWwtOHTU3sHmw71OhWxl05YgU7cNNVWohlUt71rG230,3536
40
- sglang/srt/constrained/grammar.py,sha256=kvfyP2E53xo8jVWVZ_qHlJn0U4Qi2WaNi2yMZPKgI_0,6952
41
- sglang/srt/constrained/jump_forward.py,sha256=o-CzJu3DEs0eFKlLzsQVYMSo4vBKpffs25sXLOJd6jc,6997
37
+ sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
38
+ sglang/srt/constrained/base_grammar_backend.py,sha256=OPuBSd_F_fRwjVj6YFWBQuGeikj7UQtkTvc-JgEYt4I,2259
39
+ sglang/srt/constrained/outlines_backend.py,sha256=J03QQiT9pkdXyoYGw3Rj6taEyWlIr4VCBvxQ3aMiB8A,5786
40
+ sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
41
+ sglang/srt/constrained/xgrammar_backend.py,sha256=wMWqkLN5KhnJXL6GBqbcrhxvAAMx60nG88KIBU1bFSc,4505
42
42
  sglang/srt/layers/activation.py,sha256=7VEkCrx2dvl629Lz0fkJcJfVoZA-ykEdkpTzKEc_drQ,5225
43
43
  sglang/srt/layers/layernorm.py,sha256=HCj8Y_X6MNNdtQU2sWKgyjIqVERxl9dqrmjbBbyJjpE,3796
44
44
  sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
@@ -48,32 +48,32 @@ sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8Do
48
48
  sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
49
49
  sglang/srt/layers/sampler.py,sha256=3zfth1Kz24X4sUq7Z_cjZwHgPVivI-rgPtIeUbsiiWU,4589
50
50
  sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
51
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=8Tx0WUNibDoNkGruGzRIkvp6t7D54e-nchdezeQ5Nzk,22302
51
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
52
52
  sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
53
53
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
54
54
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=843CbZsRfzWp5FTusNXXL1o4N3jd0hoCNpsoUR6Qjxk,23306
55
55
  sglang/srt/layers/attention/triton_backend.py,sha256=DKUEzxQE8iBvJPNHmQwP1pyx2wXmSsLqzBhLjJznIUk,6482
56
- sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=7cDNPMMkz7--ebNKUeSaLY_6hBbvr_NqDodYFtW9ahA,18433
56
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
57
57
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
58
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=nEG7iBh1pAy3WaqPdLZwCJwDgyk5HLQ181kBS2nxbwg,11179
58
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
59
59
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
60
60
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
61
- sglang/srt/layers/fused_moe/fused_moe.py,sha256=uRmDUleTaJKBbsTfum6RgHifUbgi6yKuB2dw_mIhw3M,23250
61
+ sglang/srt/layers/fused_moe/fused_moe.py,sha256=N15tWTm2SGuesJxDIJAdV5FsDUpE-15sb_AIgr4swlw,23656
62
62
  sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
63
- sglang/srt/layers/fused_moe/patch.py,sha256=B9cDtHqHfnWE0QqZAffvUi6cVRKcMBMKDGJWGIaKh3U,3898
63
+ sglang/srt/layers/fused_moe/patch.py,sha256=K5CNLnFVxRPd8_jlY4hW6bj7pAACeCFZQA8y5loqqM4,4029
64
64
  sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
65
- sglang/srt/layers/quantization/base_config.py,sha256=fx-FeA1a4jg7HDoYvIKC5G_wLcfeOOyIJQ6MtCaHpZ4,4664
65
+ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
66
66
  sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
67
67
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
68
68
  sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
69
69
  sglang/srt/managers/data_parallel_controller.py,sha256=_XB6Ianc8TiqwLTW-7DH6gGjVYBeBU_6WjjaDk0snIY,5686
70
- sglang/srt/managers/detokenizer_manager.py,sha256=pBCcK-wKgPk4Ty-vQFSGovEZEE_yKK1f7YVDW8vDcYw,7962
70
+ sglang/srt/managers/detokenizer_manager.py,sha256=erRgf8RijFrGnYjZawu9an1u2mFPRY3tnxzF9PbKc80,7295
71
71
  sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
72
- sglang/srt/managers/io_struct.py,sha256=23-eJQrpMw7OJ0LiDvBVKpI36rdyxJluFlHJ7wXjKqw,12261
73
- sglang/srt/managers/schedule_batch.py,sha256=LIkxGNZC_PWIX7-BJGLRpzgNIGH-1ZxL9RUZE-dgo70,39653
72
+ sglang/srt/managers/io_struct.py,sha256=O_oHnikwmOexNqH4HP6bwAI5d_jG_C96JGapkLg8B7c,12289
73
+ sglang/srt/managers/schedule_batch.py,sha256=4BgocYdKFTDCrrBkSXCT75EALBx-3RYnoN3SgtdsHlU,39595
74
74
  sglang/srt/managers/schedule_policy.py,sha256=LH0rh1PiI5LK-dSd3dar8_po6FidiBUuj0Xcp_yNQAA,12295
75
- sglang/srt/managers/scheduler.py,sha256=p72s46nNnUl5YTKfgwRNmcc8NZbBSGudYuqOP2bZsyc,45524
76
- sglang/srt/managers/tokenizer_manager.py,sha256=fGVMxJb-UQPokqdlbphWHSVnLyKWAY8JK7fHe6iVa2I,21793
75
+ sglang/srt/managers/scheduler.py,sha256=ty1sJ9U6JxifIGF4uzZX6CANMJtbjNWPe2k8aRPS6aI,48133
76
+ sglang/srt/managers/tokenizer_manager.py,sha256=n_XCsCOwLZWCLv1ZJLGjyKgrAWCAQDyEhjnkxOptSa8,24436
77
77
  sglang/srt/managers/tp_worker.py,sha256=S5oim5xrkg1j68hYq6LfC8T533JYmQX9Kabt6U8ZXn4,5726
78
78
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=j5J4yHyR7w2HgAbN7S__299ADvsoyap5HK63SWMNavQ,7546
79
79
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
@@ -81,9 +81,11 @@ sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbD
81
81
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
82
82
  sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
83
83
  sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
84
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=zRxXxV54b4SUXk9BQ1zPAS2VXCBRBvT15A64Yf0kBSE,12909
85
- sglang/srt/model_executor/forward_batch_info.py,sha256=1GM6A-tqTDD0MEMQx93PC7XahABr0vlv7JBXohaehkc,9272
86
- sglang/srt/model_executor/model_runner.py,sha256=Zs-u9sJREJD-1omhaFjBYfgR_2_7Cj0O5mGgQ0NtR8s,26793
84
+ sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
85
+ sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
86
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZMkyfZpWgDXfBpJ4cenh1TxXtt1O2xqeiXhDkq6E5pU,12936
87
+ sglang/srt/model_executor/forward_batch_info.py,sha256=61TVExbiXDQRvZ6oevNz9AIxG7e-KVddgj4I6MTivLg,9426
88
+ sglang/srt/model_executor/model_runner.py,sha256=QdFjQRnxZU8r7-MP-NdsnFnPWMRfxa-zTUmKOYmM8HE,26879
87
89
  sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
88
90
  sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
89
91
  sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
@@ -93,14 +95,16 @@ sglang/srt/models/deepseek_v2.py,sha256=z6532MRN1tBltFNteFJfimnaGpyNmK6g_sdNmTzs
93
95
  sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
94
96
  sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
95
97
  sglang/srt/models/gemma2.py,sha256=iE56CYzPn-QCis4kcU7Yi0jvJ04KeU2deuZH2DaS2lM,14768
96
- sglang/srt/models/gpt2.py,sha256=xWqU66KO6rNBnzA6uOBrlLWnwVzLKuC4UWHc5WuEHw8,10151
98
+ sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
99
+ sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
97
100
  sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
98
101
  sglang/srt/models/grok.py,sha256=iSkvt7whYyMndUHBekM4vKHaDXnnmeJMErkklGpz624,14826
99
102
  sglang/srt/models/internlm2.py,sha256=HOVOXz3b7eLF2wpG_FEK5PYnYOEpHPGJ0pufvL7HPD0,12099
100
- sglang/srt/models/llama.py,sha256=X_LKJ02ofDfpgVVFexf_C6g4FikadfMikhuRVAuLN5I,16094
103
+ sglang/srt/models/internlm2_reward.py,sha256=dtT1vupWv6dXk17XYYdsmsR027GiP_WOxtMvwRC7Y84,2330
104
+ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16286
101
105
  sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
102
106
  sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
103
- sglang/srt/models/llama_reward.py,sha256=48J6PmZJRFRv-6mEF6y5fxNKtRRZVQzvJqg3XaWDWa0,5448
107
+ sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
104
108
  sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
105
109
  sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
106
110
  sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
@@ -114,16 +118,16 @@ sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15
114
118
  sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
115
119
  sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
116
120
  sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
117
- sglang/srt/models/qwen2_vl.py,sha256=scKzs-KTI64CRRcBNWQniXURLO3WiJEzx-MsisH1Als,26093
121
+ sglang/srt/models/qwen2_vl.py,sha256=jb0RYMo0ShPIt4NtPCEcFGciZKstM-gYwVKND_LK7Ls,26052
118
122
  sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
119
123
  sglang/srt/models/torch_native_llama.py,sha256=d8gVNurlVVZ-tD3Uc_aHyGCVUUp1gR8awOH4fLRZHDE,19145
120
124
  sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
121
125
  sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
122
126
  sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
123
- sglang/srt/openai_api/adapter.py,sha256=nZOVjZ-q4eULl19oT97_u7z63SQiWW7IzbYzJeWE7os,53069
124
- sglang/srt/openai_api/protocol.py,sha256=EZ6G209rBEDP7cepO2kAYqE8wMe1ksYdN7to1iT97Lw,10248
125
- sglang/srt/sampling/sampling_batch_info.py,sha256=qrijXoMhF-V_x3g6lumsfXgsGaPiKLIJ2pUz6ii-O2s,7735
126
- sglang/srt/sampling/sampling_params.py,sha256=u1UWt9biIFXKymAg56RbkMa8oe5jxsoMvsv3cH7_kZ0,5692
127
+ sglang/srt/openai_api/adapter.py,sha256=xYBmBLZ_JxfMt_m8LtVe_OB70GV4S9zBOL8e5g_VRvs,53432
128
+ sglang/srt/openai_api/protocol.py,sha256=Mou5JUMKJkxVxoj4n8R4_sgnYY3OcwniiAi2TEM3hfY,10070
129
+ sglang/srt/sampling/sampling_batch_info.py,sha256=7uoHypbbp4o71DfPmF22R_LeyM_Q9BTxBFg8O4lkd9w,7648
130
+ sglang/srt/sampling/sampling_params.py,sha256=zzWVm8DxcUDdPwV1MIh5q76mmLwtkun0E08T6U3ZyWA,5192
127
131
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
128
132
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
129
133
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
@@ -134,19 +138,19 @@ sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8
134
138
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
135
139
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
136
140
  sglang/test/runners.py,sha256=JxfsGEW9L3cz87fHYmWqb3Vnbk6K1csLLLftR3LogxU,14297
137
- sglang/test/simple_eval_common.py,sha256=r0G-9QLycs2ax3RMc44T_61fzMxlpTzv6pececC7lyY,12379
141
+ sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
138
142
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
139
- sglang/test/simple_eval_humaneval.py,sha256=7lTi841NT58smNOtRwCedrdX9IWWypdLkOtaQOBy-GI,5687
143
+ sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
140
144
  sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
141
- sglang/test/simple_eval_mgsm.py,sha256=wfbqJW9Rkc66vzq2fEMF6jchmoA8mw1OUiGU55cZ2B0,10261
145
+ sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
142
146
  sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
143
147
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
144
148
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
145
149
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
146
- sglang/test/test_utils.py,sha256=zspkM9VSm6QXI4wVG-75r8ttGgylnPOEH7nuYjp5plU,22799
150
+ sglang/test/test_utils.py,sha256=XvIAMeLXr4D7uLxCUSLTKP5Upc1EJd0JX2egL897Jfo,23100
147
151
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=q98pQDikkmvvvvAG-AXMYaYte1iHHW2TFhKGtAeGvdE,12802
148
- sglang-0.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
149
- sglang-0.3.5.dist-info/METADATA,sha256=FQ8MBpLt6W0-43VhtuwEWgqomXaFwUumiBd6T8xPWG0,21099
150
- sglang-0.3.5.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
151
- sglang-0.3.5.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
152
- sglang-0.3.5.dist-info/RECORD,,
152
+ sglang-0.3.5.post2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
153
+ sglang-0.3.5.post2.dist-info/METADATA,sha256=ajoktPOWOAmE37TcZw562A22FmxntBUWO4zLOShVKpQ,21568
154
+ sglang-0.3.5.post2.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
155
+ sglang-0.3.5.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
156
+ sglang-0.3.5.post2.dist-info/RECORD,,
{sglang-0.3.5.dist-info → sglang-0.3.5.post2.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
sglang/srt/constrained/base_tool_cache.py DELETED
@@ -1,65 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
14
- """
15
-
16
- """Base tool cache for constrained decoding tools."""
17
-
18
- import time
19
-
20
-
21
- class BaseToolCache:
22
- def __init__(self, enable=True):
23
- self.enable = enable
24
- self.reset()
25
-
26
- def reset(self):
27
- self.cache = {}
28
- self.metrics = {"total": 0, "hit": 0, "avg_init_time": 0}
29
-
30
- def query(self, key):
31
- def _init_with_timer(key):
32
- start = time.monotonic()
33
- val = self.init_value(key)
34
- init_time = time.monotonic() - start
35
- curr_total = self.metrics["total"]
36
- new_total = curr_total + 1
37
-
38
- # Update average init time without old_avg * old_total to avoid overflow.
39
- self.metrics["avg_init_time"] = (init_time / new_total) + (
40
- curr_total / new_total
41
- ) * self.metrics["avg_init_time"]
42
- return val
43
-
44
- if key in self.cache:
45
- self.metrics["hit"] += 1
46
- val = self.cache[key]
47
- else:
48
- # Cache miss or disabled.
49
- val = _init_with_timer(key)
50
-
51
- if self.enable:
52
- self.metrics["total"] += 1
53
- self.cache[key] = val
54
- return val
55
-
56
- def init_value(self, key):
57
- raise NotImplementedError()
58
-
59
- def get_cache_hit_rate(self):
60
- if self.metrics["total"] == 0:
61
- return 0
62
- return self.metrics["hit"] / self.metrics["total"]
63
-
64
- def get_avg_init_time(self):
65
- return self.metrics["avg_init_time"]
@@ -1,61 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
- http://www.apache.org/licenses/LICENSE-2.0
7
- Unless required by applicable law or agreed to in writing, software
8
- distributed under the License is distributed on an "AS IS" BASIS,
9
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
- See the License for the specific language governing permissions and
11
- limitations under the License.
12
- """
13
-
14
- """Cache for the compressed finite state machine."""
15
-
16
- from typing import Tuple
17
-
18
- from transformers import AutoTokenizer
19
-
20
- from sglang.srt.constrained import (
21
- GrammarMatcher,
22
- GrammarMatcherInitContext,
23
- GrammarMatcherInitContextCache,
24
- )
25
-
26
- MAX_ROLLBACK_TOKENS = 10
27
-
28
-
29
- class BNFCache:
30
- grammar_cache: GrammarMatcherInitContextCache
31
-
32
- def __init__(
33
- self,
34
- tokenizer_path,
35
- tokenizer_args_dict,
36
- skip_tokenizer_init=False,
37
- whitespace_patterns=None,
38
- ):
39
- # TODO(dark): how to deal with whitespace_patterns and skip_tokenizer_init
40
- if skip_tokenizer_init:
41
- return
42
-
43
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_args_dict)
44
- self.grammar_cache = GrammarMatcherInitContextCache(
45
- tokenizer_or_vocab=tokenizer
46
- )
47
-
48
- def get_context(self, key: Tuple[str, str]) -> GrammarMatcherInitContext:
49
- key_type, key_string = key
50
- if key_type == "json":
51
- return self.grammar_cache.get_init_context_for_json_schema(key_string)
52
- elif key_type == "regex":
53
- raise ValueError(f"regex hasn't been supported by xgrammar yet")
54
- else:
55
- raise ValueError(f"Invalid key_type: {key_type}")
56
-
57
- def query(self, key: Tuple[str, str], vocab_size: int) -> GrammarMatcher:
58
- ctx = self.get_context(key)
59
- return GrammarMatcher(
60
- ctx, max_rollback_tokens=MAX_ROLLBACK_TOKENS, mask_vocab_size=vocab_size
61
- )
@@ -1,95 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
14
- """
15
-
16
- """Cache for the compressed finite state machine."""
17
- import logging
18
-
19
- from interegular import InvalidSyntax, parse_pattern
20
- from outlines.fsm.json_schema import build_regex_from_schema
21
- from transformers import AutoTokenizer
22
-
23
- from sglang.srt.constrained import RegexGuide, TransformerTokenizer
24
- from sglang.srt.constrained.base_tool_cache import BaseToolCache
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
-
29
- class FSMCache(BaseToolCache):
30
- def __init__(
31
- self,
32
- tokenizer_path,
33
- tokenizer_args_dict,
34
- enable=True,
35
- skip_tokenizer_init=False,
36
- constrained_json_whitespace_pattern=None,
37
- ):
38
- super().__init__(enable=enable)
39
-
40
- if (
41
- skip_tokenizer_init
42
- or tokenizer_path.endswith(".json")
43
- or tokenizer_path.endswith(".model")
44
- ):
45
- # Do not support TiktokenTokenizer or SentencePieceTokenizer
46
- return
47
-
48
- tokenizer_args_dict.setdefault("padding_side", "left")
49
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_args_dict)
50
- try:
51
- self.outlines_tokenizer = TransformerTokenizer(tokenizer)
52
- except AttributeError:
53
- # FIXME: tmp fix for chatglm2 & chatglm3 (pad_token_id=0)
54
- origin_pad_token_id = tokenizer.pad_token_id
55
-
56
- def fset(self, value):
57
- self._value = value
58
-
59
- type(tokenizer).pad_token_id = property(
60
- fget=type(tokenizer).pad_token_id.fget, fset=fset
61
- )
62
- self.outlines_tokenizer = TransformerTokenizer(tokenizer)
63
- self.outlines_tokenizer.tokenizer.pad_token_id = origin_pad_token_id
64
- self.outlines_tokenizer.pad_token_id = origin_pad_token_id
65
- self.outlines_tokenizer.pad_token = (
66
- self.outlines_tokenizer.tokenizer.pad_token
67
- )
68
- self.outlines_tokenizer.vocabulary = (
69
- self.outlines_tokenizer.tokenizer.get_vocab()
70
- )
71
- self.constrained_json_whitespace_pattern = constrained_json_whitespace_pattern
72
-
73
- def init_value(self, key):
74
- key_type, key_string = key
75
- if key_type == "json":
76
- try:
77
- regex = build_regex_from_schema(
78
- key_string,
79
- whitespace_pattern=self.constrained_json_whitespace_pattern,
80
- )
81
- except NotImplementedError as e:
82
- logger.warning(
83
- f"skip invalid json schema: json_schema={key_string}, {e=}"
84
- )
85
- return None, key_string
86
- elif key_type == "regex":
87
- regex = key_string
88
- else:
89
- raise ValueError(f"Invalid key_type: {key_type}")
90
- try:
91
- parse_pattern(regex)
92
- except InvalidSyntax as e:
93
- logger.warning(f"skip invalid regex guide: {regex=}, {e=}")
94
- return None, regex
95
- return RegexGuide(regex, self.outlines_tokenizer), regex