sglang 0.3.5__py3-none-any.whl → 0.3.5.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. sglang/bench_serving.py +113 -3
  2. sglang/srt/configs/model_config.py +5 -2
  3. sglang/srt/constrained/__init__.py +2 -66
  4. sglang/srt/constrained/base_grammar_backend.py +72 -0
  5. sglang/srt/constrained/outlines_backend.py +165 -0
  6. sglang/srt/constrained/outlines_jump_forward.py +182 -0
  7. sglang/srt/constrained/xgrammar_backend.py +114 -0
  8. sglang/srt/layers/attention/triton_ops/decode_attention.py +7 -0
  9. sglang/srt/layers/attention/triton_ops/extend_attention.py +6 -0
  10. sglang/srt/layers/fused_moe/fused_moe.py +23 -7
  11. sglang/srt/layers/quantization/base_config.py +4 -6
  12. sglang/srt/layers/vocab_parallel_embedding.py +216 -150
  13. sglang/srt/managers/io_struct.py +5 -3
  14. sglang/srt/managers/schedule_batch.py +14 -20
  15. sglang/srt/managers/scheduler.py +153 -94
  16. sglang/srt/managers/tokenizer_manager.py +81 -17
  17. sglang/srt/metrics/collector.py +211 -0
  18. sglang/srt/metrics/func_timer.py +108 -0
  19. sglang/srt/mm_utils.py +1 -1
  20. sglang/srt/model_executor/cuda_graph_runner.py +2 -2
  21. sglang/srt/model_executor/forward_batch_info.py +7 -3
  22. sglang/srt/model_executor/model_runner.py +2 -1
  23. sglang/srt/models/gemma2_reward.py +69 -0
  24. sglang/srt/models/gpt2.py +31 -37
  25. sglang/srt/models/internlm2_reward.py +62 -0
  26. sglang/srt/models/llama.py +11 -6
  27. sglang/srt/models/llama_reward.py +5 -26
  28. sglang/srt/models/qwen2_vl.py +5 -7
  29. sglang/srt/openai_api/adapter.py +6 -2
  30. sglang/srt/sampling/sampling_batch_info.py +2 -3
  31. sglang/srt/sampling/sampling_params.py +0 -14
  32. sglang/srt/server.py +58 -16
  33. sglang/srt/server_args.py +42 -22
  34. sglang/srt/utils.py +87 -0
  35. sglang/test/simple_eval_common.py +1 -1
  36. sglang/test/simple_eval_humaneval.py +2 -2
  37. sglang/test/simple_eval_mgsm.py +2 -2
  38. sglang/test/test_utils.py +18 -4
  39. sglang/utils.py +1 -0
  40. sglang/version.py +1 -1
  41. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/METADATA +11 -7
  42. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/RECORD +45 -42
  43. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/WHEEL +1 -1
  44. sglang/srt/constrained/base_tool_cache.py +0 -65
  45. sglang/srt/constrained/bnf_cache.py +0 -61
  46. sglang/srt/constrained/fsm_cache.py +0 -95
  47. sglang/srt/constrained/grammar.py +0 -190
  48. sglang/srt/constrained/jump_forward.py +0 -203
  49. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/LICENSE +0 -0
  50. {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/top_level.txt +0 -0
@@ -2,13 +2,13 @@ sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
2
2
  sglang/api.py,sha256=3I9YUJNOeCqwKymZec2JR_agjTyKIx4XoT6IGdZ4_Cs,6953
3
3
  sglang/bench_latency.py,sha256=SSqZjcCNO88ExpT94qBZ5CmuA5o0T8wMTBnxLsNMqik,18259
4
4
  sglang/bench_server_latency.py,sha256=N1MODIzcMk74yOWmY19d36aih3ewtHOemLxoieKtdhw,5866
5
- sglang/bench_serving.py,sha256=0RR0RsrQqLWqcIPENfrS97F9HJiVXIZvGOWy4R2GvDA,43680
5
+ sglang/bench_serving.py,sha256=vYlXSXnAeUuF6oCW7r07pkQgnK9UR42B-XHyDu22erM,47620
6
6
  sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
7
7
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
8
8
  sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
9
9
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
10
- sglang/utils.py,sha256=73tkeT4gDzmVkWO4nVXQHS9XlzH7CSL-I_uRpEDsCPg,11546
11
- sglang/version.py,sha256=ThnCuF3X7rsQSd5PAea_jfYA70ZmhLvkFcLBxBPwZnY,22
10
+ sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
11
+ sglang/version.py,sha256=zPnEkP8KmACe4vaOxE-TiO3Jo-alnSUGAjnKThcNdBg,28
12
12
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
14
14
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -25,20 +25,19 @@ sglang/lang/backend/runtime_endpoint.py,sha256=iVb7SlrpJ1ic92QG5kQUphZUb2EaVWY43
25
25
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
26
26
  sglang/srt/conversation.py,sha256=erz6wEXMcSmBlskuUhX2c-MT0EMyqyFpTem9PgastEE,21107
27
27
  sglang/srt/hf_transformers_utils.py,sha256=QbYVTnz0UdaXESPMAaq1OMzzznn95J_l08eXJuB68aU,6618
28
- sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
29
- sglang/srt/server.py,sha256=4yKD85OlhhkneF7VOzWZMro0P8n1xdKgnZfCWdjrXao,27502
30
- sglang/srt/server_args.py,sha256=AfbBXcrC_XpTWOoZcace0iRksKwyh8-NS1E7RMTWM5A,28912
31
- sglang/srt/utils.py,sha256=zdoZlo0_R18mAWFc4tYnkxVb7qhqcCTKovaEn2dAHLw,23121
28
+ sglang/srt/mm_utils.py,sha256=ml68nWUJhs_FS2FU1oB9UPHKZmF7P2DQHl1ddywn4ao,12272
29
+ sglang/srt/server.py,sha256=mpZmCVNSN_Go-mEKaYYhRNDFJHbmsK8WCc786oSCf5c,28685
30
+ sglang/srt/server_args.py,sha256=9sosvHumMtf5L6jKnFNQ0_MMIg3BkaRCPmnGY2niQps,29472
31
+ sglang/srt/utils.py,sha256=WtUZafw6WjAbjtRn_rTW5i2HgYJ65rrtZGpob3ngeuA,26016
32
32
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
33
33
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
34
- sglang/srt/configs/model_config.py,sha256=bxG-vD8ZmXbypKW6Hvz8AS7rcwjTxt0TzG7p59m3t2E,9387
34
+ sglang/srt/configs/model_config.py,sha256=mBXeDfFUijQnxd38gVGJ6QxgsiitDklfHvbjYBJFKQY,9470
35
35
  sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
36
- sglang/srt/constrained/__init__.py,sha256=VXEY9K8HrEBv6QHe3X7J5ingiDugSF9_cpEbEcpBId4,2466
37
- sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
38
- sglang/srt/constrained/bnf_cache.py,sha256=c8msJ57Gj7aMy1ccTRERLgVuovEeDJx-wVPOhYF0w9k,2057
39
- sglang/srt/constrained/fsm_cache.py,sha256=CWwtOHTU3sHmw71OhWxl05YgU7cNNVWohlUt71rG230,3536
40
- sglang/srt/constrained/grammar.py,sha256=kvfyP2E53xo8jVWVZ_qHlJn0U4Qi2WaNi2yMZPKgI_0,6952
41
- sglang/srt/constrained/jump_forward.py,sha256=o-CzJu3DEs0eFKlLzsQVYMSo4vBKpffs25sXLOJd6jc,6997
36
+ sglang/srt/constrained/__init__.py,sha256=LHj0-NxDQ7S_N3Pc1gJ-FmIJVN_PTP9ytitWOICSMHk,691
37
+ sglang/srt/constrained/base_grammar_backend.py,sha256=jRLKExPzMiM6GjryunJNEVrRMmHV-aJ21VhtB9c6bDw,2194
38
+ sglang/srt/constrained/outlines_backend.py,sha256=mrubHYHdalbsgHgeu9Ct5OFUd7RnMok5jLXjdKHv-PE,5857
39
+ sglang/srt/constrained/outlines_jump_forward.py,sha256=1fnYxlrc24xjcW3Wx59Hyg0L9hiHIVgMVUsld3UDfW4,6102
40
+ sglang/srt/constrained/xgrammar_backend.py,sha256=ZvEDDI_huTn2OjOfQQhqfxJU2w4R1tR1v7PwV98A0u4,3640
42
41
  sglang/srt/layers/activation.py,sha256=7VEkCrx2dvl629Lz0fkJcJfVoZA-ykEdkpTzKEc_drQ,5225
43
42
  sglang/srt/layers/layernorm.py,sha256=HCj8Y_X6MNNdtQU2sWKgyjIqVERxl9dqrmjbBbyJjpE,3796
44
43
  sglang/srt/layers/linear.py,sha256=EOdlpAf6srqxzvPpxcv10KFJKedNc22CGP1qEvpRbDg,46131
@@ -48,32 +47,32 @@ sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8Do
48
47
  sglang/srt/layers/rotary_embedding.py,sha256=gfRKBB8FmsQKiDH0Crh_KRIGRUuvEgazH1p_n9D_m7E,3889
49
48
  sglang/srt/layers/sampler.py,sha256=3zfth1Kz24X4sUq7Z_cjZwHgPVivI-rgPtIeUbsiiWU,4589
50
49
  sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
51
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=8Tx0WUNibDoNkGruGzRIkvp6t7D54e-nchdezeQ5Nzk,22302
50
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
52
51
  sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
53
52
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
54
53
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=843CbZsRfzWp5FTusNXXL1o4N3jd0hoCNpsoUR6Qjxk,23306
55
54
  sglang/srt/layers/attention/triton_backend.py,sha256=DKUEzxQE8iBvJPNHmQwP1pyx2wXmSsLqzBhLjJznIUk,6482
56
- sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=7cDNPMMkz7--ebNKUeSaLY_6hBbvr_NqDodYFtW9ahA,18433
55
+ sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=Xbp2cQFYddenlReAqThN_EV7TmbSj5K3Cv5QTR5Ueqo,18787
57
56
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
58
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=nEG7iBh1pAy3WaqPdLZwCJwDgyk5HLQ181kBS2nxbwg,11179
57
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=JKiDqyndNiLF8qUrG_rcdiyZvczXthO6WuSYTqd3fAo,11359
59
58
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=LnuWqGAba03e25adxS_lFgjTV6nBWsVBUGUvrl-8alQ,5993
60
59
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
61
- sglang/srt/layers/fused_moe/fused_moe.py,sha256=uRmDUleTaJKBbsTfum6RgHifUbgi6yKuB2dw_mIhw3M,23250
60
+ sglang/srt/layers/fused_moe/fused_moe.py,sha256=N15tWTm2SGuesJxDIJAdV5FsDUpE-15sb_AIgr4swlw,23656
62
61
  sglang/srt/layers/fused_moe/layer.py,sha256=tbHnUJs3uvdDsl3VnwtyGA31VtFouNTPD7h7fPSCYOc,23613
63
62
  sglang/srt/layers/fused_moe/patch.py,sha256=B9cDtHqHfnWE0QqZAffvUi6cVRKcMBMKDGJWGIaKh3U,3898
64
63
  sglang/srt/layers/quantization/__init__.py,sha256=QilMNqgu3eOFUkEjXLSDa1NvoNdi_CAvC8a1hprOgN8,2979
65
- sglang/srt/layers/quantization/base_config.py,sha256=fx-FeA1a4jg7HDoYvIKC5G_wLcfeOOyIJQ6MtCaHpZ4,4664
64
+ sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
66
65
  sglang/srt/lora/lora.py,sha256=meRL7oBUx8mxV_isc3Lp0EIsFQWC2PvaN-fE78BmMwg,14970
67
66
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
68
67
  sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
69
68
  sglang/srt/managers/data_parallel_controller.py,sha256=_XB6Ianc8TiqwLTW-7DH6gGjVYBeBU_6WjjaDk0snIY,5686
70
69
  sglang/srt/managers/detokenizer_manager.py,sha256=pBCcK-wKgPk4Ty-vQFSGovEZEE_yKK1f7YVDW8vDcYw,7962
71
70
  sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
72
- sglang/srt/managers/io_struct.py,sha256=23-eJQrpMw7OJ0LiDvBVKpI36rdyxJluFlHJ7wXjKqw,12261
73
- sglang/srt/managers/schedule_batch.py,sha256=LIkxGNZC_PWIX7-BJGLRpzgNIGH-1ZxL9RUZE-dgo70,39653
71
+ sglang/srt/managers/io_struct.py,sha256=O_oHnikwmOexNqH4HP6bwAI5d_jG_C96JGapkLg8B7c,12289
72
+ sglang/srt/managers/schedule_batch.py,sha256=4BgocYdKFTDCrrBkSXCT75EALBx-3RYnoN3SgtdsHlU,39595
74
73
  sglang/srt/managers/schedule_policy.py,sha256=LH0rh1PiI5LK-dSd3dar8_po6FidiBUuj0Xcp_yNQAA,12295
75
- sglang/srt/managers/scheduler.py,sha256=p72s46nNnUl5YTKfgwRNmcc8NZbBSGudYuqOP2bZsyc,45524
76
- sglang/srt/managers/tokenizer_manager.py,sha256=fGVMxJb-UQPokqdlbphWHSVnLyKWAY8JK7fHe6iVa2I,21793
74
+ sglang/srt/managers/scheduler.py,sha256=6vqsrZu2roxzXJpNeFQRbDvERTxqbDmbvrGDp1E7FRA,47926
75
+ sglang/srt/managers/tokenizer_manager.py,sha256=n_XCsCOwLZWCLv1ZJLGjyKgrAWCAQDyEhjnkxOptSa8,24436
77
76
  sglang/srt/managers/tp_worker.py,sha256=S5oim5xrkg1j68hYq6LfC8T533JYmQX9Kabt6U8ZXn4,5726
78
77
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=j5J4yHyR7w2HgAbN7S__299ADvsoyap5HK63SWMNavQ,7546
79
78
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
@@ -81,9 +80,11 @@ sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbD
81
80
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
82
81
  sglang/srt/mem_cache/memory_pool.py,sha256=41fjuj_sD0yfJq-sy-X99cc2djBa6w4dy2y47V0WqNU,10934
83
82
  sglang/srt/mem_cache/radix_cache.py,sha256=DzLCO_gYQ7X_C2NJSEHzzMZhb5HzWjKF9wXJQsnzr8M,10427
84
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=zRxXxV54b4SUXk9BQ1zPAS2VXCBRBvT15A64Yf0kBSE,12909
85
- sglang/srt/model_executor/forward_batch_info.py,sha256=1GM6A-tqTDD0MEMQx93PC7XahABr0vlv7JBXohaehkc,9272
86
- sglang/srt/model_executor/model_runner.py,sha256=Zs-u9sJREJD-1omhaFjBYfgR_2_7Cj0O5mGgQ0NtR8s,26793
83
+ sglang/srt/metrics/collector.py,sha256=9kidVhr4ldbSntAYfzwJt_2CTUFnnej0OoQdxUUwUWA,6767
84
+ sglang/srt/metrics/func_timer.py,sha256=xe9UT4bPP1mA4GRZLsCd708cmv1B00hMpUmF7hzAKB4,3344
85
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZMkyfZpWgDXfBpJ4cenh1TxXtt1O2xqeiXhDkq6E5pU,12936
86
+ sglang/srt/model_executor/forward_batch_info.py,sha256=61TVExbiXDQRvZ6oevNz9AIxG7e-KVddgj4I6MTivLg,9426
87
+ sglang/srt/model_executor/model_runner.py,sha256=AYMLc5Rd32ZyWnI6rERPuIASv6D-uA3ztoj9bh0VpcM,26800
87
88
  sglang/srt/models/baichuan.py,sha256=RyvPQvi7wy9VUGvLwG17XttcTp43yRj6c3zNRImBToA,15005
88
89
  sglang/srt/models/chatglm.py,sha256=9hCXTqGX8DMvSPSn6wlK0YNNRWGS4UiS4-xjFsO9hYU,13135
89
90
  sglang/srt/models/commandr.py,sha256=leoQNn4VRqa9SXos6DcrkHVG6-Xp-kjBn2PUgqc9bs8,14051
@@ -93,14 +94,16 @@ sglang/srt/models/deepseek_v2.py,sha256=z6532MRN1tBltFNteFJfimnaGpyNmK6g_sdNmTzs
93
94
  sglang/srt/models/exaone.py,sha256=YMyH4zxyCaCB432vCcom800efPI19_vIQ3OXLkLiXxk,12984
94
95
  sglang/srt/models/gemma.py,sha256=D_zjG312BeOPeplGzo5Z8tSMH9xL7wZ4KIgczZ9yJ0E,12193
95
96
  sglang/srt/models/gemma2.py,sha256=iE56CYzPn-QCis4kcU7Yi0jvJ04KeU2deuZH2DaS2lM,14768
96
- sglang/srt/models/gpt2.py,sha256=xWqU66KO6rNBnzA6uOBrlLWnwVzLKuC4UWHc5WuEHw8,10151
97
+ sglang/srt/models/gemma2_reward.py,sha256=zN3QYoKfMLmZlHJGVyak_kdI867rzjodYDg1SWhdW_s,2461
98
+ sglang/srt/models/gpt2.py,sha256=Th7_Dnkw82GFBOuMOTrHtA44JBPHRUtY3Qd73rQwzMc,9741
97
99
  sglang/srt/models/gpt_bigcode.py,sha256=f6vvxBFPhV6GIZrOEKjJPu41TyVYw5Knq4h9WDvyEeY,10040
98
100
  sglang/srt/models/grok.py,sha256=iSkvt7whYyMndUHBekM4vKHaDXnnmeJMErkklGpz624,14826
99
101
  sglang/srt/models/internlm2.py,sha256=HOVOXz3b7eLF2wpG_FEK5PYnYOEpHPGJ0pufvL7HPD0,12099
100
- sglang/srt/models/llama.py,sha256=X_LKJ02ofDfpgVVFexf_C6g4FikadfMikhuRVAuLN5I,16094
102
+ sglang/srt/models/internlm2_reward.py,sha256=dtT1vupWv6dXk17XYYdsmsR027GiP_WOxtMvwRC7Y84,2330
103
+ sglang/srt/models/llama.py,sha256=mIKyEHySlaCSOAAHA3x1DSnFHvlOzar7CYs2sQYZfdg,16286
101
104
  sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
102
105
  sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
103
- sglang/srt/models/llama_reward.py,sha256=48J6PmZJRFRv-6mEF6y5fxNKtRRZVQzvJqg3XaWDWa0,5448
106
+ sglang/srt/models/llama_reward.py,sha256=d-j00wj-_8mh2s2HJicTilNn8GWpcmxQVfmAhEJ1n7k,4524
104
107
  sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
105
108
  sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
106
109
  sglang/srt/models/minicpm.py,sha256=hAzgBImQ1xDeRdaQt5hKcLl1h1T-1QFSerG2MOlLjt8,13722
@@ -114,16 +117,16 @@ sglang/srt/models/olmoe.py,sha256=fEWr-RmW6l6fVA8jM9KX8bumUWLNQQG8VxGpajlkhUs,15
114
117
  sglang/srt/models/qwen.py,sha256=vQoq8Bv8A2zc-LE1i-E97A8i4ydtfxb2yt2JG6Tp9PQ,9851
115
118
  sglang/srt/models/qwen2.py,sha256=Y1f_PxZMTkSLgENbKl96VfNGBfvcU4cljpVe1a3vzVg,12328
116
119
  sglang/srt/models/qwen2_moe.py,sha256=RRuHLN1fIYFS4du4pUPNzGL-Rt2wLrjlgDfXiczZQ5c,16975
117
- sglang/srt/models/qwen2_vl.py,sha256=scKzs-KTI64CRRcBNWQniXURLO3WiJEzx-MsisH1Als,26093
120
+ sglang/srt/models/qwen2_vl.py,sha256=jb0RYMo0ShPIt4NtPCEcFGciZKstM-gYwVKND_LK7Ls,26052
118
121
  sglang/srt/models/stablelm.py,sha256=rIQOv9OS_Vb2nOT_AMx0yGG2onwmCbbxvXL_SPdZX7k,11256
119
122
  sglang/srt/models/torch_native_llama.py,sha256=d8gVNurlVVZ-tD3Uc_aHyGCVUUp1gR8awOH4fLRZHDE,19145
120
123
  sglang/srt/models/xverse.py,sha256=meyCCdrZRYNK70hnmydgwhHa1FTBhKekEdpG0_IGTWY,13564
121
124
  sglang/srt/models/xverse_moe.py,sha256=xlrhJBAlRzxhp5o0WQU_2V5Uvf8I9fwZLOZBh95o3to,15673
122
125
  sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
123
- sglang/srt/openai_api/adapter.py,sha256=nZOVjZ-q4eULl19oT97_u7z63SQiWW7IzbYzJeWE7os,53069
126
+ sglang/srt/openai_api/adapter.py,sha256=TFRafrvLvxGx93AZ8OByVwW7Y3ozBdAXg6gX5KU6hK8,53238
124
127
  sglang/srt/openai_api/protocol.py,sha256=EZ6G209rBEDP7cepO2kAYqE8wMe1ksYdN7to1iT97Lw,10248
125
- sglang/srt/sampling/sampling_batch_info.py,sha256=qrijXoMhF-V_x3g6lumsfXgsGaPiKLIJ2pUz6ii-O2s,7735
126
- sglang/srt/sampling/sampling_params.py,sha256=u1UWt9biIFXKymAg56RbkMa8oe5jxsoMvsv3cH7_kZ0,5692
128
+ sglang/srt/sampling/sampling_batch_info.py,sha256=7uoHypbbp4o71DfPmF22R_LeyM_Q9BTxBFg8O4lkd9w,7648
129
+ sglang/srt/sampling/sampling_params.py,sha256=O8w5yTLP1dwuCdb8kMBBhMSdMWvWxSv3fz2Eq07Tm88,5192
127
130
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
128
131
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
129
132
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
@@ -134,19 +137,19 @@ sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8
134
137
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
135
138
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
136
139
  sglang/test/runners.py,sha256=JxfsGEW9L3cz87fHYmWqb3Vnbk6K1csLLLftR3LogxU,14297
137
- sglang/test/simple_eval_common.py,sha256=r0G-9QLycs2ax3RMc44T_61fzMxlpTzv6pececC7lyY,12379
140
+ sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
138
141
  sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
139
- sglang/test/simple_eval_humaneval.py,sha256=7lTi841NT58smNOtRwCedrdX9IWWypdLkOtaQOBy-GI,5687
142
+ sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
140
143
  sglang/test/simple_eval_math.py,sha256=6kGKNwNbLN-Af3Wj8WTimWhH-Xp3enDmSvvSjsgWUpk,2550
141
- sglang/test/simple_eval_mgsm.py,sha256=wfbqJW9Rkc66vzq2fEMF6jchmoA8mw1OUiGU55cZ2B0,10261
144
+ sglang/test/simple_eval_mgsm.py,sha256=rd7TSUyxdKbrXaVoewo24V8lCo_6kO8zxPhhmvylpw8,10259
142
145
  sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9XI,4357
143
146
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
144
147
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
145
148
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
146
- sglang/test/test_utils.py,sha256=zspkM9VSm6QXI4wVG-75r8ttGgylnPOEH7nuYjp5plU,22799
149
+ sglang/test/test_utils.py,sha256=lgLPp27xQ1NfSdeJ1YUZeOer8I6G8UDce7YPyG637gY,23054
147
150
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=q98pQDikkmvvvvAG-AXMYaYte1iHHW2TFhKGtAeGvdE,12802
148
- sglang-0.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
149
- sglang-0.3.5.dist-info/METADATA,sha256=FQ8MBpLt6W0-43VhtuwEWgqomXaFwUumiBd6T8xPWG0,21099
150
- sglang-0.3.5.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
151
- sglang-0.3.5.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
152
- sglang-0.3.5.dist-info/RECORD,,
151
+ sglang-0.3.5.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
152
+ sglang-0.3.5.post1.dist-info/METADATA,sha256=bTPgfYz1f3ZJPNiIxNPLOoTIGKACad-XLIZ8DOlszu0,21561
153
+ sglang-0.3.5.post1.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
154
+ sglang-0.3.5.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
155
+ sglang-0.3.5.post1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,65 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
14
- """
15
-
16
- """Base tool cache for constrained decoding tools."""
17
-
18
- import time
19
-
20
-
21
- class BaseToolCache:
22
- def __init__(self, enable=True):
23
- self.enable = enable
24
- self.reset()
25
-
26
- def reset(self):
27
- self.cache = {}
28
- self.metrics = {"total": 0, "hit": 0, "avg_init_time": 0}
29
-
30
- def query(self, key):
31
- def _init_with_timer(key):
32
- start = time.monotonic()
33
- val = self.init_value(key)
34
- init_time = time.monotonic() - start
35
- curr_total = self.metrics["total"]
36
- new_total = curr_total + 1
37
-
38
- # Update average init time without old_avg * old_total to avoid overflow.
39
- self.metrics["avg_init_time"] = (init_time / new_total) + (
40
- curr_total / new_total
41
- ) * self.metrics["avg_init_time"]
42
- return val
43
-
44
- if key in self.cache:
45
- self.metrics["hit"] += 1
46
- val = self.cache[key]
47
- else:
48
- # Cache miss or disabled.
49
- val = _init_with_timer(key)
50
-
51
- if self.enable:
52
- self.metrics["total"] += 1
53
- self.cache[key] = val
54
- return val
55
-
56
- def init_value(self, key):
57
- raise NotImplementedError()
58
-
59
- def get_cache_hit_rate(self):
60
- if self.metrics["total"] == 0:
61
- return 0
62
- return self.metrics["hit"] / self.metrics["total"]
63
-
64
- def get_avg_init_time(self):
65
- return self.metrics["avg_init_time"]
@@ -1,61 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
- http://www.apache.org/licenses/LICENSE-2.0
7
- Unless required by applicable law or agreed to in writing, software
8
- distributed under the License is distributed on an "AS IS" BASIS,
9
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
- See the License for the specific language governing permissions and
11
- limitations under the License.
12
- """
13
-
14
- """Cache for the compressed finite state machine."""
15
-
16
- from typing import Tuple
17
-
18
- from transformers import AutoTokenizer
19
-
20
- from sglang.srt.constrained import (
21
- GrammarMatcher,
22
- GrammarMatcherInitContext,
23
- GrammarMatcherInitContextCache,
24
- )
25
-
26
- MAX_ROLLBACK_TOKENS = 10
27
-
28
-
29
- class BNFCache:
30
- grammar_cache: GrammarMatcherInitContextCache
31
-
32
- def __init__(
33
- self,
34
- tokenizer_path,
35
- tokenizer_args_dict,
36
- skip_tokenizer_init=False,
37
- whitespace_patterns=None,
38
- ):
39
- # TODO(dark): how to deal with whitespace_patterns and skip_tokenizer_init
40
- if skip_tokenizer_init:
41
- return
42
-
43
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_args_dict)
44
- self.grammar_cache = GrammarMatcherInitContextCache(
45
- tokenizer_or_vocab=tokenizer
46
- )
47
-
48
- def get_context(self, key: Tuple[str, str]) -> GrammarMatcherInitContext:
49
- key_type, key_string = key
50
- if key_type == "json":
51
- return self.grammar_cache.get_init_context_for_json_schema(key_string)
52
- elif key_type == "regex":
53
- raise ValueError(f"regex hasn't been supported by xgrammar yet")
54
- else:
55
- raise ValueError(f"Invalid key_type: {key_type}")
56
-
57
- def query(self, key: Tuple[str, str], vocab_size: int) -> GrammarMatcher:
58
- ctx = self.get_context(key)
59
- return GrammarMatcher(
60
- ctx, max_rollback_tokens=MAX_ROLLBACK_TOKENS, mask_vocab_size=vocab_size
61
- )
@@ -1,95 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
14
- """
15
-
16
- """Cache for the compressed finite state machine."""
17
- import logging
18
-
19
- from interegular import InvalidSyntax, parse_pattern
20
- from outlines.fsm.json_schema import build_regex_from_schema
21
- from transformers import AutoTokenizer
22
-
23
- from sglang.srt.constrained import RegexGuide, TransformerTokenizer
24
- from sglang.srt.constrained.base_tool_cache import BaseToolCache
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
-
29
- class FSMCache(BaseToolCache):
30
- def __init__(
31
- self,
32
- tokenizer_path,
33
- tokenizer_args_dict,
34
- enable=True,
35
- skip_tokenizer_init=False,
36
- constrained_json_whitespace_pattern=None,
37
- ):
38
- super().__init__(enable=enable)
39
-
40
- if (
41
- skip_tokenizer_init
42
- or tokenizer_path.endswith(".json")
43
- or tokenizer_path.endswith(".model")
44
- ):
45
- # Do not support TiktokenTokenizer or SentencePieceTokenizer
46
- return
47
-
48
- tokenizer_args_dict.setdefault("padding_side", "left")
49
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_args_dict)
50
- try:
51
- self.outlines_tokenizer = TransformerTokenizer(tokenizer)
52
- except AttributeError:
53
- # FIXME: tmp fix for chatglm2 & chatglm3 (pad_token_id=0)
54
- origin_pad_token_id = tokenizer.pad_token_id
55
-
56
- def fset(self, value):
57
- self._value = value
58
-
59
- type(tokenizer).pad_token_id = property(
60
- fget=type(tokenizer).pad_token_id.fget, fset=fset
61
- )
62
- self.outlines_tokenizer = TransformerTokenizer(tokenizer)
63
- self.outlines_tokenizer.tokenizer.pad_token_id = origin_pad_token_id
64
- self.outlines_tokenizer.pad_token_id = origin_pad_token_id
65
- self.outlines_tokenizer.pad_token = (
66
- self.outlines_tokenizer.tokenizer.pad_token
67
- )
68
- self.outlines_tokenizer.vocabulary = (
69
- self.outlines_tokenizer.tokenizer.get_vocab()
70
- )
71
- self.constrained_json_whitespace_pattern = constrained_json_whitespace_pattern
72
-
73
- def init_value(self, key):
74
- key_type, key_string = key
75
- if key_type == "json":
76
- try:
77
- regex = build_regex_from_schema(
78
- key_string,
79
- whitespace_pattern=self.constrained_json_whitespace_pattern,
80
- )
81
- except NotImplementedError as e:
82
- logger.warning(
83
- f"skip invalid json schema: json_schema={key_string}, {e=}"
84
- )
85
- return None, key_string
86
- elif key_type == "regex":
87
- regex = key_string
88
- else:
89
- raise ValueError(f"Invalid key_type: {key_type}")
90
- try:
91
- parse_pattern(regex)
92
- except InvalidSyntax as e:
93
- logger.warning(f"skip invalid regex guide: {regex=}, {e=}")
94
- return None, regex
95
- return RegexGuide(regex, self.outlines_tokenizer), regex
@@ -1,190 +0,0 @@
1
- """
2
- Copyright 2023-2024 SGLang Team
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
- http://www.apache.org/licenses/LICENSE-2.0
7
- Unless required by applicable law or agreed to in writing, software
8
- distributed under the License is distributed on an "AS IS" BASIS,
9
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
- See the License for the specific language governing permissions and
11
- limitations under the License.
12
- """
13
-
14
- """Cache for the compressed finite state machine."""
15
- import logging
16
- from typing import List, Optional, Tuple, Union
17
-
18
- import torch
19
-
20
- from sglang.srt.constrained import GrammarMatcher, RegexGuide
21
- from sglang.srt.constrained.bnf_cache import BNFCache
22
- from sglang.srt.constrained.fsm_cache import FSMCache
23
- from sglang.srt.constrained.jump_forward import JumpForwardCache, JumpForwardMap
24
-
25
- # from sglang.srt.managers.schedule_batch import Req
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
- INIT_INCREMENTAL_DETOKENIZATION_OFFSET = 5
30
-
31
-
32
- class XGrammarJump:
33
- pass
34
-
35
-
36
- class JumpHelper:
37
- data: Union[List, str]
38
- state: int
39
- suffix_ids: List[int]
40
-
41
- def __init__(
42
- self, data: Union[List, str] = "", state: int = -1, suffix_ids=[]
43
- ) -> None:
44
- self.data = data
45
- self.state = state
46
- self.suffix_ids = suffix_ids
47
-
48
- def can_jump(self):
49
- return len(self.data) > 0
50
-
51
-
52
- class Grammar:
53
- grammar: Union[GrammarMatcher, Tuple[RegexGuide, int]]
54
- jump_map: Union[XGrammarJump, JumpForwardMap, None]
55
-
56
- def __init__(
57
- self,
58
- grammar: Union[GrammarMatcher, Tuple[RegexGuide, int]],
59
- jump_map: Union[XGrammarJump, JumpForwardMap, None],
60
- ) -> None:
61
- self.grammar = grammar
62
- self.jump_map = jump_map
63
-
64
- def accept_token(self, token: int):
65
- if isinstance(self.grammar, GrammarMatcher):
66
- assert self.grammar.accept_token(token)
67
- else:
68
- guide, state = self.grammar
69
- self.grammar = guide, guide.get_next_state(state, token)
70
-
71
- def try_jump(self, tokenizer) -> JumpHelper:
72
- if isinstance(self.jump_map, XGrammarJump):
73
- assert isinstance(self.grammar, GrammarMatcher)
74
- return JumpHelper(self.grammar.find_jump_forward_string())
75
- elif isinstance(self.jump_map, JumpForwardMap):
76
- assert isinstance(self.grammar, Tuple)
77
-
78
- _, state = self.grammar
79
- jump_forward_bytes = self.jump_map.jump_forward_byte(state)
80
- if jump_forward_bytes is None or len(jump_forward_bytes) == 0:
81
- return JumpHelper() # can't jump
82
-
83
- # preprocess the jump forward string
84
- suffix_bytes = []
85
- continuation_range = range(0x80, 0xC0)
86
- cur_state = state
87
- while (
88
- len(jump_forward_bytes)
89
- and jump_forward_bytes[0][0] in continuation_range
90
- ):
91
- # continuation bytes
92
- byte_edge = jump_forward_bytes.pop(0)
93
- suffix_bytes.append(byte_edge[0])
94
- cur_state = byte_edge[1]
95
-
96
- suffix_tokens = [f"<0x{hex(b)[2:].upper()}>" for b in suffix_bytes]
97
- suffix_ids = tokenizer.convert_tokens_to_ids(suffix_tokens)
98
- return JumpHelper(suffix_ids, cur_state, suffix_bytes)
99
- else:
100
- return JumpHelper() # can't jump
101
-
102
- def jump_forward_str_state(self, helper: JumpHelper) -> Tuple[str, int]:
103
- if isinstance(helper.data, str):
104
- return helper.data, -1
105
- else:
106
- assert isinstance(self.jump_map, JumpForwardMap)
107
- return self.jump_map.jump_forward_symbol(helper.state)
108
-
109
- def jump_and_retokenize(
110
- self, old_output_ids: List[int], new_output_ids: List[int], next_state: int
111
- ):
112
- if isinstance(self.grammar, GrammarMatcher):
113
- k = 0
114
- for i, old_id in enumerate(old_output_ids):
115
- if old_id == new_output_ids[i]:
116
- k = i + 1
117
- else:
118
- break
119
-
120
- # rollback to the last token that is the same
121
- if k < len(old_output_ids):
122
- self.grammar.rollback(len(old_output_ids) - k)
123
-
124
- for i in range(k, len(new_output_ids)):
125
- assert self.grammar.accept_token(new_output_ids[i])
126
- else:
127
- self.grammar = self.grammar[0], next_state
128
-
129
- def fill_vocab_mask(self, vocab_mask: torch.Tensor, vocab_size: int):
130
- if isinstance(self.grammar, GrammarMatcher):
131
- # Note that this bitmask is a bitset, not bool
132
- bitmask = self.grammar.find_next_token_bitmask()
133
- # Mask the tokens that are not allowed
134
- vocab_mask[
135
- self.grammar.get_rejected_tokens_from_bitmask(bitmask, vocab_size)
136
- ] = 1
137
- else:
138
- guide, state = self.grammar
139
- vocab_mask.fill_(1)
140
- vocab_mask[guide.get_next_instruction(state).tokens] = 0
141
-
142
-
143
- class GrammarCache:
144
- grammar_cache: Union[BNFCache, FSMCache]
145
- jump_cache: Union[XGrammarJump, JumpForwardCache, None]
146
-
147
- def __init__(
148
- self,
149
- tokenizer_path,
150
- tokenizer_args_dict,
151
- skip_tokenizer_init=False,
152
- whitespace_patterns=None,
153
- backend=None,
154
- allow_jump=False,
155
- ):
156
- if backend == "xgrammar":
157
- self.grammar_cache = BNFCache(
158
- tokenizer_path=tokenizer_path,
159
- tokenizer_args_dict=tokenizer_args_dict,
160
- skip_tokenizer_init=skip_tokenizer_init,
161
- whitespace_patterns=whitespace_patterns,
162
- )
163
- self.jump_cache = XGrammarJump() if allow_jump else None
164
- else:
165
- assert backend == "outlines"
166
- self.grammar_cache = FSMCache(
167
- tokenizer_path=tokenizer_path,
168
- tokenizer_args_dict=tokenizer_args_dict,
169
- skip_tokenizer_init=skip_tokenizer_init,
170
- constrained_json_whitespace_pattern=whitespace_patterns,
171
- enable=True,
172
- )
173
- self.jump_cache = JumpForwardCache() if allow_jump else None
174
-
175
- def query(self, key: Tuple[str, str], vocab_size: int) -> Grammar:
176
- if isinstance(self.grammar_cache, BNFCache):
177
- assert not isinstance(self.jump_cache, JumpForwardCache)
178
- return Grammar(self.grammar_cache.query(key, vocab_size), self.jump_cache)
179
- else:
180
- jump_map = None
181
- guide, regex = self.grammar_cache.query(key)
182
- if isinstance(self.jump_cache, JumpForwardCache):
183
- jump_map = self.jump_cache.query(regex)
184
- return Grammar((guide, 0), jump_map)
185
-
186
- def reset(self):
187
- if isinstance(self.grammar_cache, FSMCache):
188
- self.grammar_cache.reset()
189
- if isinstance(self.jump_cache, JumpForwardCache):
190
- self.jump_cache.reset()