sglang 0.3.5__py3-none-any.whl → 0.3.5.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +113 -3
- sglang/srt/configs/model_config.py +5 -2
- sglang/srt/constrained/__init__.py +2 -66
- sglang/srt/constrained/base_grammar_backend.py +72 -0
- sglang/srt/constrained/outlines_backend.py +165 -0
- sglang/srt/constrained/outlines_jump_forward.py +182 -0
- sglang/srt/constrained/xgrammar_backend.py +114 -0
- sglang/srt/layers/attention/triton_ops/decode_attention.py +7 -0
- sglang/srt/layers/attention/triton_ops/extend_attention.py +6 -0
- sglang/srt/layers/fused_moe/fused_moe.py +23 -7
- sglang/srt/layers/quantization/base_config.py +4 -6
- sglang/srt/layers/vocab_parallel_embedding.py +216 -150
- sglang/srt/managers/io_struct.py +5 -3
- sglang/srt/managers/schedule_batch.py +14 -20
- sglang/srt/managers/scheduler.py +153 -94
- sglang/srt/managers/tokenizer_manager.py +81 -17
- sglang/srt/metrics/collector.py +211 -0
- sglang/srt/metrics/func_timer.py +108 -0
- sglang/srt/mm_utils.py +1 -1
- sglang/srt/model_executor/cuda_graph_runner.py +2 -2
- sglang/srt/model_executor/forward_batch_info.py +7 -3
- sglang/srt/model_executor/model_runner.py +2 -1
- sglang/srt/models/gemma2_reward.py +69 -0
- sglang/srt/models/gpt2.py +31 -37
- sglang/srt/models/internlm2_reward.py +62 -0
- sglang/srt/models/llama.py +11 -6
- sglang/srt/models/llama_reward.py +5 -26
- sglang/srt/models/qwen2_vl.py +5 -7
- sglang/srt/openai_api/adapter.py +6 -2
- sglang/srt/sampling/sampling_batch_info.py +2 -3
- sglang/srt/sampling/sampling_params.py +0 -14
- sglang/srt/server.py +58 -16
- sglang/srt/server_args.py +42 -22
- sglang/srt/utils.py +87 -0
- sglang/test/simple_eval_common.py +1 -1
- sglang/test/simple_eval_humaneval.py +2 -2
- sglang/test/simple_eval_mgsm.py +2 -2
- sglang/test/test_utils.py +18 -4
- sglang/utils.py +1 -0
- sglang/version.py +1 -1
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/METADATA +11 -7
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/RECORD +45 -42
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/WHEEL +1 -1
- sglang/srt/constrained/base_tool_cache.py +0 -65
- sglang/srt/constrained/bnf_cache.py +0 -61
- sglang/srt/constrained/fsm_cache.py +0 -95
- sglang/srt/constrained/grammar.py +0 -190
- sglang/srt/constrained/jump_forward.py +0 -203
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/LICENSE +0 -0
- {sglang-0.3.5.dist-info → sglang-0.3.5.post1.dist-info}/top_level.txt +0 -0
sglang/bench_serving.py
CHANGED
@@ -596,12 +596,20 @@ def sample_random_requests(

         # Filter out sequences that are too long or too short
         input_requests: List[Tuple[str, int, int]] = []
-        for i in range(num_prompts):
+        for data in dataset:
+            i = len(input_requests)
+            if i == num_prompts:
+                break
+
             # Tokenize the prompts and completions.
-            prompt = dataset[i][0]
+            prompt = data[0]
             prompt_token_ids = tokenizer.encode(prompt)
             prompt_len = len(prompt_token_ids)

+            # Skip empty prompt
+            if prompt_len == 0:
+                continue
+
             if prompt_len > input_lens[i]:
                 input_ids = prompt_token_ids[: input_lens[i]]
             else:
@@ -627,6 +635,66 @@ def sample_random_requests(
     return input_requests


+def gen_prompt(tokenizer, token_num):
+    """Generate a random prompt of specified token length using tokenizer vocabulary."""
+    all_available_tokens = list(tokenizer.get_vocab().values())
+    selected_tokens = random.choices(all_available_tokens, k=token_num)
+    return tokenizer.decode(selected_tokens)
+
+
+def sample_generated_shared_prefix_requests(
+    num_groups: int,
+    prompts_per_group: int,
+    system_prompt_len: int,
+    question_len: int,
+    output_len: int,
+    tokenizer: PreTrainedTokenizerBase,
+) -> List[Tuple[str, int, int]]:
+    """Generate benchmark requests with shared system prompts using random tokens."""
+    # Generate system prompts for each group
+    system_prompts = []
+    for _ in range(num_groups):
+        system_prompt = gen_prompt(tokenizer, system_prompt_len)
+        system_prompts.append(system_prompt)
+
+    # Generate questions
+    questions = []
+    for _ in range(num_groups * prompts_per_group):
+        question = gen_prompt(tokenizer, question_len)
+        questions.append(question)
+
+    # Combine system prompts with questions
+    input_requests = []
+    total_input_tokens = 0
+    total_output_tokens = 0
+
+    for group_idx in range(num_groups):
+        system_prompt = system_prompts[group_idx]
+        for prompt_idx in range(prompts_per_group):
+            question = questions[group_idx * prompts_per_group + prompt_idx]
+            full_prompt = f"{system_prompt}\n\n{question}"
+            prompt_len = len(tokenizer.encode(full_prompt))
+
+            input_requests.append((full_prompt, prompt_len, output_len))
+            total_input_tokens += prompt_len
+            total_output_tokens += output_len
+
+    print(f"\nGenerated shared prefix dataset statistics:")
+    print(f"Number of groups: {num_groups}")
+    print(f"Prompts per group: {prompts_per_group}")
+    print(f"Total prompts: {len(input_requests)}")
+    print(f"Total input tokens: {total_input_tokens}")
+    print(f"Total output tokens: {total_output_tokens}")
+    print(
+        f"Average system prompt length: {sum(len(tokenizer.encode(sp)) for sp in system_prompts) / len(system_prompts):.1f} tokens"
+    )
+    print(
+        f"Average question length: {sum(len(tokenizer.encode(q)) for q in questions) / len(questions):.1f} tokens\n"
+    )
+
+    return input_requests
+
+
 async def get_request(
     input_requests: List[Tuple[str, int, int]],
     request_rate: float,
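The new sampler can also be driven directly from Python. A minimal sketch (the tokenizer choice here is purely illustrative):

from transformers import AutoTokenizer

from sglang.bench_serving import sample_generated_shared_prefix_requests

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative choice
requests = sample_generated_shared_prefix_requests(
    num_groups=4,
    prompts_per_group=8,
    system_prompt_len=2048,
    question_len=128,
    output_len=256,
    tokenizer=tokenizer,
)
# Each entry is (full_prompt, prompt_len, output_len). All prompts within a
# group share one randomly generated system prompt, which is what makes this
# dataset useful for benchmarking prefix caching.
assert len(requests) == 4 * 8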
@@ -1048,6 +1116,15 @@ def run_benchmark(args_: argparse.Namespace):
             tokenizer=tokenizer,
             dataset_path=args.dataset_path,
         )
+    elif args.dataset_name == "generated-shared-prefix":
+        input_requests = sample_generated_shared_prefix_requests(
+            num_groups=args.gen_num_groups,
+            prompts_per_group=args.gen_prompts_per_group,
+            system_prompt_len=args.gen_system_prompt_len,
+            question_len=args.gen_question_len,
+            output_len=args.gen_output_len,
+            tokenizer=tokenizer,
+        )
     else:
         raise ValueError(f"Unknown dataset: {args.dataset_name}")

@@ -1121,7 +1198,7 @@ if __name__ == "__main__":
         "--dataset-name",
         type=str,
         default="sharegpt",
-        choices=["sharegpt", "random"],
+        choices=["sharegpt", "random", "generated-shared-prefix"],
         help="Name of the dataset to benchmark on.",
     )
     parser.add_argument(
@@ -1208,5 +1285,38 @@ if __name__ == "__main__":
         help="Append given JSON object to the request payload. You can use this to specify"
         "additional generate params like sampling params.",
     )
+
+    group = parser.add_argument_group("generated-shared-prefix dataset arguments")
+    group.add_argument(
+        "--gen-num-groups",
+        type=int,
+        default=64,
+        help="Number of system prompt groups for generated-shared-prefix dataset",
+    )
+    group.add_argument(
+        "--gen-prompts-per-group",
+        type=int,
+        default=16,
+        help="Number of prompts per system prompt group for generated-shared-prefix dataset",
+    )
+    group.add_argument(
+        "--gen-system-prompt-len",
+        type=int,
+        default=2048,
+        help="Target length in tokens for system prompts in generated-shared-prefix dataset",
+    )
+    group.add_argument(
+        "--gen-question-len",
+        type=int,
+        default=128,
+        help="Target length in tokens for questions in generated-shared-prefix dataset",
+    )
+    group.add_argument(
+        "--gen-output-len",
+        type=int,
+        default=256,
+        help="Target length in tokens for outputs in generated-shared-prefix dataset",
+    )
+
     args = parser.parse_args()
     run_benchmark(args)
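Together these flags wire the new dataset into the CLI. A run against a local server then looks something like:

python3 -m sglang.bench_serving --backend sglang --dataset-name generated-shared-prefix --gen-num-groups 64 --gen-prompts-per-group 16

with the remaining --gen-* options falling back to the defaults above.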
sglang/srt/configs/model_config.py
CHANGED
@@ -39,7 +39,7 @@ class ModelConfig:
         revision: Optional[str] = None,
         context_length: Optional[int] = None,
         model_override_args: Optional[dict] = None,
-        is_embedding: Optional[bool] = None
+        is_embedding: Optional[bool] = None,
     ) -> None:
         # Parse args
         self.model_override_args = json.loads(model_override_args)
@@ -52,7 +52,9 @@ class ModelConfig:
         self.hf_text_config = get_hf_text_config(self.hf_config)

         # Check model type
-        self.is_generation = is_generation_model(self.hf_config.architectures, is_embedding)
+        self.is_generation = is_generation_model(
+            self.hf_config.architectures, is_embedding
+        )
         self.is_multimodal = is_multimodal_model(self.hf_config.architectures)
         self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)

@@ -208,6 +210,7 @@ def is_generation_model(model_architectures: List[str], is_embedding: bool = False):
         or "MistralModel" in model_architectures
         or "LlamaForSequenceClassification" in model_architectures
         or "LlamaForSequenceClassificationWithNormal_Weights" in model_architectures
+        or "InternLM2ForRewardModel" in model_architectures
     ):
         return False
     else:
sglang/srt/constrained/__init__.py
CHANGED
@@ -13,69 +13,5 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-
-import json
-from typing import Dict, Optional, Union
-
-from pydantic import BaseModel
-
-try:
-    from outlines.caching import cache as disk_cache
-    from outlines.caching import disable_cache
-    from outlines.fsm.guide import RegexGuide
-    from outlines.fsm.regex import FSMInfo, make_byte_level_fsm, make_deterministic_fsm
-    from outlines.models.transformers import TransformerTokenizer
-except ImportError as e:
-    print(
-        f'\nError: {e}. Please install a new version of outlines by `pip install "outlines>=0.0.44"`\n'
-    )
-    raise
-
-try:
-    from outlines.fsm.json_schema import build_regex_from_object
-except ImportError:
-    # Since outlines 0.0.32, build_regex_from_object is replaced by build_regex_from_schema,
-    # which only accepts string schema as input.
-    from outlines.fsm.json_schema import build_regex_from_schema
-
-    def build_regex_from_object(
-        object: Union[str, BaseModel, Dict], whitespace_pattern: Optional[str] = None
-    ):
-        if isinstance(object, type(BaseModel)):
-            schema = json.dumps(object.model_json_schema())
-        elif isinstance(object, Dict):
-            schema = json.dumps(object)
-        else:
-            schema = object
-        return build_regex_from_schema(schema, whitespace_pattern)
-
-
-try:
-    from xgrammar import (
-        GrammarMatcher,
-        GrammarMatcherInitContext,
-        GrammarMatcherInitContextCache,
-    )
-except ImportError as e:
-
-    class Dummy:
-        pass
-
-    GrammarMatcher = Dummy
-    GrammarMatcherInitContext = Dummy
-    GrammarMatcherInitContextCache = Dummy
-
-__all__ = [
-    "RegexGuide",
-    "FSMInfo",
-    "make_deterministic_fsm",
-    "build_regex_from_object",
-    "TransformerTokenizer",
-    "disk_cache",
-    "disable_cache",
-    "make_byte_level_fsm",
-    "GrammarMatcher",
-    "GrammarMatcherInitContext",
-    "GrammarMatcherInitContextCache",
-]
+# TODO(lmzheng): make this an optional dependency
+from sglang.srt.constrained.outlines_backend import build_regex_from_object
sglang/srt/constrained/base_grammar_backend.py
ADDED
@@ -0,0 +1,72 @@
+"""
+Copyright 2023-2024 SGLang Team
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+"""The baseclass of backends for grammar-guided constrained decoding."""
+
+from concurrent.futures import Future, ThreadPoolExecutor
+from dataclasses import dataclass
+from threading import Event, Lock
+from typing import Any, Optional, Tuple
+
+
+@dataclass
+class CacheEntry:
+    value: Any
+    event: Event
+
+
+class BaseGrammarObject:
+    pass
+
+
+class BaseGrammarBackend:
+    def __init__(self):
+        self.executor = ThreadPoolExecutor()
+        self.cache = {}
+        self.cache_lock = Lock()
+
+    def init_value(self, key: Tuple[str, str]) -> BaseGrammarObject:
+        with self.cache_lock:
+            if key in self.cache:
+                cache_hit = True
+                entry = self.cache[key]
+            else:
+                cache_hit = False
+                entry = CacheEntry(None, Event())
+                self.cache[key] = entry
+
+        if cache_hit:
+            entry.event.wait()
+        else:
+            entry.value = self.init_value_impl(key)
+            entry.event.set()
+        return entry.value.copy()
+
+    def init_value_impl(self, key: Tuple[str, str]) -> BaseGrammarObject:
+        raise NotImplementedError()
+
+    def get_cached_value(self, key: Tuple[str, str]) -> Optional[BaseGrammarObject]:
+        with self.cache_lock:
+            entry = self.cache.get(key)
+            if not entry or not entry.event.is_set():
+                return None
+            return self.cache[key].value.copy()
+
+    def get_future_value(self, key: Tuple[str, str]) -> Future:
+        return self.executor.submit(self.init_value, key)
+
+    def reset(self):
+        with self.cache_lock:
+            self.cache.clear()
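To illustrate the contract this base class defines: a concrete backend only implements init_value_impl and returns objects exposing copy(); the Event-based cache guarantees each key is compiled exactly once even under concurrent requests. A hypothetical minimal backend (EchoGrammar and EchoBackend are made-up names):

from sglang.srt.constrained.base_grammar_backend import (
    BaseGrammarBackend,
    BaseGrammarObject,
)


class EchoGrammar(BaseGrammarObject):
    # Toy grammar object; a real backend wraps a compiled matcher here.
    def __init__(self, spec):
        self.spec = spec

    def copy(self):
        return EchoGrammar(self.spec)


class EchoBackend(BaseGrammarBackend):
    def init_value_impl(self, key):
        # key is a (key_type, key_string) tuple, e.g. ("regex", r"\d+").
        # Expensive compilation happens here once per key; concurrent callers
        # for the same key block on the cache entry's Event instead.
        return EchoGrammar(key)


backend = EchoBackend()
future = backend.get_future_value(("regex", r"\d+"))  # compiled on the executor
grammar = future.result()  # every caller receives its own copy()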
sglang/srt/constrained/outlines_backend.py
ADDED
@@ -0,0 +1,165 @@
+"""
+Copyright 2023-2024 SGLang Team
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+"""Constrained decoding with outlines backend."""
+
+import json
+import logging
+from typing import Dict, List, Optional, Tuple, Union
+
+import torch
+from outlines.fsm.guide import RegexGuide
+from outlines.models.transformers import TransformerTokenizer
+
+from sglang.srt.constrained.base_grammar_backend import (
+    BaseGrammarBackend,
+    BaseGrammarObject,
+)
+from sglang.srt.constrained.outlines_jump_forward import OutlinesJumpForwardMap
+
+logger = logging.getLogger(__name__)
+
+
+try:
+    from outlines.fsm.json_schema import build_regex_from_object
+except ImportError:
+    # Since outlines 0.0.32, build_regex_from_object is replaced by build_regex_from_schema,
+    # which only accepts string schema as input.
+    from outlines.fsm.json_schema import build_regex_from_schema
+    from pydantic import BaseModel
+
+    def build_regex_from_object(
+        object: Union[str, BaseModel, Dict], whitespace_pattern: Optional[str] = None
+    ):
+        if isinstance(object, type(BaseModel)):
+            schema = json.dumps(object.model_json_schema())
+        elif isinstance(object, Dict):
+            schema = json.dumps(object)
+        else:
+            schema = object
+        return build_regex_from_schema(schema, whitespace_pattern)
+
+
+class OutlinesGrammar(BaseGrammarObject):
+    def __init__(
+        self,
+        guide: RegexGuide,
+        jump_forward_map: Union[OutlinesJumpForwardMap, None],
+    ) -> None:
+        self.guide = guide
+        self.jump_forward_map = jump_forward_map
+        self.state = 0
+
+    def accept_token(self, token: int):
+        self.state = self.guide.get_next_state(self.state, token)
+
+    def try_jump_forward(self, tokenizer) -> Optional[Tuple]:
+        if not self.jump_forward_map:
+            return None
+
+        jump_forward_bytes = self.jump_forward_map.jump_forward_byte(self.state)
+        if jump_forward_bytes is None or len(jump_forward_bytes) <= 1:
+            return None
+
+        # preprocess the jump forward string
+        suffix_bytes = []
+        continuation_range = range(0x80, 0xC0)
+        cur_state = self.state
+        while (
+            len(jump_forward_bytes) and jump_forward_bytes[0][0] in continuation_range
+        ):
+            # continuation bytes
+            byte_edge = jump_forward_bytes.pop(0)
+            suffix_bytes.append(byte_edge[0])
+            cur_state = byte_edge[1]
+
+        suffix_tokens = [f"<0x{hex(b)[2:].upper()}>" for b in suffix_bytes]
+        suffix_ids = tokenizer.convert_tokens_to_ids(suffix_tokens)
+        return suffix_ids, cur_state
+
+    def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tuple[str, int]:
+        _, cur_state = helper
+        return self.jump_forward_map.jump_forward_symbol(cur_state)
+
+    def jump_and_retokenize(
+        self, old_output_ids: List[int], new_output_ids: List[int], next_state: int
+    ):
+        self.state = next_state
+
+    def fill_vocab_mask(self, vocab_mask: torch.Tensor):
+        vocab_mask.fill_(1)
+        vocab_mask[self.guide.get_next_instruction(self.state).tokens] = 0
+
+    def copy(self):
+        return OutlinesGrammar(self.guide, self.jump_forward_map)
+
+
+class OutlinesGrammarBackend(BaseGrammarBackend):
+    def __init__(
+        self,
+        tokenizer,
+        whitespace_pattern: bool,
+        allow_jump_forward: bool,
+    ):
+        super().__init__()
+
+        try:
+            self.outlines_tokenizer = TransformerTokenizer(tokenizer)
+        except AttributeError:
+            # FIXME: tmp fix for chatglm2 & chatglm3 (pad_token_id=0)
+            origin_pad_token_id = tokenizer.pad_token_id
+
+            def fset(self, value):
+                self._value = value
+
+            type(tokenizer).pad_token_id = property(
+                fget=type(tokenizer).pad_token_id.fget, fset=fset
+            )
+            self.outlines_tokenizer = TransformerTokenizer(tokenizer)
+            self.outlines_tokenizer.tokenizer.pad_token_id = origin_pad_token_id
+            self.outlines_tokenizer.pad_token_id = origin_pad_token_id
+            self.outlines_tokenizer.pad_token = (
+                self.outlines_tokenizer.tokenizer.pad_token
+            )
+            self.outlines_tokenizer.vocabulary = (
+                self.outlines_tokenizer.tokenizer.get_vocab()
+            )
+        self.allow_jump_forward = allow_jump_forward
+        self.whitespace_pattern = whitespace_pattern
+
+    def init_value_impl(self, key: Tuple[str, str]) -> OutlinesGrammar:
+        key_type, key_string = key
+        if key_type == "json":
+            try:
+                regex = build_regex_from_object(
+                    key_string,
+                    whitespace_pattern=self.whitespace_pattern,
+                )
+            except NotImplementedError as e:
+                logger.warning(
+                    f"skip invalid json schema: json_schema={key_string}, {e=}"
+                )
+                return None, key_string
+        elif key_type == "regex":
+            regex = key_string
+        else:
+            raise ValueError(f"Invalid key_type: {key_type}")
+
+        guide = RegexGuide(regex, self.outlines_tokenizer)
+        if self.allow_jump_forward:
+            jump_forward_map = OutlinesJumpForwardMap(regex)
+        else:
+            jump_forward_map = None
+        return OutlinesGrammar(guide, jump_forward_map)
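A rough usage sketch of the backend (the tokenizer is an illustrative choice; in sglang proper the scheduler owns this object and applies the mask during sampling):

import torch
from transformers import AutoTokenizer

from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative choice
backend = OutlinesGrammarBackend(
    tokenizer, whitespace_pattern=None, allow_jump_forward=True
)

# Compile (and cache) a grammar for a regex-constrained request.
grammar = backend.init_value(("regex", r"(yes|no)"))

# Per decoding step: mask tokens the FSM cannot accept (1 = disallowed,
# 0 = allowed), then feed the sampled token back to advance the state.
vocab_mask = torch.zeros(len(tokenizer), dtype=torch.bool)
grammar.fill_vocab_mask(vocab_mask)
# token_id = sample(logits.masked_fill(vocab_mask, float("-inf")))
# grammar.accept_token(token_id)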
sglang/srt/constrained/outlines_jump_forward.py
ADDED
@@ -0,0 +1,182 @@
+"""
+Copyright 2023-2024 SGLang Team
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+"""
+Faster constrained decoding with jump forward decoding / compressed finite state machine.
+Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
+"""
+
+import dataclasses
+import logging
+from collections import defaultdict
+
+import interegular
+from interegular import InvalidSyntax
+from outlines.caching import cache as disk_cache
+from outlines.fsm.regex import FSMInfo, make_byte_level_fsm, make_deterministic_fsm
+
+IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class JumpEdge:
+    symbol: str = None
+    symbol_next_state: int = None
+    byte: int = None
+    byte_next_state: int = None
+
+
+@disk_cache()
+def init_state_to_jump_forward(regex_string):
+    try:
+        regex_pattern = interegular.parse_pattern(regex_string)
+    except InvalidSyntax as e:
+        logger.warning(f"skip invalid regex: {regex_string}, {e=}")
+        return
+
+    byte_fsm = make_byte_level_fsm(regex_pattern.to_fsm().reduce(), keep_utf8=True)
+    regex_fsm, _ = make_deterministic_fsm(byte_fsm)
+
+    fsm_info: FSMInfo = regex_fsm.fsm_info
+
+    symbol_to_id = fsm_info.alphabet_symbol_mapping
+    id_to_symbol = {}
+    for symbol, id_ in symbol_to_id.items():
+        id_to_symbol.setdefault(id_, []).append(symbol)
+
+    transitions = fsm_info.transitions
+
+    outgoings_ct = defaultdict(int)
+    # NOTE(lsyin): Final states can lead to terminate, so they have one outgoing edge naturally
+    for s in fsm_info.finals:
+        outgoings_ct[s] = 1
+
+    state_to_jump_forward = {}
+    for (state, id_), next_state in transitions.items():
+        if id_ == fsm_info.alphabet_anything_value:
+            # Arbitrarily symbol cannot be recognized as jump forward
+            continue
+
+        symbols = id_to_symbol[id_]
+        for c in symbols:
+            if len(c) > 1:
+                # Skip byte level transitions like c = "5E"
+                continue
+
+            outgoings_ct[state] += 1
+            if outgoings_ct[state] > 1:
+                if state in state_to_jump_forward:
+                    del state_to_jump_forward[state]
+                break
+
+            state_to_jump_forward[state] = JumpEdge(
+                symbol=c,
+                symbol_next_state=next_state,
+            )
+
+    # Process the byte level jump forward
+    outgoings_ct = defaultdict(int)
+    for s in fsm_info.finals:
+        outgoings_ct[s] = 1
+
+    for (state, id_), next_state in transitions.items():
+        if id_ == fsm_info.alphabet_anything_value:
+            continue
+        symbols = id_to_symbol[id_]
+        for c in symbols:
+            byte_ = None
+            if len(c) == 1 and ord(c) < 0x80:
+                # ASCII character
+                byte_ = ord(c)
+            elif len(c) > 1:
+                # FIXME: This logic is due to the leading \x00
+                # https://github.com/outlines-dev/outlines/pull/930
+                byte_ = int(symbols[0][1:], 16)
+
+            if byte_ is not None:
+                outgoings_ct[state] += 1
+                if outgoings_ct[state] > 1:
+                    if state in state_to_jump_forward:
+                        del state_to_jump_forward[state]
+                    break
+                e = state_to_jump_forward.get(state, JumpEdge())
+                e.byte = byte_
+                e.byte_next_state = next_state
+                state_to_jump_forward[state] = e
+
+    return state_to_jump_forward
+
+
+class OutlinesJumpForwardMap:
+    def __init__(self, regex_string):
+        self.state_to_jump_forward = init_state_to_jump_forward(regex_string)
+
+    def jump_forward_symbol(self, state):
+        jump_forward_str = ""
+        next_state = state
+        while state in self.state_to_jump_forward:
+            e = self.state_to_jump_forward[state]
+            if e.symbol is None:
+                break
+            jump_forward_str += e.symbol
+            next_state = e.symbol_next_state
+            state = next_state
+
+        return jump_forward_str, next_state
+
+    def jump_forward_byte(self, state):
+        if state not in self.state_to_jump_forward:
+            return None
+
+        jump_forward_bytes = []
+        next_state = None
+        while state in self.state_to_jump_forward:
+            e = self.state_to_jump_forward[state]
+            assert e.byte is not None and e.byte_next_state is not None
+            jump_forward_bytes.append((e.byte, e.byte_next_state))
+            next_state = e.byte_next_state
+            state = next_state
+
+        return jump_forward_bytes
+
+    def is_jump_forward_symbol_state(self, state):
+        return (
+            state in self.state_to_jump_forward
+            and self.state_to_jump_forward[state].symbol is not None
+        )
+
+
+def test_main(regex_string):
+    jump_forward_map = OutlinesJumpForwardMap(regex_string)
+    for state, e in jump_forward_map.state_to_jump_forward.items():
+        if e.symbol is not None:
+            jump_forward_str, next_state = jump_forward_map.jump_forward_symbol(state)
+            print(f"{state} -> {next_state}", jump_forward_str)
+        bytes_ = jump_forward_map.jump_forward_byte(state)
+        print(f"{state} -> {bytes_[-1][1]}", [hex(b) for b, _ in bytes_])
+
+
+if __name__ == "__main__":
+    import outlines
+
+    outlines.caching.clear_cache()
+    test_main(r"The google's DNS sever address is " + IP_REGEX)
+    test_main(r"霍格沃茨特快列车|霍比特人比尔博")
+    # 霍格: \xe9\x9c\x8d \xe6\xa0\xbc ...
+    # 霍比: \xe9\x9c\x8d \xe6\xaf\x94 ...
+
+    test_main(r"[-+]?[0-9]+[ ]*")
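For intuition: jump-forward works because any stretch of the FSM where every state has exactly one outgoing edge is forced text, so it can be emitted without running the model. A small sketch (assuming, as outlines_backend.py does, that the deterministic FSM starts at state 0):

from sglang.srt.constrained.outlines_jump_forward import OutlinesJumpForwardMap

jf_map = OutlinesJumpForwardMap(r"The answer is (yes|no)\.")
# From the start state the literal prefix "The answer is " is the only path
# through the FSM, so it can be emitted in a single jump.
print(jf_map.jump_forward_symbol(0))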