sglang 0.4.6.post4__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. sglang/bench_offline_throughput.py +16 -10
  2. sglang/bench_one_batch.py +5 -4
  3. sglang/bench_one_batch_server.py +86 -22
  4. sglang/bench_serving.py +197 -110
  5. sglang/compile_deep_gemm.py +4 -4
  6. sglang/lang/backend/runtime_endpoint.py +24 -1
  7. sglang/profiler.py +167 -0
  8. sglang/srt/_custom_ops.py +34 -0
  9. sglang/srt/configs/internvl.py +8 -12
  10. sglang/srt/configs/model_config.py +66 -29
  11. sglang/srt/constrained/base_grammar_backend.py +5 -2
  12. sglang/srt/constrained/llguidance_backend.py +9 -8
  13. sglang/srt/constrained/outlines_backend.py +5 -4
  14. sglang/srt/constrained/xgrammar_backend.py +18 -18
  15. sglang/srt/conversation.py +47 -9
  16. sglang/srt/custom_op.py +38 -3
  17. sglang/srt/debug_utils.py +74 -0
  18. sglang/srt/disaggregation/common/__init__.py +1 -0
  19. sglang/srt/disaggregation/common/conn.py +407 -0
  20. sglang/srt/disaggregation/decode.py +187 -134
  21. sglang/srt/disaggregation/decode_schedule_batch_mixin.py +142 -0
  22. sglang/srt/disaggregation/fake/conn.py +4 -13
  23. sglang/srt/disaggregation/kv_events.py +412 -0
  24. sglang/srt/disaggregation/launch_lb.py +140 -0
  25. sglang/srt/disaggregation/mini_lb.py +84 -70
  26. sglang/srt/disaggregation/mooncake/conn.py +441 -140
  27. sglang/srt/disaggregation/mooncake/transfer_engine.py +31 -14
  28. sglang/srt/disaggregation/nixl/conn.py +124 -442
  29. sglang/srt/disaggregation/prefill.py +128 -44
  30. sglang/srt/disaggregation/utils.py +154 -6
  31. sglang/srt/distributed/device_communicators/pymscclpp.py +315 -0
  32. sglang/srt/distributed/parallel_state.py +52 -5
  33. sglang/srt/distributed/utils.py +3 -3
  34. sglang/srt/entrypoints/EngineBase.py +11 -0
  35. sglang/srt/entrypoints/engine.py +129 -12
  36. sglang/srt/entrypoints/http_server.py +21 -6
  37. sglang/srt/entrypoints/http_server_engine.py +5 -2
  38. sglang/srt/function_call/base_format_detector.py +302 -0
  39. sglang/srt/function_call/core_types.py +34 -0
  40. sglang/srt/function_call/deepseekv3_detector.py +205 -0
  41. sglang/srt/function_call/ebnf_composer.py +248 -0
  42. sglang/srt/function_call/function_call_parser.py +202 -0
  43. sglang/srt/function_call/llama32_detector.py +93 -0
  44. sglang/srt/function_call/mistral_detector.py +131 -0
  45. sglang/srt/function_call/pythonic_detector.py +229 -0
  46. sglang/srt/function_call/qwen25_detector.py +121 -0
  47. sglang/srt/function_call/utils.py +52 -0
  48. sglang/srt/hf_transformers_utils.py +50 -7
  49. sglang/srt/layers/attention/aiter_backend.py +878 -0
  50. sglang/srt/layers/attention/base_attn_backend.py +4 -0
  51. sglang/srt/layers/attention/cutlass_mla_backend.py +2 -19
  52. sglang/srt/layers/attention/flashattention_backend.py +166 -35
  53. sglang/srt/layers/attention/flashinfer_backend.py +45 -1
  54. sglang/srt/layers/attention/flashinfer_mla_backend.py +45 -5
  55. sglang/srt/layers/attention/flashmla_backend.py +340 -78
  56. sglang/srt/layers/attention/intel_amx_backend.py +128 -0
  57. sglang/srt/layers/attention/tbo_backend.py +232 -0
  58. sglang/srt/layers/attention/torch_native_backend.py +3 -0
  59. sglang/srt/layers/attention/triton_backend.py +247 -5
  60. sglang/srt/layers/attention/triton_ops/extend_attention.py +12 -4
  61. sglang/srt/layers/attention/utils.py +2 -2
  62. sglang/srt/layers/attention/vision.py +1 -1
  63. sglang/srt/layers/communicator.py +517 -0
  64. sglang/srt/layers/dp_attention.py +6 -15
  65. sglang/srt/layers/layernorm.py +30 -19
  66. sglang/srt/layers/moe/cutlass_moe.py +370 -0
  67. sglang/srt/layers/moe/cutlass_moe_params.py +169 -0
  68. sglang/srt/layers/moe/ep_moe/kernels.py +60 -17
  69. sglang/srt/layers/moe/ep_moe/layer.py +195 -87
  70. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +88 -8
  71. sglang/srt/layers/moe/fused_moe_native.py +4 -0
  72. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  73. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  74. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  75. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  76. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  77. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  78. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  79. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  80. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +220 -25
  81. sglang/srt/layers/moe/fused_moe_triton/layer.py +48 -4
  82. sglang/srt/layers/moe/topk.py +107 -24
  83. sglang/srt/layers/multimodal.py +70 -0
  84. sglang/srt/layers/quantization/__init__.py +10 -4
  85. sglang/srt/layers/quantization/blockwise_int8.py +3 -0
  86. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +5 -0
  87. sglang/srt/layers/quantization/deep_gemm.py +60 -59
  88. sglang/srt/layers/quantization/fp8.py +113 -18
  89. sglang/srt/layers/quantization/fp8_kernel.py +118 -66
  90. sglang/srt/layers/quantization/fp8_utils.py +165 -43
  91. sglang/srt/layers/quantization/gptq.py +298 -6
  92. sglang/srt/layers/quantization/int8_kernel.py +18 -5
  93. sglang/srt/layers/quantization/modelopt_quant.py +334 -7
  94. sglang/srt/layers/quantization/moe_wna16.py +3 -0
  95. sglang/srt/layers/quantization/qoq.py +244 -0
  96. sglang/srt/layers/quantization/w8a8_fp8.py +3 -0
  97. sglang/srt/layers/quantization/w8a8_int8.py +3 -0
  98. sglang/srt/layers/rotary_embedding.py +6 -12
  99. sglang/srt/layers/sampler.py +80 -79
  100. sglang/srt/layers/utils.py +6 -0
  101. sglang/srt/lora/layers.py +12 -15
  102. sglang/srt/lora/lora.py +49 -5
  103. sglang/srt/lora/lora_manager.py +20 -8
  104. sglang/srt/lora/mem_pool.py +24 -16
  105. sglang/srt/lora/utils.py +17 -13
  106. sglang/srt/managers/data_parallel_controller.py +13 -5
  107. sglang/srt/managers/eplb_algorithms/__init__.py +63 -0
  108. sglang/srt/managers/eplb_algorithms/deepseek.py +223 -0
  109. sglang/srt/managers/eplb_algorithms/deepseek_vec.py +276 -0
  110. sglang/srt/managers/eplb_manager.py +96 -0
  111. sglang/srt/managers/expert_distribution.py +878 -56
  112. sglang/srt/managers/expert_location.py +448 -0
  113. sglang/srt/managers/expert_location_dispatch.py +108 -0
  114. sglang/srt/managers/io_struct.py +29 -5
  115. sglang/srt/managers/mm_utils.py +355 -151
  116. sglang/srt/managers/multimodal_processors/base_processor.py +299 -42
  117. sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +6 -1
  118. sglang/srt/managers/multimodal_processors/gemma3.py +15 -17
  119. sglang/srt/managers/multimodal_processors/internvl.py +18 -5
  120. sglang/srt/managers/multimodal_processors/janus_pro.py +7 -1
  121. sglang/srt/managers/multimodal_processors/kimi_vl.py +14 -32
  122. sglang/srt/managers/multimodal_processors/llava.py +3 -3
  123. sglang/srt/managers/multimodal_processors/minicpm.py +27 -32
  124. sglang/srt/managers/multimodal_processors/mllama4.py +6 -0
  125. sglang/srt/managers/multimodal_processors/phi4mm.py +87 -0
  126. sglang/srt/managers/multimodal_processors/pixtral.py +9 -9
  127. sglang/srt/managers/multimodal_processors/qwen_vl.py +35 -35
  128. sglang/srt/managers/schedule_batch.py +185 -55
  129. sglang/srt/managers/schedule_policy.py +4 -5
  130. sglang/srt/managers/scheduler.py +389 -154
  131. sglang/srt/managers/session_controller.py +1 -1
  132. sglang/srt/managers/tokenizer_manager.py +231 -39
  133. sglang/srt/managers/utils.py +0 -4
  134. sglang/srt/mem_cache/base_prefix_cache.py +3 -0
  135. sglang/srt/mem_cache/chunk_cache.py +3 -1
  136. sglang/srt/mem_cache/hiradix_cache.py +4 -4
  137. sglang/srt/mem_cache/memory_pool.py +74 -52
  138. sglang/srt/mem_cache/multimodal_cache.py +45 -0
  139. sglang/srt/mem_cache/radix_cache.py +58 -5
  140. sglang/srt/metrics/collector.py +11 -2
  141. sglang/srt/mm_utils.py +10 -0
  142. sglang/srt/model_executor/cuda_graph_runner.py +87 -65
  143. sglang/srt/model_executor/expert_location_updater.py +557 -0
  144. sglang/srt/model_executor/forward_batch_info.py +39 -14
  145. sglang/srt/model_executor/model_runner.py +231 -101
  146. sglang/srt/model_loader/loader.py +10 -6
  147. sglang/srt/model_loader/utils.py +67 -1
  148. sglang/srt/models/clip.py +5 -1
  149. sglang/srt/models/deepseek_nextn.py +1 -1
  150. sglang/srt/models/deepseek_v2.py +732 -403
  151. sglang/srt/models/exaone.py +8 -3
  152. sglang/srt/models/gemma3_causal.py +7 -0
  153. sglang/srt/models/gemma3_mm.py +75 -33
  154. sglang/srt/models/idefics2.py +342 -0
  155. sglang/srt/models/kimi_vl.py +4 -4
  156. sglang/srt/models/llama.py +1 -1
  157. sglang/srt/models/llama4.py +10 -2
  158. sglang/srt/models/llava.py +26 -18
  159. sglang/srt/models/mimo_mtp.py +220 -0
  160. sglang/srt/models/minicpmo.py +7 -17
  161. sglang/srt/models/minicpmv.py +3 -295
  162. sglang/srt/models/mistral.py +71 -1
  163. sglang/srt/models/mllama.py +3 -3
  164. sglang/srt/models/phi4mm.py +512 -0
  165. sglang/srt/models/qwen2.py +133 -35
  166. sglang/srt/models/qwen2_5_vl.py +5 -3
  167. sglang/srt/models/qwen2_eagle.py +4 -1
  168. sglang/srt/models/qwen2_moe.py +206 -69
  169. sglang/srt/models/qwen2_vl.py +3 -3
  170. sglang/srt/models/qwen3.py +92 -19
  171. sglang/srt/models/qwen3_moe.py +457 -55
  172. sglang/srt/models/registry.py +9 -1
  173. sglang/srt/models/siglip.py +294 -0
  174. sglang/srt/models/transformers.py +291 -0
  175. sglang/srt/openai_api/adapter.py +114 -40
  176. sglang/srt/openai_api/protocol.py +37 -2
  177. sglang/srt/openai_api/utils.py +172 -0
  178. sglang/srt/operations.py +189 -0
  179. sglang/srt/operations_strategy.py +207 -0
  180. sglang/srt/sampling/sampling_batch_info.py +13 -1
  181. sglang/srt/sampling/sampling_params.py +2 -1
  182. sglang/srt/server_args.py +235 -38
  183. sglang/srt/speculative/build_eagle_tree.py +8 -8
  184. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +8 -11
  185. sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +253 -0
  186. sglang/srt/speculative/eagle_utils.py +181 -90
  187. sglang/srt/speculative/eagle_worker.py +146 -21
  188. sglang/srt/two_batch_overlap.py +635 -0
  189. sglang/srt/utils.py +197 -19
  190. sglang/test/runners.py +16 -7
  191. sglang/test/send_one.py +4 -0
  192. sglang/test/test_cutlass_moe.py +278 -0
  193. sglang/test/test_fp4_moe.py +248 -0
  194. sglang/test/test_utils.py +81 -42
  195. sglang/utils.py +2 -2
  196. sglang/version.py +1 -1
  197. {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/METADATA +31 -19
  198. sglang-0.4.7.dist-info/RECORD +699 -0
  199. {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/WHEEL +1 -1
  200. sglang/srt/function_call_parser.py +0 -858
  201. sglang/srt/platforms/interface.py +0 -371
  202. sglang-0.4.6.post4.dist-info/RECORD +0 -646
  203. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  204. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  205. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  206. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  207. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  208. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  209. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  210. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  211. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  212. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  213. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  214. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  215. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  216. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H200.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json} +0 -0
  217. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  218. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  219. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  220. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  221. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  222. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  223. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  224. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  225. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  226. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  227. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  228. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  229. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  230. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  231. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  232. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  233. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  234. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  235. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  236. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  237. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  238. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  239. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  240. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  241. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  242. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  243. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  244. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  245. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json} +0 -0
  246. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  247. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  248. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  249. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  250. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  251. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  252. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  253. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  254. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  255. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json} +0 -0
  256. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json} +0 -0
  257. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  258. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  259. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  260. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  261. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  262. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  263. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json} +0 -0
  264. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  265. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  266. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json} +0 -0
  267. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  268. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  269. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  270. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json} +0 -0
  271. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  272. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  273. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  274. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
  275. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  276. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  277. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  278. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json} +0 -0
  279. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json} +0 -0
  280. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json} +0 -0
  281. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json} +0 -0
  282. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  283. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  284. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json} +0 -0
  285. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json} +0 -0
  286. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json} +0 -0
  287. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json} +0 -0
  288. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  289. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  290. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  291. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  292. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json} +0 -0
  293. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  294. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  295. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  296. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  297. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json} +0 -0
  298. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json} +0 -0
  299. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json} +0 -0
  300. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json} +0 -0
  301. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  302. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  303. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
  304. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  305. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  306. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  307. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json} +0 -0
  308. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_L40S.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json} +0 -0
  309. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
  310. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
  311. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
  312. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  313. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  314. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  315. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  316. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json} +0 -0
  317. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json} +0 -0
  318. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json} +0 -0
  319. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json} +0 -0
  320. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  321. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  322. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  323. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  324. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json} +0 -0
  325. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
  326. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
  327. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
  328. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  329. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  330. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  331. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  332. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json} +0 -0
  333. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json} +0 -0
  334. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  335. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  336. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json} +0 -0
  337. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  338. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json} +0 -0
  339. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  340. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  341. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  342. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  343. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json} +0 -0
  344. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  345. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json} +0 -0
  346. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=96,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json} +0 -0
  347. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  348. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  349. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  350. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  351. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  352. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  353. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  354. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  355. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  356. /sglang/srt/models/{xiaomi_mimo.py → mimo.py} +0 -0
  357. {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/licenses/LICENSE +0 -0
  358. {sglang-0.4.6.post4.dist-info → sglang-0.4.7.dist-info}/top_level.txt +0 -0
sglang/lang/backend/runtime_endpoint.py CHANGED
@@ -85,6 +85,22 @@ class RuntimeEndpoint(BaseBackend):
         )
         self._assert_success(res)
 
+    def start_profile(self):
+        res = http_request(
+            self.base_url + "/start_profile",
+            api_key=self.api_key,
+            verify=self.verify,
+        )
+        self._assert_success(res)
+
+    def stop_profile(self):
+        res = http_request(
+            self.base_url + "/stop_profile",
+            api_key=self.api_key,
+            verify=self.verify,
+        )
+        self._assert_success(res)
+
     def commit_lazy_operations(self, s: StreamExecutor):
         data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}}
         self._add_images(s, data)
@@ -374,7 +390,8 @@ class Runtime:
         self.pid = None
         pipe_reader, pipe_writer = multiprocessing.Pipe(duplex=False)
 
-        proc = multiprocessing.Process(
+        ctx = multiprocessing.get_context("spawn")
+        proc = ctx.Process(
             target=launch_server,
             args=(self.server_args, pipe_writer),
         )
@@ -406,6 +423,12 @@ class Runtime:
         kill_process_tree(self.pid)
         self.pid = None
 
+    def start_profile(self):
+        self.endpoint.start_profile()
+
+    def stop_profile(self):
+        self.endpoint.stop_profile()
+
     def cache_prefix(self, prefix: str):
         self.endpoint.cache_prefix(prefix)
 
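For orientation, a minimal usage sketch of the new `Runtime.start_profile()` / `Runtime.stop_profile()` hooks; the model path and the workload are illustrative placeholders, not part of this diff:

```python
# Hypothetical usage of the profiling hooks added above; the model path is an
# example value and any generation workload can go between the two calls.
import sglang as sgl

runtime = sgl.Runtime(model_path="meta-llama/Llama-3.1-8B-Instruct")

runtime.start_profile()   # forwards to POST /start_profile on the server
# ... issue generation requests against the runtime here ...
runtime.stop_profile()    # forwards to POST /stop_profile

runtime.shutdown()
```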
sglang/profiler.py ADDED
@@ -0,0 +1,167 @@
+"""
+Run live profiling.
+
+Usage:
+python3 -m sglang.profiler
+"""
+
+import argparse
+import json
+import os
+import time
+import urllib.parse
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import List, Optional
+
+import requests
+
+PARENT_FOLDER = "/tmp/sglang-profile"
+
+
+def _run_profile(
+    url: Optional[str],
+    num_steps: int,
+    activities: List[str],
+    output_dir: Optional[str] = None,
+    profile_name: Optional[str] = None,
+    profile_by_stage: bool = False,
+) -> str:
+    if output_dir is None:
+        output_dir = PARENT_FOLDER
+
+    output_dir = os.path.normpath(output_dir)
+    output_dir = os.path.abspath(output_dir)
+    output_dir = Path(output_dir)
+
+    # Add "profile_name/timestamp" to the path.
+    if profile_name:
+        output_dir = output_dir / profile_name
+    output_dir = output_dir / str(time.time())
+    output_dir.mkdir(exist_ok=True, parents=True)
+
+    print(f"Dump profiling traces to {output_dir}")
+    print(
+        f"Waiting for {num_steps} steps and the trace to be flushed.... ({profile_by_stage=})"
+    )
+
+    # Dump server args.
+    file_path = Path(output_dir) / "server_args.json"
+    if not file_path.exists():
+        response = requests.get(url + "/get_server_info")
+        response.raise_for_status()
+        server_args_data = response.json()
+        with open(file_path, "w") as file:
+            file.write(json.dumps(server_args_data))
+
+    # Start profiler. The API replies when all steps are processed
+    # and files are generated.
+    json_data = {
+        "output_dir": str(output_dir),
+        "num_steps": str(num_steps),
+        "activities": activities,
+        "profile_by_stage": profile_by_stage,
+    }
+
+    response = requests.post(url=url + "/start_profile", json=json_data)
+    response.raise_for_status()
+
+    trace_link = str(output_dir)
+    return trace_link
+
+
+def run_profile(
+    url: Optional[str],
+    num_steps: int,
+    activities: List[str],
+    output_dir: Optional[str] = None,
+    profile_name: Optional[str] = None,
+    profile_by_stage: bool = False,
+):
+    # step based profile will self terminate on num_steps constraints
+    link = _run_profile(
+        url, num_steps, activities, output_dir, profile_name, profile_by_stage
+    )
+    return link
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(description="Benchmark the online serving throughput.")
+    parser.add_argument(
+        "--url",
+        type=str,
+        default="http://localhost:30000",
+        help="Server or API base url if not using http host and port.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default=None,
+        help="Profile directory to dump profile traces.",
+    )
+    parser.add_argument(
+        "--profile-name",
+        type=str,
+        default=None,
+        help="The name of this profile run.",
+    )
+    parser.add_argument(
+        "--num-steps",
+        type=int,
+        default=5,
+        help="The number of forward steps to profile.",
+    )
+    parser.add_argument(
+        "--profile-by-stage",
+        action=argparse.BooleanOptionalAction,
+        type=bool,
+        default=False,
+        help="The number of forward steps to profile.",
+    )
+    parser.add_argument(
+        "--cpu",
+        action=argparse.BooleanOptionalAction,
+        type=bool,
+        default=True,
+        help="Whether to profile CPU activity",
+    )
+    parser.add_argument(
+        "--gpu",
+        action=argparse.BooleanOptionalAction,
+        type=bool,
+        default=True,
+        help="Whether to profile GPU activity",
+    )
+    parser.add_argument(
+        "--mem",
+        action=argparse.BooleanOptionalAction,
+        type=bool,
+        default=False,
+        help="Whether to memory usage (https://pytorch.org/memory_viz)",
+    )
+    parser.add_argument(
+        "--rpd",
+        action=argparse.BooleanOptionalAction,
+        type=bool,
+        default=False,
+        help="Whether to use rpd profiler (https://github.com/ROCm/rocmProfileData)",
+    )
+
+    args = parser.parse_args()
+    activities = []
+    if args.cpu:
+        activities.append("CPU")
+    if args.gpu:
+        activities.append("GPU")
+    if args.mem:
+        activities.append("MEM")
+    if args.rpd:
+        activities.append("RPD")
+    run_profile(
+        args.url,
+        args.num_steps,
+        activities,
+        args.output_dir,
+        args.profile_name,
+        args.profile_by_stage,
+    )
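As a usage sketch, the new module can be invoked as `python3 -m sglang.profiler` or driven programmatically; the server URL below is the parser default shown above, and the profile name is an arbitrary example:

```python
# Programmatic equivalent of `python3 -m sglang.profiler --num-steps 10`;
# assumes an SGLang server is already listening on localhost:30000.
from sglang.profiler import run_profile

trace_dir = run_profile(
    url="http://localhost:30000",
    num_steps=10,
    activities=["CPU", "GPU"],   # same strings the CLI flags produce
    output_dir=None,             # falls back to /tmp/sglang-profile
    profile_name="example-run",  # arbitrary example name
    profile_by_stage=False,
)
print(f"Traces written under {trace_dir}")
```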
sglang/srt/_custom_ops.py CHANGED
@@ -113,3 +113,37 @@ else:
 
     def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor:
         return sgl_kernel.allreduce.get_meta_buffer_ipc_handle(inp)
+
+
+    def mscclpp_generate_unique_id() -> bytes:
+        return sgl_kernel.allreduce.mscclpp_generate_unique_id()
+
+
+    def mscclpp_init_context(
+        unique_id: bytes,
+        rank: int,
+        world_size: int,
+        scratch: torch.Tensor,
+        put_buffer: torch.Tensor,
+        nranks_per_node: int,
+        rank_to_node: List[int],
+        rank_to_ib: List[int],
+        context_selection: int,
+    ) -> int:
+        return sgl_kernel.allreduce.mscclpp_init_context(
+            unique_id,
+            rank,
+            world_size,
+            scratch,
+            put_buffer,
+            nranks_per_node,
+            rank_to_node,
+            rank_to_ib,
+            context_selection,
+        )
+
+
+    def mscclpp_allreduce(
+        context: int, inp: torch.Tensor, out: torch.Tensor, nthreads: int, nblocks: int
+    ) -> None:
+        return sgl_kernel.allreduce.mscclpp_allreduce(context, inp, out, nthreads, nblocks)
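A rough, hypothetical sketch of how these MSCCL++ bindings chain together on one rank; the buffer sizes, launch parameters, context selection, and rank-to-node/IB mappings are illustrative assumptions, and in practice the unique id must be broadcast to every rank:

```python
# Illustrative only: shapes, nthreads/nblocks, and context_selection are guesses.
import torch
from sglang.srt import _custom_ops as ops

rank, world_size, nranks_per_node = 0, 8, 8
unique_id = ops.mscclpp_generate_unique_id()   # rank 0 generates, others receive it
scratch = torch.empty(8 << 20, dtype=torch.uint8, device="cuda")
put_buffer = torch.empty(8 << 20, dtype=torch.uint8, device="cuda")

ctx = ops.mscclpp_init_context(
    unique_id,
    rank,
    world_size,
    scratch,
    put_buffer,
    nranks_per_node,
    [r // nranks_per_node for r in range(world_size)],  # rank_to_node
    [r % nranks_per_node for r in range(world_size)],   # rank_to_ib
    0,                                                  # context_selection
)

inp = torch.randn(4096, dtype=torch.float16, device="cuda")
out = torch.empty_like(inp)
ops.mscclpp_allreduce(ctx, inp, out, nthreads=512, nblocks=32)
```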
sglang/srt/configs/internvl.py CHANGED
@@ -7,11 +7,8 @@ import sentencepiece as spm
 from transformers import (
     TOKENIZER_MAPPING,
     LlamaConfig,
-    Phi3Config,
     PretrainedConfig,
     PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-    Qwen2Config,
 )
 
 from sglang.utils import logger
@@ -302,24 +299,23 @@ class InternVLChatConfig(PretrainedConfig):
         )
 
         if llm_config is None:
-            # TODO: There might still be a bug in transformers version 4.44 and above.
-            llm_config = {"architectures": [""]}
+            llm_config = {"architectures": ["InternLM2ForCausalLM"]}
             logger.info(
                 "llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`)."
             )
+
         self.vision_config = InternVisionConfig(**vision_config)
-        if llm_config["architectures"][0] == "LlamaForCausalLM":
+        if llm_config.get("architectures")[0] == "LlamaForCausalLM":
             self.llm_config = LlamaConfig(**llm_config)
-        elif llm_config["architectures"][0] == "InternLM2ForCausalLM":
+        elif llm_config.get("architectures")[0] == "InternLM2ForCausalLM":
             self.llm_config = InternLM2Config(**llm_config)
-        elif llm_config["architectures"][0] == "Phi3ForCausalLM":
-            self.llm_config = Phi3Config(**llm_config)
-        elif llm_config["architectures"][0] == "Qwen2ForCausalLM":
-            self.llm_config = Qwen2Config(**llm_config)
         else:
             raise ValueError(
-                "Unsupported architecture: {}".format(llm_config["architectures"][0])
+                "Unsupported architecture: {}".format(
+                    llm_config.get("architectures")[0]
+                )
             )
+
         self.use_backbone_lora = use_backbone_lora
         self.use_llm_lora = use_llm_lora
         self.pad2square = pad2square
@@ -16,13 +16,17 @@ import json
16
16
  import logging
17
17
  import math
18
18
  import os
19
- from enum import IntEnum, auto
19
+ from enum import Enum, IntEnum, auto
20
20
  from typing import List, Optional, Set, Union
21
21
 
22
22
  import torch
23
23
  from transformers import PretrainedConfig
24
24
 
25
- from sglang.srt.hf_transformers_utils import get_config, get_context_length
25
+ from sglang.srt.hf_transformers_utils import (
26
+ get_config,
27
+ get_context_length,
28
+ get_hf_text_config,
29
+ )
26
30
  from sglang.srt.layers.quantization import QUANTIZATION_METHODS
27
31
  from sglang.srt.server_args import ServerArgs
28
32
  from sglang.srt.utils import get_bool_env_var, is_hip
@@ -35,6 +39,12 @@ class AttentionArch(IntEnum):
35
39
  MHA = auto()
36
40
 
37
41
 
42
+ class ModelImpl(str, Enum):
43
+ AUTO = "auto"
44
+ SGLANG = "sglang"
45
+ TRANSFORMERS = "transformers"
46
+
47
+
38
48
  class ModelConfig:
39
49
  def __init__(
40
50
  self,
@@ -49,11 +59,13 @@ class ModelConfig:
49
59
  quantization: Optional[str] = None,
50
60
  override_config_file: Optional[str] = None,
51
61
  is_draft_model: bool = False,
62
+ impl: Union[str, ModelImpl] = ModelImpl.AUTO,
52
63
  ) -> None:
53
64
 
54
65
  self.model_path = model_path
55
66
  self.revision = revision
56
67
  self.quantization = quantization
68
+ self.impl = impl
57
69
 
58
70
  # Parse args
59
71
  self.maybe_pull_model_tokenizer_from_remote()
@@ -69,6 +81,7 @@ class ModelConfig:
69
81
  model_override_args=self.model_override_args,
70
82
  **kwargs,
71
83
  )
84
+
72
85
  self.hf_text_config = get_hf_text_config(self.hf_config)
73
86
  self.attention_chunk_size = getattr(
74
87
  self.hf_text_config, "attention_chunk_size", None
@@ -93,6 +106,8 @@ class ModelConfig:
93
106
  ):
94
107
  self.hf_config.architectures[0] = "DeepseekV3ForCausalLMNextN"
95
108
 
109
+ if is_draft_model and self.hf_config.architectures[0] == "MiMoForCausalLM":
110
+ self.hf_config.architectures[0] = "MiMoMTP"
96
111
  # Check model type
97
112
  self.is_generation = is_generation_model(
98
113
  self.hf_config.architectures, is_embedding
@@ -109,6 +124,10 @@ class ModelConfig:
109
124
  self.is_audio_model = enable_multimodal and is_audio_model(
110
125
  self.hf_config.architectures
111
126
  )
127
+ self.is_multimodal_chunked_prefill_supported = (
128
+ enable_multimodal
129
+ and is_multimodal_chunked_prefill_supported(self.hf_config.architectures)
130
+ )
112
131
  self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)
113
132
  self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
114
133
 
@@ -185,6 +204,22 @@ class ModelConfig:
185
204
  self.v_head_dim = self.hf_text_config.v_head_dim
186
205
  self.qk_nope_head_dim = self.hf_text_config.qk_nope_head_dim
187
206
  else:
207
+ if (
208
+ "MistralModel" in self.hf_config.architectures
209
+ or "MixtralForCausalLM" in self.hf_config.architectures
210
+ or "MistralForCausalLM" in self.hf_config.architectures
211
+ ):
212
+ if getattr(self, "head_dim", None) is None:
213
+ self.head_dim = (
214
+ self.hf_config.hidden_size // self.hf_config.num_attention_heads
215
+ )
216
+ # In transformers==4.52.3, the head_dim is null in MistralConfig
217
+ if (
218
+ not hasattr(self.hf_text_config, "head_dim")
219
+ or self.hf_text_config.head_dim is None
220
+ ):
221
+ setattr(self.hf_text_config, "head_dim", self.head_dim)
222
+
188
223
  self.attention_arch = AttentionArch.MHA
189
224
 
190
225
  self.num_attention_heads = self.hf_text_config.num_attention_heads
@@ -209,7 +244,13 @@ class ModelConfig:
209
244
 
210
245
  # Cache attributes
211
246
  self.hf_eos_token_id = self.get_hf_eos_token_id()
212
- self.image_token_id = getattr(self.hf_config, "image_token_id", None)
247
+
248
+ config = self.hf_config
249
+
250
+ # multimodal
251
+ self.image_token_id = getattr(config, "image_token_id", None) or getattr(
252
+ config, "image_token_index", None
253
+ )
213
254
 
214
255
  @staticmethod
215
256
  def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs):
@@ -223,6 +264,7 @@ class ModelConfig:
223
264
  enable_multimodal=server_args.enable_multimodal,
224
265
  dtype=server_args.dtype,
225
266
  quantization=server_args.quantization,
267
+ impl=server_args.impl,
226
268
  **kwargs,
227
269
  )
228
270
 
@@ -332,6 +374,7 @@ class ModelConfig:
332
374
  "w8a8_int8",
333
375
  "w8a8_fp8",
334
376
  "moe_wna16",
377
+ "qoq",
335
378
  ]
336
379
  compatible_quantization_methods = {
337
380
  "modelopt_fp4": ["modelopt"],
@@ -423,31 +466,6 @@ class ModelConfig:
423
466
  self.model_path = client.get_local_dir()
424
467
 
425
468
 
426
- def get_hf_text_config(config: PretrainedConfig):
427
- """Get the "sub" config relevant to llm for multi modal models.
428
- No op for pure text models.
429
- """
430
- class_name = config.architectures[0]
431
- if class_name.startswith("Llava") and class_name.endswith("ForCausalLM"):
432
- # We support non-hf version of llava models, so we do not want to
433
- # read the wrong values from the unused default text_config.
434
- # NOTE(HandH1998): We set `torch_dtype` of config to `torch.float16` for the weights, as
435
- # `torch.float16` is default used for image features in `python/sglang/srt/models/llava.py`.
436
- setattr(config, "torch_dtype", torch.float16)
437
- return config
438
-
439
- if hasattr(config, "text_config"):
440
- # The code operates under the assumption that text_config should have
441
- # `num_attention_heads` (among others). Assert here to fail early
442
- # if transformers config doesn't align with this assumption.
443
- assert hasattr(config.text_config, "num_attention_heads")
444
- return config.text_config
445
- if hasattr(config, "language_config"):
446
- return config.language_config
447
- else:
448
- return config
449
-
450
-
451
469
  # adapted from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/config.py
452
470
  _STR_DTYPE_TO_TORCH_DTYPE = {
453
471
  "half": torch.float16,
@@ -466,6 +484,8 @@ def _get_and_verify_dtype(
      # NOTE: getattr(config, "torch_dtype", torch.float32) is not correct
      # because config.torch_dtype can be None.
      config_dtype = getattr(config, "torch_dtype", None)
+     if isinstance(config_dtype, str):
+         config_dtype = _STR_DTYPE_TO_TORCH_DTYPE.get(config_dtype, None)
      if config_dtype is None:
          config_dtype = torch.float32
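A minimal sketch (not part of the diff) of the string-to-dtype fallback added here; the dict below is an abbreviated stand-in for the module's _STR_DTYPE_TO_TORCH_DTYPE table:

    import torch

    # Abbreviated stand-in for _STR_DTYPE_TO_TORCH_DTYPE (the real table has more entries).
    _STR_DTYPE_TO_TORCH_DTYPE = {
        "half": torch.float16,
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
        "float32": torch.float32,
    }

    config_dtype = "bfloat16"  # e.g. a HF config whose torch_dtype was serialized as a string
    if isinstance(config_dtype, str):
        config_dtype = _STR_DTYPE_TO_TORCH_DTYPE.get(config_dtype, None)
    if config_dtype is None:
        config_dtype = torch.float32

    assert config_dtype is torch.bfloat16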
 
@@ -537,6 +557,7 @@ def is_generation_model(model_architectures: List[str], is_embedding: bool = Fal


  multimodal_model_archs = [
+     "CLIPModel",
      "DeepseekVL2ForCausalLM",
      "Gemma3ForConditionalGeneration",
      "Grok1VForCausalLM",
@@ -549,13 +570,14 @@ multimodal_model_archs = [
      "LlavaVidForCausalLM",
      "MiniCPMO",
      "MiniCPMV",
+     "Mistral3ForConditionalGeneration",
      "MultiModalityCausalLM",
      "MllamaForConditionalGeneration",
      "Qwen2VLForConditionalGeneration",
      "Qwen2_5_VLForConditionalGeneration",
-     "CLIPModel",
      "KimiVLForConditionalGeneration",
      "InternVLChatModel",
+     "Phi4MMForCausalLM",
  ]


@@ -585,6 +607,21 @@ def is_encoder_decoder_model(model_architectures: List[str]):
      return "MllamaForConditionalGeneration" in model_architectures


+ def is_multimodal_chunked_prefill_supported(model_architectures: List[str]):
+     """Check if chunked prefill is supported for a MultiModal model."""
+     unsupported = [
+         "Grok1VForCausalLM",
+         "Grok1AForCausalLM",
+         "LlavaLlamaForCausalLM",
+         "MllamaForConditionalGeneration",
+         "CLIPModel",
+     ]
+     if any(multi_model_arch in unsupported for multi_model_arch in model_architectures):
+         return False
+     else:
+         return True
+
+
  def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
      if scale <= 1:
          return 1.0
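For illustration, a hedged sketch of how the new helper behaves; the body is mirrored from the hunk above rather than imported from the package:

    from typing import List

    def is_multimodal_chunked_prefill_supported(model_architectures: List[str]) -> bool:
        # Mirrored from the hunk above so the example is runnable on its own.
        unsupported = [
            "Grok1VForCausalLM",
            "Grok1AForCausalLM",
            "LlavaLlamaForCausalLM",
            "MllamaForConditionalGeneration",
            "CLIPModel",
        ]
        return not any(arch in unsupported for arch in model_architectures)

    assert is_multimodal_chunked_prefill_supported(["Qwen2VLForConditionalGeneration"])
    assert not is_multimodal_chunked_prefill_supported(["MllamaForConditionalGeneration"])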
@@ -60,7 +60,7 @@ class BaseGrammarObject:
          raise NotImplementedError()

      def copy(self) -> "BaseGrammarObject":
-         raise NotImplementedError()
+         return self

      @property
      def finished(self):
@@ -99,9 +99,12 @@ class BaseGrammarObject:
          raise NotImplementedError()


+ INVALID_GRAMMAR_OBJ = BaseGrammarObject()
+
+
  @dataclass
  class CacheEntry:
-     value: Optional[BaseGrammarObject]
+     value: BaseGrammarObject
      event: Event

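A minimal sketch (not from the package) of the sentinel pattern the remaining hunks converge on: dispatch methods log an error and return a shared module-level object instead of None, so callers check identity rather than handling Optional results:

    import logging

    class BaseGrammarObject:
        """Stand-in for the class defined in base_grammar_backend.py."""

    # Shared sentinel, mirroring INVALID_GRAMMAR_OBJ above.
    INVALID_GRAMMAR_OBJ = BaseGrammarObject()

    def dispatch_regex(key_string: str) -> BaseGrammarObject:
        # Hypothetical compile step standing in for a real grammar compiler.
        try:
            if not key_string:
                raise RuntimeError("empty pattern")
            return BaseGrammarObject()
        except RuntimeError as e:
            logging.error(f"Hit invalid regex: {key_string=}, {e=}")
            return INVALID_GRAMMAR_OBJ  # never None

    grammar = dispatch_regex("")
    if grammar is INVALID_GRAMMAR_OBJ:
        pass  # the caller can reject the request instead of special-casing None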
 
@@ -28,6 +28,7 @@ from llguidance.torch import (
  )

  from sglang.srt.constrained.base_grammar_backend import (
+     INVALID_GRAMMAR_OBJ,
      BaseGrammarBackend,
      BaseGrammarObject,
  )
@@ -126,8 +127,8 @@ class GuidanceBackend(BaseGrammarBackend):
                  serialized_grammar=serialized_grammar,
              )
          except Exception as e:
-             logger.warning(f"Skip invalid grammar: {serialized_grammar}, {e=}")
-             return None
+             logger.error(f"Hit invalid grammar: {serialized_grammar=}, {e=}")
+             return INVALID_GRAMMAR_OBJ

      def dispatch_json(self, key_string: str) -> Optional[GuidanceGrammar]:
          try:
@@ -138,8 +139,8 @@ class GuidanceBackend(BaseGrammarBackend):
                  },
              )
          except Exception as e:
-             logger.warning(f"Skip invalid grammar: {key_string=}, {e=}")
-             return None
+             logger.error(f"Hit invalid json_schema: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._from_serialized(serialized_grammar)

      def dispatch_regex(self, key_string: str) -> Optional[GuidanceGrammar]:
@@ -151,8 +152,8 @@ class GuidanceBackend(BaseGrammarBackend):
              serialized_grammar = grammar_from("ebnf", key_string)
              return self._from_serialized(serialized_grammar)
          except ValueError as e:
-             logger.warning(f"Skip invalid ebnf: regex={key_string}, {e=}")
-             return None
+             logger.error(f"Hit invalid ebnf: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ

      def dispatch_structural_tag(self, key_string: str) -> Optional[GuidanceGrammar]:
          try:
@@ -169,5 +170,5 @@ class GuidanceBackend(BaseGrammarBackend):
              g = StructTag.to_grammar(tags)
              return self._from_serialized(g)
          except Exception as e:
-             logging.warning(f"Skip invalid structural_tag: {key_string}, {e=}")
-             return None
+             logging.error(f"Hit invalid structural_tag: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
@@ -24,6 +24,7 @@ from outlines.models.transformers import TransformerTokenizer
  from pydantic import BaseModel

  from sglang.srt.constrained.base_grammar_backend import (
+     INVALID_GRAMMAR_OBJ,
      BaseGrammarBackend,
      BaseGrammarObject,
  )
@@ -151,8 +152,8 @@ class OutlinesGrammarBackend(BaseGrammarBackend):
              # outlines <= 0.0.46
              guide = RegexGuide(regex, self.outlines_tokenizer)
          except interegular.patterns.InvalidSyntax as e:
-             logger.warning(f"skip invalid regex schema: {regex=}, {e=}")
-             return None
+             logger.error(f"Hit invalid regex schema: {regex=}, {e=}")
+             return INVALID_GRAMMAR_OBJ

          jump_forward_map = None
          return OutlinesGrammar(guide, jump_forward_map)
@@ -170,8 +171,8 @@ class OutlinesGrammarBackend(BaseGrammarBackend):
                  whitespace_pattern=self.whitespace_pattern,
              )
          except (NotImplementedError, json.decoder.JSONDecodeError, ValueError) as e:
-             logger.warning(f"Skip invalid json_schema: {key_string=}, {e=}")
-             return None
+             logger.error(f"Hit invalid json_schema: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._compile_regex(regex)

      def dispatch_regex(self, key_string: str):
@@ -28,6 +28,7 @@ from xgrammar import (
  )

  from sglang.srt.constrained.base_grammar_backend import (
+     INVALID_GRAMMAR_OBJ,
      BaseGrammarBackend,
      BaseGrammarObject,
  )
@@ -152,10 +153,11 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
      ):
          super().__init__()

-         tokenizer_info = TokenizerInfo.from_huggingface(
-             tokenizer, vocab_size=vocab_size
-         )
-         override_stop_tokens = None
+         if True:
+             tokenizer_info = TokenizerInfo.from_huggingface(
+                 tokenizer, vocab_size=vocab_size
+             )
+             override_stop_tokens = None

          self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
          self.vocab_size = vocab_size
@@ -178,25 +180,26 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
                  ctx = self.grammar_compiler.compile_builtin_json_grammar()
              else:
                  ctx = self.grammar_compiler.compile_json_schema(schema=key_string)
-         except RuntimeError as e:
-             logging.warning(f"Skip invalid json_schema: json_schema={key_string}, {e=}")
-             return None
+
+         except (RuntimeError, json.decoder.JSONDecodeError) as e:
+             logging.error(f"Hit invalid json_schema: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._from_context(ctx, key_string)

      def dispatch_ebnf(self, key_string: str) -> Optional[XGrammarGrammar]:
          try:
              ctx = self.grammar_compiler.compile_grammar(key_string)
          except RuntimeError as e:
-             logging.warning(f"Skip invalid ebnf: ebnf={key_string}, {e=}")
-             return None
+             logging.error(f"Hit invalid ebnf: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._from_context(ctx, key_string)

      def dispatch_regex(self, key_string: str) -> Optional[XGrammarGrammar]:
          try:
              ctx = self.grammar_compiler.compile_regex(key_string)
          except RuntimeError as e:
-             logging.warning(f"Skip invalid regex: regex={key_string}, {e=}")
-             return None
+             logging.error(f"Hit invalid regex: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._from_context(ctx, key_string)

      def dispatch_structural_tag(self, key_string: str) -> Optional[XGrammarGrammar]:
@@ -213,13 +216,10 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
              ctx = self.grammar_compiler.compile_structural_tag(
                  tags, structural_tag["triggers"]
              )
-         except RuntimeError as e:
-             logging.warning(
-                 f"Skip invalid structural_tag: structural_tag={key_string}, {e=}"
-             )
-             return None
+         except (RuntimeError, json.decoder.JSONDecodeError) as e:
+             logging.error(f"Hit invalid structural_tag: {key_string=}, {e=}")
+             return INVALID_GRAMMAR_OBJ
          return self._from_context(ctx, key_string)

      def reset(self):
-         if self.grammar_compiler:
-             self.grammar_compiler.clear_cache()
+         self.grammar_compiler.clear_cache()