sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_offline_throughput.py +4 -2
- sglang/bench_one_batch.py +2 -2
- sglang/bench_one_batch_server.py +143 -15
- sglang/bench_serving.py +9 -7
- sglang/compile_deep_gemm.py +1 -1
- sglang/eval/loogle_eval.py +157 -0
- sglang/lang/chat_template.py +78 -78
- sglang/lang/tracer.py +1 -1
- sglang/srt/code_completion_parser.py +1 -1
- sglang/srt/configs/deepseekvl2.py +2 -2
- sglang/srt/configs/model_config.py +1 -0
- sglang/srt/constrained/base_grammar_backend.py +55 -72
- sglang/srt/constrained/llguidance_backend.py +25 -21
- sglang/srt/constrained/outlines_backend.py +27 -26
- sglang/srt/constrained/reasoner_grammar_backend.py +22 -33
- sglang/srt/constrained/xgrammar_backend.py +69 -43
- sglang/srt/conversation.py +48 -43
- sglang/srt/disaggregation/base/conn.py +1 -0
- sglang/srt/disaggregation/decode.py +7 -2
- sglang/srt/disaggregation/fake/conn.py +1 -1
- sglang/srt/disaggregation/mooncake/conn.py +227 -120
- sglang/srt/disaggregation/nixl/conn.py +1 -0
- sglang/srt/disaggregation/prefill.py +7 -4
- sglang/srt/disaggregation/utils.py +7 -1
- sglang/srt/entrypoints/engine.py +17 -2
- sglang/srt/entrypoints/http_server.py +17 -2
- sglang/srt/function_call_parser.py +2 -2
- sglang/srt/layers/attention/flashattention_backend.py +1 -1
- sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +1 -1
- sglang/srt/layers/attention/utils.py +4 -2
- sglang/srt/layers/dp_attention.py +71 -21
- sglang/srt/layers/layernorm.py +1 -1
- sglang/srt/layers/logits_processor.py +46 -11
- sglang/srt/layers/moe/ep_moe/kernels.py +1 -1
- sglang/srt/layers/moe/ep_moe/layer.py +1 -1
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +1 -1
- sglang/srt/layers/moe/topk.py +1 -1
- sglang/srt/layers/quantization/__init__.py +1 -1
- sglang/srt/layers/quantization/blockwise_int8.py +2 -2
- sglang/srt/layers/quantization/deep_gemm.py +72 -71
- sglang/srt/layers/quantization/fp8.py +2 -2
- sglang/srt/layers/quantization/fp8_kernel.py +3 -3
- sglang/srt/layers/quantization/int8_kernel.py +2 -2
- sglang/srt/layers/sampler.py +0 -4
- sglang/srt/layers/vocab_parallel_embedding.py +18 -7
- sglang/srt/lora/lora_manager.py +1 -1
- sglang/srt/lora/mem_pool.py +4 -4
- sglang/srt/lora/triton_ops/gate_up_lora_b.py +1 -1
- sglang/srt/lora/triton_ops/qkv_lora_b.py +1 -1
- sglang/srt/lora/triton_ops/sgemm_lora_a.py +1 -1
- sglang/srt/lora/triton_ops/sgemm_lora_b.py +1 -1
- sglang/srt/lora/utils.py +1 -1
- sglang/srt/managers/data_parallel_controller.py +3 -3
- sglang/srt/managers/detokenizer_manager.py +21 -8
- sglang/srt/managers/io_struct.py +3 -1
- sglang/srt/managers/mm_utils.py +1 -1
- sglang/srt/managers/multimodal_processors/llava.py +46 -0
- sglang/srt/managers/multimodal_processors/pixtral.py +127 -0
- sglang/srt/managers/schedule_batch.py +76 -24
- sglang/srt/managers/schedule_policy.py +0 -3
- sglang/srt/managers/scheduler.py +113 -88
- sglang/srt/managers/scheduler_output_processor_mixin.py +124 -55
- sglang/srt/managers/tokenizer_manager.py +133 -34
- sglang/srt/managers/tp_worker.py +12 -9
- sglang/srt/managers/tp_worker_overlap_thread.py +22 -11
- sglang/srt/mem_cache/memory_pool.py +2 -0
- sglang/srt/metrics/collector.py +312 -37
- sglang/srt/model_executor/cuda_graph_runner.py +10 -11
- sglang/srt/model_executor/forward_batch_info.py +1 -1
- sglang/srt/model_executor/model_runner.py +19 -14
- sglang/srt/models/deepseek_janus_pro.py +2 -2
- sglang/srt/models/deepseek_v2.py +23 -20
- sglang/srt/models/llama.py +2 -0
- sglang/srt/models/llama4.py +5 -6
- sglang/srt/models/llava.py +248 -5
- sglang/srt/models/mixtral.py +98 -34
- sglang/srt/models/pixtral.py +467 -0
- sglang/srt/models/roberta.py +1 -1
- sglang/srt/models/torch_native_llama.py +1 -1
- sglang/srt/openai_api/adapter.py +30 -4
- sglang/srt/openai_api/protocol.py +0 -8
- sglang/srt/reasoning_parser.py +3 -3
- sglang/srt/sampling/custom_logit_processor.py +18 -3
- sglang/srt/sampling/sampling_batch_info.py +4 -56
- sglang/srt/sampling/sampling_params.py +2 -2
- sglang/srt/server_args.py +34 -4
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +3 -3
- sglang/srt/speculative/eagle_utils.py +7 -7
- sglang/srt/speculative/eagle_worker.py +22 -19
- sglang/srt/utils.py +6 -5
- sglang/test/few_shot_gsm8k.py +2 -2
- sglang/test/few_shot_gsm8k_engine.py +2 -2
- sglang/test/run_eval.py +2 -2
- sglang/test/runners.py +8 -1
- sglang/test/send_one.py +13 -3
- sglang/test/simple_eval_common.py +1 -1
- sglang/test/simple_eval_humaneval.py +1 -1
- sglang/test/test_programs.py +5 -5
- sglang/test/test_utils.py +89 -14
- sglang/utils.py +1 -1
- sglang/version.py +1 -1
- {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/METADATA +6 -5
- {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/RECORD +107 -104
- /sglang/{llama3_eval.py → eval/llama3_eval.py} +0 -0
- {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/WHEEL +0 -0
- {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/top_level.txt +0 -0
sglang/test/run_eval.py
CHANGED
@@ -71,9 +71,9 @@ def run_eval(args):
     )
 
     # Run eval
-    tic = time.time()
+    tic = time.perf_counter()
     result = eval_obj(sampler)
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Dump reports
     metrics = result.metrics | {"score": result.score}
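This release swaps `time.time()` for `time.perf_counter()` throughout the test and benchmark code. `perf_counter()` is a monotonic, high-resolution clock that is unaffected by system clock adjustments (NTP steps, manual changes), which makes it the safer choice for measuring elapsed latency. A minimal sketch of the pattern; `do_work` is a stand-in for the benchmarked call:

```python
import time

def do_work():
    # stand-in for the benchmarked call, e.g. eval_obj(sampler) above
    time.sleep(0.1)

tic = time.perf_counter()            # monotonic start timestamp
do_work()
latency = time.perf_counter() - tic  # elapsed seconds, immune to wall-clock changes
print(f"latency: {latency:.3f}s")
```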
sglang/test/runners.py
CHANGED
@@ -19,7 +19,9 @@ from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.nn.functional as F
+import transformers
 from transformers import (
+    AutoConfig,
     AutoModel,
     AutoModelForCausalLM,
     AutoModelForVision2Seq,
@@ -211,7 +213,12 @@ class HFRunner:
 
         # Load the model and tokenizer
         if self.model_type == "generation":
-            self.base_model = AutoModelForCausalLM.from_pretrained(
+            config = AutoConfig.from_pretrained(model_path)
+            if model_archs := getattr(config, "architectures"):
+                model_cls = getattr(transformers, model_archs[0])
+            else:
+                model_cls = AutoModelForCausalLM
+            self.base_model = model_cls.from_pretrained(
                 model_path,
                 torch_dtype=torch_dtype,
                 trust_remote_code=self.trust_remote_code,
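The HFRunner change resolves the concrete model class from `config.architectures` instead of always loading through `AutoModelForCausalLM`, so a checkpoint that declares a specific architecture is loaded with that class. A standalone sketch of the same lookup; the model id is a placeholder:

```python
import transformers
from transformers import AutoConfig, AutoModelForCausalLM

model_path = "gpt2"  # placeholder model id for illustration

config = AutoConfig.from_pretrained(model_path)
# config.architectures is e.g. ["GPT2LMHeadModel"]; fall back to the Auto class if absent
if model_archs := getattr(config, "architectures", None):
    model_cls = getattr(transformers, model_archs[0])
else:
    model_cls = AutoModelForCausalLM
model = model_cls.from_pretrained(model_path)
print(type(model).__name__)
```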
sglang/test/send_one.py
CHANGED
@@ -27,6 +27,7 @@ class BenchArgs:
         "Human: Give me a fully functional FastAPI server. Show the python code.\n\nAssistant:"
     )
     image: bool = False
+    many_images: bool = False
     stream: bool = False
 
     @staticmethod
@@ -48,6 +49,7 @@ class BenchArgs:
         parser.add_argument("--return-logprob", action="store_true")
         parser.add_argument("--prompt", type=str, default=BenchArgs.prompt)
         parser.add_argument("--image", action="store_true")
+        parser.add_argument("--many-images", action="store_true")
        parser.add_argument("--stream", action="store_true")
 
     @classmethod
@@ -62,6 +64,17 @@ def send_one_prompt(args):
             "Human: Describe this image in a very short sentence.\n\nAssistant:"
         )
         image_data = "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
+    elif args.many_images:
+        args.prompt = (
+            "Human: I have one reference image and many images."
+            "Describe their relationship in a very short sentence.\n\nAssistant:"
+        )
+        image_data = [
+            "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
+            "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
+            "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
+            "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
+        ]
     else:
         image_data = None
 
@@ -74,9 +87,6 @@ def send_one_prompt(args):
             "Write in a format of json.\nAssistant:"
         )
         json_schema = "$$ANY$$"
-        json_schema = (
-            '{"type": "object", "properties": {"population": {"type": "integer"}}}'
-        )
     else:
         json_schema = None
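The new `--many-images` mode exercises multi-image inputs by sending the same example image four times. A hedged sketch of the equivalent raw request against a running server; the port and payload shape are assumptions based on sglang's documented native `/generate` API:

```python
import requests

# Rough equivalent of what `--many-images` sends: image_data may be a list of URLs.
url = "http://localhost:30000/generate"  # assumes a locally launched sglang server
payload = {
    "text": (
        "Human: I have one reference image and many images."
        "Describe their relationship in a very short sentence.\n\nAssistant:"
    ),
    "image_data": [
        "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
    ] * 4,
    "sampling_params": {"max_new_tokens": 64},
}
print(requests.post(url, json=payload).json())
```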
sglang/test/simple_eval_common.py
CHANGED
@@ -140,7 +140,7 @@ class ChatCompletionSampler(SamplerBase):
                     max_tokens=self.max_tokens,
                 )
                 return response.choices[0].message.content
-            # NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are
+            # NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are rerunning MMMU
             except openai.BadRequestError as e:
                 print("Bad Request Error", e)
                 return ""
sglang/test/simple_eval_humaneval.py
CHANGED
@@ -121,7 +121,7 @@ class HumanEval(Eval):
                 convo=convo,
                 metrics={
                     f"pass@{k}": estimate_pass_at_k([total], [correct], k)
-                    # this will be
+                    # this will be aggregated so no need of .mean()
                     for k in self._ks_passes
                     if total >= k
                 },
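For context, the `estimate_pass_at_k` seen in this hunk is the standard unbiased pass@k estimator from the HumanEval paper: with n samples per problem and c of them correct, pass@k = 1 - C(n-c, k) / C(n, k). A minimal sketch of that computation; it mirrors the well-known reference implementation, not necessarily sglang's exact code:

```python
import numpy as np

def estimate_pass_at_k(num_samples, num_correct, k):
    """Unbiased pass@k: 1 - C(n-c, k) / C(n, k), computed per problem."""
    def estimator(n, c):
        if n - c < k:  # every size-k subset must contain a correct sample
            return 1.0
        # product form avoids computing large binomial coefficients directly
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
    return np.array([estimator(n, c) for n, c in zip(num_samples, num_correct)])

print(estimate_pass_at_k([10], [3], k=1))  # [0.3]
```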
sglang/test/test_programs.py
CHANGED
@@ -370,7 +370,7 @@ def test_dtype_gen():
     @sgl.function
     def dtype_gen(s):
         s += "Q: What is the full name of DNS?\n"
-        s += "A: The full
+        s += "A: The full names is " + sgl.gen("str_res", dtype=str, stop="\n") + "\n"
         s += "Q: Which year was DNS invented?\n"
         s += "A: " + sgl.gen("int_res", dtype=int) + "\n"
         s += "Q: What is the value of pi?\n"
@@ -503,7 +503,7 @@ def test_hellaswag_select():
     #####################################
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = few_shot_hellaswag.run_batch(
         arguments,
         temperature=0,
@@ -514,13 +514,13 @@ def test_hellaswag_select():
     preds = []
     for i, ret in enumerate(rets):
         preds.append(choices[i].index(ret["answer"]))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     accuracy = np.mean(np.array(preds) == np.array(labels))
 
     # Test generator style of run_batch
-    tic = time.time()
+    tic = time.perf_counter()
     rets = few_shot_hellaswag.run_batch(
         arguments,
         temperature=0,
@@ -531,7 +531,7 @@ def test_hellaswag_select():
     preds_gen = []
     for i, ret in enumerate(rets):
         preds_gen.append(choices[i].index(ret["answer"]))
-    latency_gen = time.time() - tic
+    latency_gen = time.perf_counter() - tic
 
     # Compute accuracy
     accuracy_gen = np.mean(np.array(preds_gen) == np.array(labels))
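The `dtype_gen` program above relies on `sgl.gen`'s `dtype` argument to constrain the decoded value to a Python type. A hedged usage sketch of the same pattern, assuming a server is already running at the placeholder endpoint:

```python
import sglang as sgl

@sgl.function
def typed_qa(s):
    s += "Q: Which year was DNS invented?\n"
    s += "A: " + sgl.gen("int_res", dtype=int) + "\n"  # constrain the answer to an integer

# assumes an sglang server at this placeholder endpoint
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
state = typed_qa.run()
print(state["int_res"])
```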
sglang/test/test_utils.py
CHANGED
@@ -395,12 +395,12 @@ def popen_launch_server(
     other_args: list[str] = (),
     env: Optional[dict] = None,
     return_stdout_stderr: Optional[tuple] = None,
-    pd_seperated: bool = False,
+    pd_separated: bool = False,
 ):
     _, host, port = base_url.split(":")
     host = host[2:]
 
-    if pd_seperated:
+    if pd_separated:
         command = "sglang.launch_pd_server"
     else:
         command = "sglang.launch_server"
@@ -414,7 +414,7 @@ def popen_launch_server(
         *[str(x) for x in other_args],
     ]
 
-    if pd_seperated:
+    if pd_separated:
         command.extend(
             [
                 "--lb-host",
@@ -449,9 +449,9 @@ def popen_launch_server(
     else:
         process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
 
-    start_time = time.time()
+    start_time = time.perf_counter()
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 headers = {
                     "Content-Type": "application/json; charset=utf-8",
@@ -478,6 +478,81 @@ def popen_launch_server(
     raise TimeoutError("Server failed to start within the timeout period.")
 
 
+def popen_launch_pd_server(
+    model: str,
+    base_url: str,
+    timeout: float,
+    api_key: Optional[str] = None,
+    other_args: list[str] = (),
+    env: Optional[dict] = None,
+    return_stdout_stderr: Optional[tuple] = None,
+):
+    _, host, port = base_url.split(":")
+    host = host[2:]
+
+    command = "sglang.launch_server"
+
+    command = [
+        "python3",
+        "-m",
+        command,
+        "--model-path",
+        model,
+        *[str(x) for x in other_args],
+    ]
+
+    command.extend(
+        [
+            "--host",
+            host,
+            "--port",
+            port,
+        ]
+    )
+
+    if api_key:
+        command += ["--api-key", api_key]
+
+    print(f"command={' '.join(command)}")
+
+    if return_stdout_stderr:
+        process = subprocess.Popen(
+            command,
+            stdout=return_stdout_stderr[0],
+            stderr=return_stdout_stderr[1],
+            env=env,
+            text=True,
+        )
+    else:
+        process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
+
+    start_time = time.time()
+    with requests.Session() as session:
+        while time.time() - start_time < timeout:
+            try:
+                headers = {
+                    "Content-Type": "application/json; charset=utf-8",
+                    "Authorization": f"Bearer {api_key}",
+                }
+                response = session.get(
+                    f"{base_url}/health",
+                    headers=headers,
+                )
+                if response.status_code == 200:
+                    return process
+            except requests.RequestException:
+                pass
+
+            return_code = process.poll()
+            if return_code is not None:
+                raise Exception(f"Server unexpectedly exits ({return_code=}).")
+
+            time.sleep(10)
+
+    kill_process_tree(process.pid)
+    raise TimeoutError("Server failed to start within the timeout period.")
+
+
 def run_with_timeout(
     func: Callable,
     args: tuple = (),
@@ -509,7 +584,7 @@ class TestFile:
 
 
 def run_unittest_files(files: List[TestFile], timeout_per_file: float):
-    tic = time.time()
+    tic = time.perf_counter()
     success = True
 
     for i, file in enumerate(files):
@@ -524,13 +599,13 @@ def run_unittest_files(files: List[TestFile], timeout_per_file: float):
             f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
             flush=True,
         )
-        tic = time.time()
+        tic = time.perf_counter()
 
         process = subprocess.Popen(
             ["python3", filename], stdout=None, stderr=None, env=os.environ
         )
         process.wait()
-        elapsed = time.time() - tic
+        elapsed = time.perf_counter() - tic
 
         print(
             f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
@@ -556,9 +631,9 @@ def run_unittest_files(files: List[TestFile], timeout_per_file: float):
             break
 
     if success:
-        print(f"Success. Time elapsed: {time.time() - tic:.2f}s", flush=True)
+        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
     else:
-        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s", flush=True)
+        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
 
     return 0 if success else -1
 
@@ -581,7 +656,7 @@ def get_benchmark_args(
     disable_stream=False,
     disable_ignore_eos=False,
     seed: int = 0,
-    pd_seperated: bool = False,
+    pd_separated: bool = False,
 ):
     return SimpleNamespace(
         backend="sglang",
@@ -611,7 +686,7 @@ def get_benchmark_args(
         profile=None,
         lora_name=None,
         prompt_suffix="",
-        pd_seperated=pd_seperated,
+        pd_separated=pd_separated,
     )
 
 
@@ -675,7 +750,7 @@ def run_bench_serving_multi(
     other_server_args,
     benchmark_args,
     need_warmup=False,
-    pd_seperated=False,
+    pd_separated=False,
 ):
     # Launch the server
     process = popen_launch_server(
@@ -683,7 +758,7 @@ def run_bench_serving_multi(
         base_url,
         timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
         other_args=other_server_args,
-        pd_seperated=pd_seperated,
+        pd_separated=pd_separated,
     )
 
     # run benchmark for all
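The new `popen_launch_pd_server` reuses the same readiness loop as `popen_launch_server`: poll `/health` with the API key as a bearer token until the server answers 200, bail out if the child process dies, and time out otherwise. That loop in isolation, as a trimmed sketch with illustrative names:

```python
import time

import requests

def wait_until_healthy(base_url: str, timeout: float, api_key: str = "") -> bool:
    """Poll {base_url}/health until it returns 200 or `timeout` seconds elapse."""
    headers = {"Authorization": f"Bearer {api_key}"}
    start = time.perf_counter()
    while time.perf_counter() - start < timeout:
        try:
            resp = requests.get(f"{base_url}/health", headers=headers, timeout=5)
            if resp.status_code == 200:
                return True
        except requests.RequestException:
            pass  # server not up yet; keep polling
        time.sleep(10)
    return False
```

Note that the added helper itself still times its loop with `time.time()`, unlike `popen_launch_server`, which this release migrated to `time.perf_counter()`.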
sglang/utils.py
CHANGED
@@ -278,7 +278,7 @@ def graceful_registry(sub_module_name: str):
             f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
         )
         if signum == signal.SIGTERM:
-            logger.info(f"{sub_module_name} recive sigterm")
+            logger.info(f"{sub_module_name} receive sigterm")
 
     signal.signal(signal.SIGTERM, graceful_shutdown)
 
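`graceful_registry` installs a SIGTERM handler that logs before shutting down. A minimal standalone sketch of the same registration pattern; the logger name and the idle loop are illustrative:

```python
import logging
import signal
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("worker")

def graceful_shutdown(signum, frame):
    # log, then exit cleanly instead of dying mid-request
    logger.info("worker received signal to shutdown; performing graceful shutdown...")
    if signum == signal.SIGTERM:
        logger.info("worker received SIGTERM")
    raise SystemExit(0)

signal.signal(signal.SIGTERM, graceful_shutdown)

# `kill -TERM <pid>` now runs the handler instead of killing the process abruptly
while True:
    time.sleep(1)
```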
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.4.6.post3"
+__version__ = "0.4.6.post4"
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sglang
-Version: 0.4.6.post3
+Version: 0.4.6.post4
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                        Version 2.0, January 2004
@@ -247,7 +247,7 @@ Requires-Dist: xgrammar==0.1.19; extra == "runtime-common"
 Requires-Dist: blobfile==3.0.0; extra == "runtime-common"
 Provides-Extra: srt
 Requires-Dist: sglang[runtime_common]; extra == "srt"
-Requires-Dist: sgl-kernel==0.1.
+Requires-Dist: sgl-kernel==0.1.2.post1; extra == "srt"
 Requires-Dist: flashinfer_python==0.2.5; extra == "srt"
 Requires-Dist: torch==2.6.0; extra == "srt"
 Requires-Dist: torchvision==0.21.0; extra == "srt"
@@ -301,6 +301,7 @@ Requires-Dist: sglang[srt]; extra == "all"
 Requires-Dist: sglang[openai]; extra == "all"
 Requires-Dist: sglang[anthropic]; extra == "all"
 Requires-Dist: sglang[litellm]; extra == "all"
+Requires-Dist: sglang[torch_memory_saver]; extra == "all"
 Provides-Extra: all-hip
 Requires-Dist: sglang[srt_hip]; extra == "all-hip"
 Requires-Dist: sglang[openai]; extra == "all-hip"
@@ -368,16 +369,16 @@ Dynamic: license-file
 - [2025/05] 🔥 Deploying DeepSeek with PD Disaggregation and Large-scale Expert Parallelism on 96 H100 GPUs ([blog](https://lmsys.org/blog/2025-05-05-large-scale-ep/)).
 - [2025/03] Supercharge DeepSeek-R1 Inference on AMD Instinct MI300X ([AMD blog](https://rocm.blogs.amd.com/artificial-intelligence/DeepSeekR1-Part2/README.html))
 - [2025/03] SGLang Joins PyTorch Ecosystem: Efficient LLM Serving Engine ([PyTorch blog](https://pytorch.org/blog/sglang-joins-pytorch/))
-- [2025/02] Unlock DeepSeek-R1 Inference Performance on AMD Instinct™ MI300X GPU ([AMD blog](https://rocm.blogs.amd.com/artificial-intelligence/DeepSeekR1_Perf/README.html))
 - [2025/01] 🔥 SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with DeepSeek-specific optimizations. ([instructions](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3), [AMD blog](https://www.amd.com/en/developer/resources/technical-articles/amd-instinct-gpus-power-deepseek-v3-revolutionizing-ai-development-with-sglang.html), [10+ other companies](https://x.com/lmsysorg/status/1887262321636221412))
 - [2024/12] 🔥 v0.4 Release: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
-- [2024/09] v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
 - [2024/07] v0.2 Release: Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
 
 <details>
 <summary>More</summary>
 
+- [2025/02] Unlock DeepSeek-R1 Inference Performance on AMD Instinct™ MI300X GPU ([AMD blog](https://rocm.blogs.amd.com/artificial-intelligence/DeepSeekR1_Perf/README.html))
 - [2024/10] The First SGLang Online Meetup ([slides](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#the-first-sglang-online-meetup)).
+- [2024/09] v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
 - [2024/02] SGLang enables **3x faster JSON decoding** with compressed finite state machine ([blog](https://lmsys.org/blog/2024-02-05-compressed-fsm/)).
 - [2024/01] SGLang provides up to **5x faster inference** with RadixAttention ([blog](https://lmsys.org/blog/2024-01-17-sglang/)).
 - [2024/01] SGLang powers the serving of the official **LLaVA v1.6** release demo ([usage](https://github.com/haotian-liu/LLaVA?tab=readme-ov-file#demo)).
@@ -409,7 +410,7 @@ Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
 
 ## Adoption and Sponsorship
 The project has been deployed to large-scale production, generating trillions of tokens every day.
-It is supported by the following institutions: AMD, Atlas Cloud, Baseten, Cursor, DataCrunch, Etched, Google Cloud, Hyperbolic, Iflytek, Jam & Tea Studios, LinkedIn, LMSYS, Meituan, Nebius, Novita AI, NVIDIA, Oracle, RunPod, Stanford, UC Berkeley, UCLA, xAI, and 01.AI.
+It is supported by the following institutions: AMD, Atlas Cloud, Baseten, Cursor, DataCrunch, Etched, Google Cloud, Hyperbolic, Iflytek, InnoMatrix, Jam & Tea Studios, LinkedIn, LMSYS, Meituan, Nebius, Novita AI, NVIDIA, Oracle, RunPod, Stanford, UC Berkeley, UCLA, xAI, and 01.AI.
 
 <img src="https://raw.githubusercontent.com/sgl-project/sgl-learning-materials/main/slides/adoption.png" alt="logo" width="800" margin="10px"></img>