sglang 0.3.6.post1__py3-none-any.whl → 0.3.6.post2__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
sglang/bench_one_batch.py CHANGED
@@ -466,7 +466,6 @@ if __name__ == "__main__":
466
466
 
467
467
  try:
468
468
  main(server_args, bench_args)
469
- except Exception as e:
470
- raise e
471
469
  finally:
472
- kill_child_process()
470
+ if server_args.tp_size != 1:
471
+ kill_child_process()
@@ -5,9 +5,9 @@ This script launches a server and uses the HTTP interface.
5
5
  It accepts server arguments (the same as launch_server.py) and benchmark arguments (e.g., batch size, input lengths).
6
6
 
7
7
  Usage:
8
- python3 -m sglang.bench_server_latency --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
8
+ python3 -m sglang.bench_one_batch_server --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
9
9
 
10
- python3 -m sglang.bench_server_latency --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
10
+ python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
11
11
  """
12
12
 
13
13
  import argparse
sglang/check_env.py CHANGED
@@ -22,18 +22,24 @@ PACKAGE_LIST = [
22
22
  "hf_transfer",
23
23
  "huggingface_hub",
24
24
  "interegular",
25
+ "modelscope",
26
+ "orjson",
27
+ "outlines",
28
+ "packaging",
25
29
  "psutil",
26
30
  "pydantic",
27
31
  "multipart",
28
32
  "zmq",
33
+ "torchao",
29
34
  "uvicorn",
30
35
  "uvloop",
31
36
  "vllm",
32
- "outlines",
37
+ "xgrammar",
33
38
  "openai",
34
39
  "tiktoken",
35
40
  "anthropic",
36
41
  "litellm",
42
+ "decord",
37
43
  ]
38
44
 
39
45
 
sglang/lang/tracer.py CHANGED
@@ -278,6 +278,6 @@ class TracingScope:
278
278
 
279
279
  def add_child_state(self, state: TracerProgramState):
280
280
  cur_scope = self
281
- while cur_scope != None:
281
+ while cur_scope is not None:
282
282
  cur_scope.tracer_state.child_states.append(state)
283
283
  cur_scope = cur_scope.last_scope
sglang/launch_server.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Launch the inference server."""
2
2
 
3
- import os
4
3
  import sys
5
4
 
6
5
  from sglang.srt.server import launch_server
@@ -12,7 +11,5 @@ if __name__ == "__main__":
12
11
 
13
12
  try:
14
13
  launch_server(server_args)
15
- except Exception as e:
16
- raise e
17
14
  finally:
18
15
  kill_child_process()
@@ -14,13 +14,13 @@
14
14
 
15
15
  import json
16
16
  import logging
17
- import os
18
17
  from enum import IntEnum, auto
19
18
  from typing import List, Optional
20
19
 
21
20
  from transformers import PretrainedConfig
22
21
 
23
22
  from sglang.srt.hf_transformers_utils import get_config, get_context_length
23
+ from sglang.srt.utils import get_bool_env_var
24
24
 
25
25
  logger = logging.getLogger(__name__)
26
26
 
@@ -59,13 +59,9 @@ class ModelConfig:
59
59
 
60
60
  # Derive context length
61
61
  derived_context_len = get_context_length(self.hf_text_config)
62
- allow_long_context = os.environ.get(
63
- "SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN", None
64
- )
65
-
66
62
  if context_length is not None:
67
63
  if context_length > derived_context_len:
68
- if allow_long_context:
64
+ if get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
69
65
  logger.warning(
70
66
  f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
71
67
  f"This may lead to incorrect model outputs or CUDA errors."
@@ -18,7 +18,7 @@ import triton.language as tl
18
18
  from sglang.global_config import global_config
19
19
  from sglang.srt.layers.attention import AttentionBackend
20
20
  from sglang.srt.model_executor.forward_batch_info import ForwardBatch
21
- from sglang.srt.utils import is_flashinfer_available
21
+ from sglang.srt.utils import get_bool_env_var, is_flashinfer_available
22
22
 
23
23
  if TYPE_CHECKING:
24
24
  from sglang.srt.layers.radix_attention import RadixAttention
@@ -47,8 +47,8 @@ class FlashInferAttnBackend(AttentionBackend):
47
47
 
48
48
  # Parse constants
49
49
  if "SGLANG_FLASHINFER_USE_TENSOR_CORE" in os.environ:
50
- self.decode_use_tensor_cores = (
51
- os.environ["SGLANG_FLASHINFER_USE_TENSOR_CORE"].lower() == "true"
50
+ self.decode_use_tensor_cores = get_bool_env_var(
51
+ "SGLANG_FLASHINFER_USE_TENSOR_CORE"
52
52
  )
53
53
  else:
54
54
  if not _grouped_size_compiled_for_decode_kernels(
@@ -74,7 +74,7 @@ class Sampler(nn.Module):
74
74
  filter_apply_order="joint",
75
75
  )
76
76
 
77
- if not torch.all(success):
77
+ if self.use_nan_detectioin and not torch.all(success):
78
78
  logger.warning("Detected errors during sampling!")
79
79
  batch_next_token_ids = torch.zeros_like(batch_next_token_ids)
80
80
  elif global_server_args_dict["sampling_backend"] == "pytorch":
@@ -25,8 +25,6 @@ from sglang.srt.managers.io_struct import (
25
25
  BatchEmbeddingOut,
26
26
  BatchStrOut,
27
27
  BatchTokenIDOut,
28
- GetMemPoolSizeReqOutput,
29
- UpdateWeightReqOutput,
30
28
  )
31
29
  from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN
32
30
  from sglang.srt.server_args import PortArgs, ServerArgs
@@ -131,6 +131,7 @@ class LlavaImageProcessor(BaseImageProcessor):
131
131
  if not image_data:
132
132
  return None
133
133
 
134
+ modalities = request_obj.modalities or ["image"]
134
135
  aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
135
136
  grid_pinpoints = (
136
137
  self.hf_config.image_grid_pinpoints
@@ -139,9 +140,12 @@ class LlavaImageProcessor(BaseImageProcessor):
139
140
  else None
140
141
  )
141
142
 
143
+ if isinstance(image_data, str):
144
+ image_data = [image_data]
145
+
142
146
  if isinstance(image_data, list) and len(image_data) > 0:
143
- # Multiple images
144
- if len(image_data) > 1:
147
+ if "multi-images" in modalities or "video" in modalities:
148
+ # Multiple images
145
149
  aspect_ratio = "pad" # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. We do not use anyres
146
150
  pixel_values, image_hashes, image_sizes = [], [], []
147
151
  res = []
@@ -166,13 +170,6 @@ class LlavaImageProcessor(BaseImageProcessor):
166
170
  )
167
171
  image_hashes = [image_hash]
168
172
  image_sizes = [image_size]
169
- elif isinstance(image_data, str):
170
- # A single image
171
- pixel_values, image_hash, image_size = await self._process_single_image(
172
- image_data, aspect_ratio, grid_pinpoints
173
- )
174
- image_hashes = [image_hash]
175
- image_sizes = [image_size]
176
173
  else:
177
174
  raise ValueError(f"Invalid image data: {image_data}")
178
175
 
@@ -31,6 +31,7 @@ import dataclasses
31
31
  import logging
32
32
  from typing import List, Optional, Tuple, Union
33
33
 
34
+ import numpy as np
34
35
  import torch
35
36
  import triton
36
37
  import triton.language as tl
@@ -167,6 +168,30 @@ class ImageInputs:
167
168
 
168
169
  return ret
169
170
 
171
+ def merge(self, other, vocab_size):
172
+ assert self.pixel_values.shape[1:] == other.pixel_values.shape[1:]
173
+ self.pixel_values = np.concatenate([self.pixel_values, other.pixel_values])
174
+ self.image_hashes += other.image_hashes
175
+
176
+ self.pad_values = [
177
+ (self.image_hashes) % vocab_size,
178
+ (self.image_hashes >> 16) % vocab_size,
179
+ (self.image_hashes >> 32) % vocab_size,
180
+ (self.image_hashes >> 64) % vocab_size,
181
+ ]
182
+
183
+ optional_args = [
184
+ "image_sizes",
185
+ "image_offsets",
186
+ # "modalities", # modalities should be ["multi-images"] (one entry) even for multiple images
187
+ "aspect_ratio_ids",
188
+ "aspect_ratio_mask",
189
+ "image_grid_thws",
190
+ ]
191
+ for arg in optional_args:
192
+ if getattr(self, arg, None) is not None:
193
+ setattr(self, arg, getattr(self, arg) + getattr(other, arg))
194
+
170
195
 
171
196
  class Req:
172
197
  """The input and output status of a request."""
@@ -177,6 +202,7 @@ class Req:
177
202
  origin_input_text: str,
178
203
  origin_input_ids: Tuple[int],
179
204
  sampling_params: SamplingParams,
205
+ origin_input_ids_unpadded: Optional[Tuple[int]] = None,
180
206
  lora_path: Optional[str] = None,
181
207
  input_embeds: Optional[List[List[float]]] = None,
182
208
  session_id: Optional[str] = None,
@@ -184,7 +210,11 @@ class Req:
184
210
  # Input and output info
185
211
  self.rid = rid
186
212
  self.origin_input_text = origin_input_text
187
- self.origin_input_ids_unpadded = origin_input_ids # Before image padding
213
+ self.origin_input_ids_unpadded = (
214
+ origin_input_ids_unpadded
215
+ if origin_input_ids_unpadded
216
+ else origin_input_ids # Before image padding
217
+ )
188
218
  self.origin_input_ids = origin_input_ids
189
219
  self.output_ids = [] # Each decode stage's output ids
190
220
  self.fill_ids = None # fill_ids = origin_input_ids + output_ids
@@ -260,6 +290,12 @@ class Req:
260
290
  # The number of cached tokens, that were already cached in the KV cache
261
291
  self.cached_tokens = 0
262
292
 
293
+ def extend_image_inputs(self, image_inputs, vocab_size):
294
+ if self.image_inputs is None:
295
+ self.image_inputs = image_inputs
296
+ else:
297
+ self.image_inputs.merge(image_inputs, vocab_size)
298
+
263
299
  # whether request reached finished condition
264
300
  def finished(self) -> bool:
265
301
  return self.finished_reason is not None
@@ -71,9 +71,10 @@ from sglang.srt.utils import (
71
71
  broadcast_pyobj,
72
72
  configure_logger,
73
73
  crash_on_warnings,
74
+ get_bool_env_var,
74
75
  get_zmq_socket,
75
- gpu_proc_affinity,
76
76
  kill_parent_process,
77
+ set_gpu_proc_affinity,
77
78
  set_random_seed,
78
79
  suppress_other_loggers,
79
80
  )
@@ -82,7 +83,7 @@ from sglang.utils import get_exception_traceback
82
83
  logger = logging.getLogger(__name__)
83
84
 
84
85
  # Test retract decode
85
- test_retract = os.getenv("SGLANG_TEST_RETRACT", "false").lower() == "true"
86
+ test_retract = get_bool_env_var("SGLANG_TEST_RETRACT")
86
87
 
87
88
 
88
89
  class Scheduler:
@@ -559,12 +560,13 @@ class Scheduler:
559
560
 
560
561
  # Image inputs
561
562
  if recv_req.image_inputs is not None:
562
- req.image_inputs = ImageInputs.from_dict(
563
+ image_inputs = ImageInputs.from_dict(
563
564
  recv_req.image_inputs, self.model_config.vocab_size
564
565
  )
565
566
  req.origin_input_ids = self.pad_input_ids_func(
566
- req.origin_input_ids_unpadded, req.image_inputs
567
+ req.origin_input_ids, image_inputs
567
568
  )
569
+ req.extend_image_inputs(image_inputs, self.model_config.vocab_size)
568
570
 
569
571
  if len(req.origin_input_ids) > self.max_req_input_len:
570
572
  req.finished_reason = FINISH_ABORT(
@@ -1404,7 +1406,8 @@ def run_scheduler_process(
1404
1406
  pipe_writer,
1405
1407
  ):
1406
1408
  # set cpu affinity to this gpu process
1407
- gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id)
1409
+ if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
1410
+ set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id)
1408
1411
 
1409
1412
  # [For Router] if env var "DP_RANK" exist, set dp_rank to the value of the env var
1410
1413
  if dp_rank is None and "DP_RANK" in os.environ:
@@ -41,16 +41,27 @@ class Session:
41
41
  ]
42
42
  + req.input_ids
43
43
  )
44
+ input_ids_unpadded = (
45
+ self.reqs[-1].origin_input_ids_unpadded
46
+ + self.reqs[-1].output_ids[
47
+ : self.reqs[-1].sampling_params.max_new_tokens
48
+ ]
49
+ + req.input_ids
50
+ )
44
51
  else:
45
52
  input_ids = req.input_ids
53
+ input_ids_unpadded = req.input_ids
46
54
  new_req = Req(
47
- req.rid,
48
- None,
49
- input_ids,
50
- req.sampling_params,
55
+ rid=req.rid,
56
+ origin_input_text=None,
57
+ origin_input_ids=input_ids,
58
+ origin_input_ids_unpadded=input_ids_unpadded,
59
+ sampling_params=req.sampling_params,
51
60
  lora_path=req.lora_path,
52
61
  session_id=self.session_id,
53
62
  )
63
+ if len(self.reqs) > 0:
64
+ new_req.image_inputs = self.reqs[-1].image_inputs
54
65
  new_req.tokenizer = tokenizer
55
66
  if req.session_rid is not None and len(self.reqs) == 0:
56
67
  new_req.finished_reason = FINISH_ABORT(
@@ -49,7 +49,13 @@ class LlavaBaseForCausalLM(nn.Module):
49
49
  image_sizes, pad_values = image_inputs.image_sizes, image_inputs.pad_values
50
50
 
51
51
  # hardcode for spatial_unpad + anyres
52
- image_aspect_ratio = "anyres" if len(image_sizes) == 1 else "pad"
52
+ if image_inputs.modalities is not None and (
53
+ "multi-images" in image_inputs.modalities
54
+ or "video" in image_inputs.modalities
55
+ ):
56
+ image_aspect_ratio = "pad"
57
+ else:
58
+ image_aspect_ratio = "anyres"
53
59
  offset_list = []
54
60
  for image_s in image_sizes:
55
61
  if len(image_sizes) > 16:
sglang/srt/server.py CHANGED
@@ -86,6 +86,7 @@ from sglang.srt.utils import (
86
86
  set_ulimit,
87
87
  )
88
88
  from sglang.utils import get_exception_traceback
89
+ from sglang.version import __version__
89
90
 
90
91
  logger = logging.getLogger(__name__)
91
92
 
@@ -455,7 +456,6 @@ def launch_engine(
455
456
  data = scheduler_pipe_readers[i].recv()
456
457
 
457
458
  if data["status"] != "ready":
458
- self.shutdown()
459
459
  raise RuntimeError(
460
460
  "Initialization failed. Please see the error messages above."
461
461
  )
@@ -528,6 +528,7 @@ async def _get_server_info():
528
528
  **dataclasses.asdict(tokenizer_manager.server_args), # server args
529
529
  "memory_pool_size": await tokenizer_manager.get_memory_pool_size(), # memory pool size
530
530
  "max_total_num_tokens": _max_total_num_tokens, # max total num tokens
531
+ "version": __version__,
531
532
  }
532
533
 
533
534
 
sglang/srt/utils.py CHANGED
@@ -72,7 +72,7 @@ def is_flashinfer_available():
72
72
  Check whether flashinfer is available.
73
73
  As of Oct. 6, 2024, it is only available on NVIDIA GPUs.
74
74
  """
75
- if os.environ.get("SGLANG_IS_FLASHINFER_AVAILABLE", "true") == "false":
75
+ if not get_bool_env_var("SGLANG_IS_FLASHINFER_AVAILABLE", default="true"):
76
76
  return False
77
77
  return torch.cuda.is_available() and not is_hip()
78
78
 
@@ -517,6 +517,11 @@ def monkey_patch_vllm_p2p_access_check(gpu_id: int):
517
517
 
518
518
  setattr(tgt, "gpu_p2p_access_check", lambda *arg, **kwargs: True)
519
519
 
520
+ # Suppress the warnings from this delete function when using sglang.bench_one_batch
521
+ from vllm.distributed.device_communicators.custom_all_reduce import CustomAllreduce
522
+
523
+ setattr(CustomAllreduce, "__del__", lambda *args, **kwargs: None)
524
+
520
525
 
521
526
  vllm_all_gather_backup = None
522
527
 
@@ -626,7 +631,7 @@ def add_api_key_middleware(app, api_key: str):
626
631
 
627
632
 
628
633
  def prepare_model_and_tokenizer(model_path: str, tokenizer_path: str):
629
- if "SGLANG_USE_MODELSCOPE" in os.environ:
634
+ if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
630
635
  if not os.path.exists(model_path):
631
636
  from modelscope import snapshot_download
632
637
 
@@ -931,7 +936,7 @@ def get_nvgpu_memory_capacity():
931
936
 
932
937
  def crash_on_warnings():
933
938
  # Crash on warning if we are running CI tests
934
- return os.getenv("SGLANG_IS_IN_CI", "false").lower() == "true"
939
+ return get_bool_env_var("SGLANG_IS_IN_CI")
935
940
 
936
941
 
937
942
  def get_device_name(device_id: int = 0) -> str:
@@ -990,7 +995,7 @@ def direct_register_custom_op(
990
995
  my_lib._register_fake(op_name, fake_impl)
991
996
 
992
997
 
993
- def gpu_proc_affinity(
998
+ def set_gpu_proc_affinity(
994
999
  tp_size: int,
995
1000
  nnodes: int,
996
1001
  gpu_id: int,
@@ -1022,3 +1027,8 @@ def gpu_proc_affinity(
1022
1027
  # set cpu_affinity to current process
1023
1028
  p.cpu_affinity(bind_cpu_ids)
1024
1029
  logger.info(f"Process {pid} gpu_id {gpu_id} is running on CPUs: {p.cpu_affinity()}")
1030
+
1031
+
1032
+ def get_bool_env_var(name: str, default: str = "false") -> bool:
1033
+ value = os.getenv(name, default)
1034
+ return value.lower() in ("true", "1")
sglang/test/test_utils.py CHANGED
@@ -22,7 +22,7 @@ from sglang.bench_serving import run_benchmark
22
22
  from sglang.global_config import global_config
23
23
  from sglang.lang.backend.openai import OpenAI
24
24
  from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
25
- from sglang.srt.utils import kill_child_process
25
+ from sglang.srt.utils import get_bool_env_var, kill_child_process
26
26
  from sglang.test.run_eval import run_eval
27
27
  from sglang.utils import get_exception_traceback
28
28
 
@@ -44,7 +44,7 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8
44
44
 
45
45
  def is_in_ci():
46
46
  """Return whether it is in CI runner."""
47
- return os.getenv("SGLANG_IS_IN_CI", "false").lower() == "true"
47
+ return get_bool_env_var("SGLANG_IS_IN_CI")
48
48
 
49
49
 
50
50
  if is_in_ci():
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.6.post1"
1
+ __version__ = "0.3.6.post2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.6.post1
3
+ Version: 0.3.6.post2
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -240,6 +240,7 @@ Provides-Extra: srt
240
240
  Requires-Dist: sglang[runtime_common]; extra == "srt"
241
241
  Requires-Dist: torch; extra == "srt"
242
242
  Requires-Dist: vllm>=0.6.3.post1; extra == "srt"
243
+ Requires-Dist: cuda-python; extra == "srt"
243
244
  Provides-Extra: srt-hip
244
245
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
245
246
  Requires-Dist: torch; extra == "srt-hip"
@@ -350,7 +351,7 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
350
351
  [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
351
352
 
352
353
  ## Adoption and Sponsorship
353
- The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, and xAI.
354
+ The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, xAI and 01.AI.
354
355
 
355
356
  ## Acknowledgment and Citation
356
357
  We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
@@ -2,22 +2,22 @@ sglang/__init__.py,sha256=3M0oz0ZA8fULhV5LwQ4hxh-MRdHsOJRD1D63C60pdG4,1616
2
2
  sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
3
3
  sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
4
  sglang/bench_offline_throughput.py,sha256=z6uA6Gxa_nFZa0cOXi7MJDuX82xcqk5WfqBMavd8a-s,10929
5
- sglang/bench_one_batch.py,sha256=WxrQUkMcxz5GV8OEHj0ckHgpC76HgO6YxmDvJFRDeyU,15670
6
- sglang/bench_one_batch_server.py,sha256=nzeF_bcaXanQuYLBxAvd3OO4fwbKproMcahXdHIVR6w,5920
5
+ sglang/bench_one_batch.py,sha256=AVMpCBWEsMI2TlMK55JPgPJu0kHg8DI0WV_Bhd4pJgc,15668
6
+ sglang/bench_one_batch_server.py,sha256=hYc3r9JQOLrfqmKgKPOmP0Kr63Sya9wPV_dHzMRZ2Dw,5924
7
7
  sglang/bench_serving.py,sha256=hI7FjaERyqKBrYtKewDU6E4rSufKxqsUPyUgtWtTKSI,52545
8
- sglang/check_env.py,sha256=nR2m0a9WbQmkimJihUx-Lqi7XjN0jyWTCO2vYyA7R2M,5356
8
+ sglang/check_env.py,sha256=rE4ZAG0e6M-Xd-qdHcKclN8Qav6b9gEh4yvlV_TbOg0,5450
9
9
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
10
- sglang/launch_server.py,sha256=_XIqBcXArYtHTqilOFkYWKZBYXGCMHAxbYOST08LGj0,415
10
+ sglang/launch_server.py,sha256=U17c44CbbpMBm2JQxVLaz1mfUKk7PgBDhTLAFNeJEvI,362
11
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
12
  sglang/utils.py,sha256=eCvD3fZCALr-MuyZxJL7HAeeqqpxAxf4LJrf7OiCbco,11547
13
- sglang/version.py,sha256=YrfhKDmn6rTAj_qREKEXk2FahHCqSbHd4BNoD7wlIi0,28
13
+ sglang/version.py,sha256=_Aams_yVBpGe9-85k-kF3qpgcd3D_AsWkVfMFmCWh3c,28
14
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  sglang/lang/chat_template.py,sha256=jprS3-In2FTUoedKwZg-HYvDwU8RTIYntOlf2zoN2sU,14814
16
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
17
17
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
18
18
  sglang/lang/interpreter.py,sha256=SBjejhLhTKzNM0HbjtTg5r17WPJ64WFSk6lcM_SCWKs,30717
19
19
  sglang/lang/ir.py,sha256=zpzzAO1YVldhE95Vwz5hU_TQltu-xt8A6rfFr0PuIDA,18410
20
- sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
20
+ sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
21
21
  sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
23
23
  sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
@@ -29,12 +29,12 @@ sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21
29
29
  sglang/srt/hf_transformers_utils.py,sha256=sUUCpjbTHuYDMuwOaz00nH5fataXKjliD8gCxXU64sw,6712
30
30
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
31
31
  sglang/srt/model_parallel.py,sha256=QR-Alqo0sElDXPJ79N1PhUHHKiEHPQn3dyXduMP-SHQ,3664
32
- sglang/srt/server.py,sha256=7PSxAUhiS796yQFeiQxiilRhLQ3FpV0wL53CfDgkCIk,30851
32
+ sglang/srt/server.py,sha256=tH_22tnksy3bbhYu_njjx5L59pb9lJ7tU40Z2BLoiaI,30894
33
33
  sglang/srt/server_args.py,sha256=CfmpU6_EDnxJzpJiRx2n6AhOPCtrHPOf-7wEtTF__L0,30834
34
- sglang/srt/utils.py,sha256=APZEUancLC0jRI1JMbv7e5bIZy3OEySGyZspxGA60yQ,33509
34
+ sglang/srt/utils.py,sha256=QXc01TOB7abpL6p3KzfP7u2xFZohQ-ThbI5DAJGoHeI,33894
35
35
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
36
36
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
37
- sglang/srt/configs/model_config.py,sha256=dQ58mYKN3M5IwldFZkwIb4CCBa6dREb5Om4Kg2kffOE,9565
37
+ sglang/srt/configs/model_config.py,sha256=r5N_OO4w3_R3kZ80P-ZPECscXmspI41d1vc6uEE9ixM,9526
38
38
  sglang/srt/configs/qwen2vl.py,sha256=AYHuFgJ0bwhWYkD7S6fvP7yJejJnuhy4xp5Q2W-O6ps,4424
39
39
  sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
40
40
  sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
@@ -50,12 +50,12 @@ sglang/srt/layers/logits_processor.py,sha256=V8fHxeQK8lzUhGD2Xc7MY1Y9qBhzFyh6hqp
50
50
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
51
51
  sglang/srt/layers/radix_attention.py,sha256=C_mK4mfmKlxMRNeKYP9E5R3PRd3eT-OcE_g3mo36dJM,2058
52
52
  sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
53
- sglang/srt/layers/sampler.py,sha256=zgNwgUx7fozkWsEJFRKDV9SipHBijfpU9pTroNst6Ho,4552
53
+ sglang/srt/layers/sampler.py,sha256=_enfER8MSxsCYrR6_NgyFxKA_XqKtii_asOZUFUUsd8,4580
54
54
  sglang/srt/layers/torchao_utils.py,sha256=v0hyr4hLsM42QwOPCdKb-ftRTjVokBZbqvRj4O4C-Nw,3415
55
55
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=RmaZbgXbFnGKX1eGYxlmiko-6JwaJX6seHupUSCtAm8,21583
56
56
  sglang/srt/layers/attention/__init__.py,sha256=EL1o6Q5vLgViN3pOr2A7F6K9FlNEpMdBypFAVMeq_HA,2445
57
57
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=BlX7uXteQpnoOnKsdBKh8h20zMVMEiibB5F_PkZSlNI,10706
58
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=oblYMbmYzK94H3EA9lMhKWaKdi8HLH5NqAiZmjzj4Es,24875
58
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=ENnNbsA8bY--eFe-Ecqa2RRklH2-a7SV_yZRzpDKnDQ,24879
59
59
  sglang/srt/layers/attention/triton_backend.py,sha256=gjxed2cvc2-8QEHkzyTVv6ui7oYOp2b_vgIUQVD1XuM,6538
60
60
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=BE63WhKiutSNkhJLsRwvfsRy-ExvuAv7FZyoWv73ul8,18744
61
61
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
@@ -73,13 +73,13 @@ sglang/srt/lora/lora.py,sha256=KhhO9aKCyFWvJnhI07lZKANIvNjtt882HrTYFNBZMv0,15065
73
73
  sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
74
74
  sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
75
75
  sglang/srt/managers/data_parallel_controller.py,sha256=JxRtJJTVn1FU2iD292rLZPftAsR4_8j4d3yF8j0dvBc,8327
76
- sglang/srt/managers/detokenizer_manager.py,sha256=nWBn54pz3aQ8tzVvViwwL2k0V4WATi0qw11H0Bzua-Q,7389
77
- sglang/srt/managers/image_processor.py,sha256=Pk_dtXzljTkFt7Acsv1RyDzEqvCvjc7BMngxGhtkpDU,13817
76
+ sglang/srt/managers/detokenizer_manager.py,sha256=oWquBe0yvSwILwllMBJFJUEgBt1NEM_3KluAc0T6Pnw,7333
77
+ sglang/srt/managers/image_processor.py,sha256=foLv3QVW_A8IRjRcHOKn0_HC771JbPEz8ML1mGqYKYw,13685
78
78
  sglang/srt/managers/io_struct.py,sha256=WLXz-tyn0jR7zNO9feRBXgyjphVa8qR55OoEOUdzoVI,13751
79
- sglang/srt/managers/schedule_batch.py,sha256=-5oYdkStPiYjPWl0tCkUVRjTGB7fjA0wIngK-09da7w,43111
79
+ sglang/srt/managers/schedule_batch.py,sha256=jBABHbL7gyrKdrFrzScJ76MtvG2D9Y5HDx74qsclo80,44470
80
80
  sglang/srt/managers/schedule_policy.py,sha256=ayFz4iPLIlG8mx5i1glTCAMHJPGpFedMP9UgRtqkNhA,12526
81
- sglang/srt/managers/scheduler.py,sha256=8owHPXG6fxZtsCWSJ6K7EOlFDcPxYinZC1DwKMJcEVM,55930
82
- sglang/srt/managers/session_controller.py,sha256=jXoPHxMGh8T1iYWIEjSXoPVwaL6NEjv3QtqlsrvPE1c,2355
81
+ sglang/srt/managers/scheduler.py,sha256=JVxV3Y5AU0OOOfePVM5dVPuuN_Kd9nwV3p3vH3CHQps,56059
82
+ sglang/srt/managers/session_controller.py,sha256=hajOnkNZ_JpP4E-GKMVGzyJSK4sc9uF9t229uFuxkVs,2874
83
83
  sglang/srt/managers/tokenizer_manager.py,sha256=zYbKEKNuM1B3PXzA7jnDpxew-0rZXSX-7dHmVLWG3e4,26477
84
84
  sglang/srt/managers/tp_worker.py,sha256=1SQJ60iKS9e5vGY555fT1iZ4OtLumXzeWfB08fSWKbk,6176
85
85
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7vhPebaOS4JamaS08CGf_hwxnUO7Gy_SXZXEPwNHKoY,7621
@@ -112,7 +112,7 @@ sglang/srt/models/llama.py,sha256=FSGuM3BamhuT5h2jedh5cSFwFYduOJwkAZJJ672awRw,16
112
112
  sglang/srt/models/llama_classification.py,sha256=c8WZ1ADa3f6s2IJVoP10ouVgeCwv_ndns_qMgLrC6QI,3413
113
113
  sglang/srt/models/llama_embedding.py,sha256=2ex2jrz31osaAd9V8sJeN0qyxmk-L5NgOBkXL1puGhI,3166
114
114
  sglang/srt/models/llama_reward.py,sha256=prhHDPpf1k6tlQtGE6zq5gx0uSZAD3W5v7W28bdgy4U,4619
115
- sglang/srt/models/llava.py,sha256=72DnZXIwu78zYqU8YIElq_AaSIFO_icYOPTHXE0_-YQ,24941
115
+ sglang/srt/models/llava.py,sha256=HjC2TDLngpaN8HMYyGp5doEK32HeQN8iT2tYE_Slrtg,25130
116
116
  sglang/srt/models/llavavid.py,sha256=DeWqGSmXgIYGuLyy2ZrxjM9WqbRjueP4chNmXt7Bnus,12221
117
117
  sglang/srt/models/minicpm.py,sha256=KbiTf-kaDAJxSo9Z4IGMTrs9WrYYji1KXO1kA2iy-as,13816
118
118
  sglang/srt/models/minicpm3.py,sha256=C43mTr2Qjccj4sXuTDgzbfZhvCNbsEHNggMRXQ7SrWs,25108
@@ -155,10 +155,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
155
155
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
156
156
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
157
157
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
158
- sglang/test/test_utils.py,sha256=ULF7C3pLXkMevXgE_Dodt29OBfvvXKUnRvwKhaBg1ys,23470
158
+ sglang/test/test_utils.py,sha256=NBEGQC_wtMqODQQZWrxdwmsoLFSZfDlQzIbsQ1kE_Yc,23468
159
159
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
160
- sglang-0.3.6.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
161
- sglang-0.3.6.post1.dist-info/METADATA,sha256=XwhCEL8SbEVcT7LQLk26g6tzduS6mByBE7dDqZYpQxo,22073
162
- sglang-0.3.6.post1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
163
- sglang-0.3.6.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
164
- sglang-0.3.6.post1.dist-info/RECORD,,
160
+ sglang-0.3.6.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
161
+ sglang-0.3.6.post2.dist-info/METADATA,sha256=3ekB4UX6bNwXzqlRChfxG0R8sme-x0FQAImcw0gpfM8,22122
162
+ sglang-0.3.6.post2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
163
+ sglang-0.3.6.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
164
+ sglang-0.3.6.post2.dist-info/RECORD,,