sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sglang/srt/server.py CHANGED
@@ -78,6 +78,7 @@ from sglang.srt.utils import (
78
78
  assert_pkg_version,
79
79
  configure_logger,
80
80
  enable_show_time_cost,
81
+ is_hip,
81
82
  kill_child_process,
82
83
  maybe_set_triton_cache_manager,
83
84
  prepare_model,
@@ -152,7 +153,7 @@ async def flush_cache():
152
153
  async def update_weights(obj: UpdateWeightReqInput, request: Request):
153
154
 
154
155
  success, message = await tokenizer_manager.update_weights(obj, request)
155
- content = {"message": message, "success": str(success)}
156
+ content = {"success": success, "message": message}
156
157
  if success:
157
158
  return JSONResponse(
158
159
  content,
@@ -434,6 +435,10 @@ def _set_envs_and_config(server_args: ServerArgs):
434
435
  "at https://docs.flashinfer.ai/installation.html.",
435
436
  )
436
437
 
438
+ if is_hip():
439
+ # to figure out a better method of not using fork later
440
+ mp.set_start_method("spawn", force=True)
441
+
437
442
 
438
443
  def _wait_and_warmup(server_args, pipe_finish_writer, pid):
439
444
  headers = {}
sglang/srt/server_args.py CHANGED
@@ -21,9 +21,22 @@ import logging
21
21
  import random
22
22
  from typing import List, Optional, Union
23
23
 
24
+ from sglang.srt.utils import is_hip
25
+
24
26
  logger = logging.getLogger(__name__)
25
27
 
26
28
 
29
+ class LoRAPathAction(argparse.Action):
30
+ def __call__(self, parser, namespace, values, option_string=None):
31
+ setattr(namespace, self.dest, {})
32
+ for lora_path in values:
33
+ if "=" in lora_path:
34
+ name, path = lora_path.split("=", 1)
35
+ getattr(namespace, self.dest)[name] = path
36
+ else:
37
+ getattr(namespace, self.dest)[lora_path] = lora_path
38
+
39
+
27
40
  @dataclasses.dataclass
28
41
  class ServerArgs:
29
42
  # Model and tokenizer
@@ -59,6 +72,7 @@ class ServerArgs:
59
72
  tp_size: int = 1
60
73
  stream_interval: int = 1
61
74
  random_seed: Optional[int] = None
75
+ constrained_json_whitespace_pattern: Optional[str] = None
62
76
 
63
77
  # Logging
64
78
  log_level: str = "info"
@@ -96,6 +110,7 @@ class ServerArgs:
96
110
  disable_custom_all_reduce: bool = False
97
111
  enable_mixed_chunk: bool = False
98
112
  enable_torch_compile: bool = False
113
+ max_torch_compile_bs: int = 32
99
114
  torchao_config: str = ""
100
115
  enable_p2p_check: bool = False
101
116
  enable_mla: bool = False
@@ -152,6 +167,11 @@ class ServerArgs:
152
167
  )
153
168
  self.sampling_backend = "pytorch"
154
169
 
170
+ # ROCm: flashinfer available later
171
+ if is_hip():
172
+ self.attention_backend = "triton"
173
+ self.sampling_backend = "pytorch"
174
+
155
175
  # Default kernel backends
156
176
  if self.enable_mla:
157
177
  logger.info("MLA optimization is tunred on. Use triton backend.")
@@ -359,6 +379,12 @@ class ServerArgs:
359
379
  default=ServerArgs.random_seed,
360
380
  help="The random seed.",
361
381
  )
382
+ parser.add_argument(
383
+ "--constrained-json-whitespace-pattern",
384
+ type=str,
385
+ default=ServerArgs.constrained_json_whitespace_pattern,
386
+ help=r"Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*",
387
+ )
362
388
  parser.add_argument(
363
389
  "--log-level",
364
390
  type=str,
@@ -498,6 +524,12 @@ class ServerArgs:
498
524
  action="store_true",
499
525
  help="Optimize the model with torch.compile. Experimental feature.",
500
526
  )
527
+ parser.add_argument(
528
+ "--max-torch-compile-bs",
529
+ type=int,
530
+ default=ServerArgs.max_torch_compile_bs,
531
+ help="Set the maximum batch size when using torch compile.",
532
+ )
501
533
  parser.add_argument(
502
534
  "--torchao-config",
503
535
  type=str,
@@ -532,7 +564,8 @@ class ServerArgs:
532
564
  type=str,
533
565
  nargs="*",
534
566
  default=None,
535
- help="The list of LoRA adapters.",
567
+ action=LoRAPathAction,
568
+ help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}",
536
569
  )
537
570
  parser.add_argument(
538
571
  "--max-loras-per-batch",
sglang/srt/utils.py CHANGED
@@ -51,6 +51,11 @@ show_time_cost = False
51
51
  time_infos = {}
52
52
 
53
53
 
54
+ # torch flag AMD GPU
55
+ def is_hip() -> bool:
56
+ return torch.version.hip is not None
57
+
58
+
54
59
  def enable_show_time_cost():
55
60
  global show_time_cost
56
61
  show_time_cost = True
@@ -187,7 +192,7 @@ def allocate_init_ports(
187
192
  cur_port += 1
188
193
 
189
194
  if port is not None and ret_ports[0] != port:
190
- logger.warn(
195
+ logger.warning(
191
196
  f"WARNING: Port {port} is not available. Use port {ret_ports[0]} instead."
192
197
  )
193
198
 
@@ -623,56 +628,7 @@ def set_ulimit(target_soft_limit=65535):
623
628
  try:
624
629
  resource.setrlimit(resource_type, (target_soft_limit, current_hard))
625
630
  except ValueError as e:
626
- logger.warn(f"Fail to set RLIMIT_NOFILE: {e}")
627
-
628
-
629
- def is_llama3_405b_fp8_head_16(model_config):
630
- """Return whether the model is meta-llama/Meta-Llama-3.1-405B-FP8 with 16 kv heads."""
631
- if (
632
- model_config.hf_config.architectures[0] == "LlamaForCausalLM"
633
- and model_config.hf_config.hidden_size == 16384
634
- and model_config.hf_config.intermediate_size == 53248
635
- and model_config.hf_config.num_hidden_layers == 126
636
- and model_config.hf_config.num_key_value_heads == 16
637
- and hasattr(model_config.hf_config, "quantization_config")
638
- and model_config.hf_config.quantization_config["quant_method"] == "fbgemm_fp8"
639
- ):
640
- return True
641
- return False
642
-
643
-
644
- def monkey_patch_vllm_qvk_linear_loader():
645
- """A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints."""
646
- from vllm.model_executor.layers.linear import QKVParallelLinear
647
-
648
- origin_weight_loader = QKVParallelLinear.weight_loader
649
-
650
- def get_original_weight(loaded_weight, head_dim):
651
- n_kv_head = loaded_weight.shape[0] // (2 * head_dim)
652
- dim = loaded_weight.shape[1]
653
- for i in range(n_kv_head):
654
- loaded_weight[i * head_dim : (i + 1) * head_dim, :] = loaded_weight[
655
- 2 * i * head_dim : (2 * i + 1) * head_dim, :
656
- ]
657
- original_kv_weight = loaded_weight[: n_kv_head * head_dim, :]
658
- assert original_kv_weight.shape == (n_kv_head * head_dim, dim)
659
- return original_kv_weight
660
-
661
- def weight_loader_srt(
662
- self,
663
- param: Parameter,
664
- loaded_weight: torch.Tensor,
665
- loaded_shard_id: Optional[str] = None,
666
- ):
667
- if (
668
- loaded_shard_id in ["k", "v"]
669
- and loaded_weight.shape[0] == self.head_size * self.total_num_kv_heads * 2
670
- ):
671
- loaded_weight = get_original_weight(loaded_weight, self.head_size)
672
-
673
- origin_weight_loader(self, param, loaded_weight, loaded_shard_id)
674
-
675
- setattr(QKVParallelLinear, "weight_loader", weight_loader_srt)
631
+ logger.warning(f"Fail to set RLIMIT_NOFILE: {e}")
676
632
 
677
633
 
678
634
  def add_api_key_middleware(app, api_key: str):
sglang/test/test_utils.py CHANGED
@@ -304,7 +304,6 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
304
304
  def select_sglang_backend(args: argparse.Namespace):
305
305
  if args.backend.startswith("srt"):
306
306
  if args.backend == "srt-no-parallel":
307
- global_config.enable_parallel_decoding = False
308
307
  global_config.enable_parallel_encoding = False
309
308
  backend = RuntimeEndpoint(f"{args.host}:{args.port}")
310
309
  elif args.backend.startswith("gpt-"):
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.1"
1
+ __version__ = "0.3.1.post1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.1
3
+ Version: 0.3.1.post1
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
318
318
  ### Method 2: From source
319
319
  ```
320
320
  # Use the last release branch
321
- git clone -b v0.3.1 https://github.com/sgl-project/sglang.git
321
+ git clone -b v0.3.1.post1 https://github.com/sgl-project/sglang.git
322
322
  cd sglang
323
323
 
324
324
  pip install --upgrade pip
@@ -1,18 +1,18 @@
1
1
  sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
2
2
  sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
3
- sglang/bench_latency.py,sha256=EvmXpaREU-g25OTcOUTgAUPmA-txfnyjaqY-4hlq97w,16925
3
+ sglang/bench_latency.py,sha256=CDMrch4QwIyb2DTH2kBIgQ6Q8sGHwtrx3Cz49qZNfpU,17078
4
4
  sglang/bench_serving.py,sha256=6OM5JIDuoxJDg-VLE4ijGGcS8-6ViaidV05lIrZmSzo,36239
5
5
  sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
6
- sglang/global_config.py,sha256=KWpXd4OCCWW2TRQo-dShvLs4jb15ej9Ejhxr_wggzBg,1535
6
+ sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
7
7
  sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
8
8
  sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
9
9
  sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
10
- sglang/version.py,sha256=r4xAFihOf72W9TD-lpMi6ntWSTKTP2SlzKP1ytkjRbI,22
10
+ sglang/version.py,sha256=83xK6WSmRR5ba-i5fDLUmoJT83Eg_dpsWgwcnsUhMpA,28
11
11
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
13
13
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
14
14
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
15
- sglang/lang/interpreter.py,sha256=M42SuOnijFaHWOe3Qyi-bNanRt-mYhSDa1wWn1J42Hw,30324
15
+ sglang/lang/interpreter.py,sha256=rOquFbMzxry7IItZlAn5TwtQfxMy718JPxOkiXO-yrg,30234
16
16
  sglang/lang/ir.py,sha256=W3UfZikcGeT86PDDjDjw-yNzrKY2e2UYO4DTatMCfm0,17704
17
17
  sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
18
18
  sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,56 +25,56 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
25
25
  sglang/srt/conversation.py,sha256=S5w5V6G1xigNxa3UQoSxRcMpQLWWDT9EPBoHBvHkSAk,19663
26
26
  sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVvUdIMDo,6016
27
27
  sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
28
- sglang/srt/server.py,sha256=FNmTpX7E9fVWj_NFzp4AtE5ODaA_rg5Xm8uZ0FB0X4o,20041
29
- sglang/srt/server_args.py,sha256=5OHH3gaO1s5Y2UQw2_FnFxwxrsqnUQ_WNqP1R1IWUAA,21877
30
- sglang/srt/utils.py,sha256=pckOt7gyQfJaV3-h8FPurWyrPij5_EBUX_Xp7x6y6YM,24229
28
+ sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
29
+ sglang/srt/server_args.py,sha256=M1Bm9u2JRsEptne-kw-D-B_29Q-M6V4UpAM7K-JxXAc,23309
30
+ sglang/srt/utils.py,sha256=8yxiMRttCcfswynkNPWD3yZFNAGFz2P1PzSuxHCBGns,22340
31
31
  sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
32
32
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
33
33
  sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
34
34
  sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5UOS_4,2070
35
35
  sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
36
- sglang/srt/constrained/fsm_cache.py,sha256=jItSvCu_XrAgltfejwgvdltaiT98-8lJGBe_84cSnTk,2786
36
+ sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
37
37
  sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
38
- sglang/srt/layers/activation.py,sha256=JEXNTgqxoiU4N-gVm4XMjobhft4JKDcMrgTkfpsRUzM,4856
39
- sglang/srt/layers/attention_backend.py,sha256=39P3iMs7B1iEzCA3EHdqUp3BLafeIVFnFWGzpEhlTRk,18182
38
+ sglang/srt/layers/activation.py,sha256=awcwOODYcVdUtC2JxJ1TGsV8Tru0eACKcxYN6cWHbl4,5148
39
+ sglang/srt/layers/attention_backend.py,sha256=lqMsY4VaOO_szIWoTAinXf1DnP2UsbF32kzvwFySz9w,18119
40
40
  sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
41
- sglang/srt/layers/layernorm.py,sha256=RXuS4UyksatqTF6lSK7VYyEiUEnBiNIBlEn8q4w84UA,3404
41
+ sglang/srt/layers/layernorm.py,sha256=-9Yph4nnMZYX_Q31MUGAimLajNclHXjgDkswpU2BTos,3694
42
42
  sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
43
43
  sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
44
44
  sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
45
- sglang/srt/layers/sampler.py,sha256=1BKsZbSLBGFVtTJo1LsThuoRjOSOnsL1AiwFxJNIXRs,5800
45
+ sglang/srt/layers/sampler.py,sha256=Q4u46oYu66e34rBNzr50VoXO8FM-assYiCoROolq3Zs,3661
46
46
  sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJNLZlE,2703
47
47
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
48
48
  sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
49
- sglang/srt/layers/fused_moe/layer.py,sha256=GT3r2UPx_PAufJd0SUMOXyh76ymAeYDubd0SM0H71bo,20977
49
+ sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
50
50
  sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
51
51
  sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
52
52
  sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
53
53
  sglang/srt/lora/lora.py,sha256=ksj866lgDul6zxO30Jm7Nrjv-mFAMrzdvP8sez3Pl6U,14938
54
54
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
55
- sglang/srt/lora/lora_manager.py,sha256=Q7rk1SMEZ75wda68rAZDGVyX_o8ZdIW2I5Fo_llaqHs,9475
55
+ sglang/srt/lora/lora_manager.py,sha256=7J7cGmyy1Ph4HCvLdM-ViAizAbV1snZqD-S7JLWXasI,9561
56
56
  sglang/srt/managers/controller_multi.py,sha256=KolZDso2WqH1ZhQw9p1eTmlFRgo4bcvzBxE44_sNE_o,6300
57
57
  sglang/srt/managers/controller_single.py,sha256=DiZALP_iIPZQMRx09a-LwT5_Dg7p-WU8HXyMoxJ9sRA,4955
58
58
  sglang/srt/managers/detokenizer_manager.py,sha256=yQkL5gLomLiy1qc6e9HNz8hcj7JQFHm1AfIrzpXaWJE,6852
59
59
  sglang/srt/managers/io_struct.py,sha256=bqmL3NDPLqOn6Au3WLF0NOe8Dh7ECMN7BTHCkEZ_Edk,11247
60
60
  sglang/srt/managers/policy_scheduler.py,sha256=tiBUi2GJU5eQEBK6HfsO1_YjWtFkougo40954DIp4dM,13026
61
- sglang/srt/managers/schedule_batch.py,sha256=QfixWzh7ks60eYE52mZHfUseXqcb89h4ZO1Aur3weLU,27340
61
+ sglang/srt/managers/schedule_batch.py,sha256=ppHYK65GP0dtuCEzpSbGm9uAne5rEoRmW8osLknXJpI,27384
62
62
  sglang/srt/managers/tokenizer_manager.py,sha256=ql-sObjl1oRigJwnLtqqTaaw-i7gPTDMoNXDEMftr40,29643
63
- sglang/srt/managers/tp_worker.py,sha256=Zbl_tFUAsD6Qv1fUEJCn_jyUc3JjDm33yI3Nmu1HY8w,39174
63
+ sglang/srt/managers/tp_worker.py,sha256=4Hhla9rfGYEdQtzGmxlIEqxt_WVkn2dkLLNQZHgpkf0,39270
64
64
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
65
65
  sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
66
66
  sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
67
67
  sglang/srt/mem_cache/memory_pool.py,sha256=4br3Ea2bfA-YsF_sPOVHlF2zQzYGd8fVaYTp197yZsE,7871
68
68
  sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
69
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=LngmwtBcvobJ_9G8lD966SihjmMJlgMgHe_ZogK1kDg,10090
69
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZeO-8Mg4Tf0iP-L9FXcyhHfNzGWpTPEDGeUoC2lzHTE,10418
70
70
  sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
71
- sglang/srt/model_executor/model_runner.py,sha256=7jBSCdZxyDLWMOdwv1vRa7Oue-xbp8lA6I11ZPKFdAc,23457
71
+ sglang/srt/model_executor/model_runner.py,sha256=LoQ7OFVwOiK_BfdpRfitss1TfJ8qrysHgWM-xXu7n2Y,22433
72
72
  sglang/srt/models/baichuan.py,sha256=NrG1rMJXhemkrUCEf8xKOSDQVsOD-nN8RQz6MWHOg84,15124
73
73
  sglang/srt/models/chatglm.py,sha256=KwxLHBEvK02McXDvBS0gnRxfIvOAu2QP7lgibrj9Nbc,13371
74
74
  sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
75
75
  sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
76
76
  sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
77
- sglang/srt/models/deepseek_v2.py,sha256=3D9WtPvVOu8U40x_KOksnmWBLmLIcgtV958go8NSj5Q,28307
77
+ sglang/srt/models/deepseek_v2.py,sha256=bPaGRL8ieBCXKIf-KY7-D9Rus7Qj3VGvvtERzAXAZWs,28421
78
78
  sglang/srt/models/exaone.py,sha256=3I5ZoiLotf7U-8c9QJRubpgf6JDx9I_z-ViXQlCC-x8,13087
79
79
  sglang/srt/models/gemma.py,sha256=GkwgGFHgGlXgBZN7s7Wooz5tMyCp1YtgLahU2NOo66M,12273
80
80
  sglang/srt/models/gemma2.py,sha256=sFfCNEm0_OOWElRSTDuroRv8wNMX8v_81Uko9m546KA,14923
@@ -87,10 +87,11 @@ sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2z
87
87
  sglang/srt/models/llava.py,sha256=O4XGdl70Hh4tM_OHapFGHbReC82mbe9xLw6GELKWKhU,24881
88
88
  sglang/srt/models/llavavid.py,sha256=ou5uIuskBoBo0lXvqFFfDLBYYVfehx27n-Lu8X9gpLs,11992
89
89
  sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
90
- sglang/srt/models/minicpm3.py,sha256=S7bNeCAsfvL44Vn350KLaqX674SCb4CpUuDnhjLjr3U,25113
90
+ sglang/srt/models/minicpm3.py,sha256=_C96kO3qGK0KRctXZf8LBR9s0sEW0QXWSGU0Vf6OrI8,25206
91
91
  sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
92
92
  sglang/srt/models/mixtral.py,sha256=oRC7mKBrPJhvzkWSabrbeQQQac-jtF4EV6H2Sgjc5JY,13897
93
93
  sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
94
+ sglang/srt/models/olmoe.py,sha256=d0ECpU-IXXwGYg9tkVeMARUbqVcqEnWfpH3rrNiGKA0,15336
94
95
  sglang/srt/models/qwen.py,sha256=nqSRzkiZzpRVG6WGQ1MBUclQnXyw8jlvoOq-euM8j5s,9954
95
96
  sglang/srt/models/qwen2.py,sha256=9_M-VkHN1_T1XN-gsl_L636QMQ9BLF2WqvTcx_1L6aw,12432
96
97
  sglang/srt/models/qwen2_moe.py,sha256=s7b5XnSvsBYtZZUkjPp442m59CqPJ3HxGUIwXBVWsXw,17153
@@ -100,7 +101,7 @@ sglang/srt/models/xverse_moe.py,sha256=YR--WZ33G7XEMsS7ZJl1cQ62Q8PDo9gWqpvJBY_cb
100
101
  sglang/srt/models/yivl.py,sha256=B6MELthWIm5KdSzX3o2tbbpApY8XdjUdmcQSD4dQe_I,4835
101
102
  sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
102
103
  sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
103
- sglang/srt/sampling/sampling_batch_info.py,sha256=vkwy59Jt51FESYukmwDKwPbCM45WMb16dx_408B3oqc,7900
104
+ sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
104
105
  sglang/srt/sampling/sampling_params.py,sha256=ggOXxafqfCD-xrGYcM57byLZ79CIeBP4AD5F44L_CW0,5635
105
106
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
106
107
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
@@ -120,10 +121,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
120
121
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
121
122
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
122
123
  sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
123
- sglang/test/test_utils.py,sha256=iBs07MBFxOidipTG1-s2hrCvcURFJVXo7gg10pzAQX8,17168
124
+ sglang/test/test_utils.py,sha256=NLiJqFRWnCeQ-gdCBe0ubNFCsig1CPb1EU-Ay9CtSfU,17109
124
125
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
125
- sglang-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
126
- sglang-0.3.1.dist-info/METADATA,sha256=QKZQ7PjuK22x_QlQy1LqPX6y4zLgJJ9FPoNNSkw3cEk,38125
127
- sglang-0.3.1.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
128
- sglang-0.3.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
129
- sglang-0.3.1.dist-info/RECORD,,
126
+ sglang-0.3.1.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
127
+ sglang-0.3.1.post1.dist-info/METADATA,sha256=zswdq5UTi5aLVmpEyjnc7SzIi60yc4w2hlMhckdxmcU,38137
128
+ sglang-0.3.1.post1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
129
+ sglang-0.3.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
130
+ sglang-0.3.1.post1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5