sglang-0.3.1.post2-py3-none-any.whl → sglang-0.3.1.post3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. sglang/bench_latency.py +8 -1
  2. sglang/srt/layers/activation.py +3 -2
  3. sglang/srt/layers/attention_backend.py +3 -1
  4. sglang/srt/layers/linear.py +1133 -0
  5. sglang/srt/layers/quantization/__init__.py +76 -0
  6. sglang/srt/layers/quantization/base_config.py +122 -0
  7. sglang/srt/models/baichuan.py +1 -1
  8. sglang/srt/models/chatglm.py +6 -6
  9. sglang/srt/models/commandr.py +7 -7
  10. sglang/srt/models/dbrx.py +7 -7
  11. sglang/srt/models/deepseek.py +7 -7
  12. sglang/srt/models/deepseek_v2.py +7 -7
  13. sglang/srt/models/exaone.py +6 -6
  14. sglang/srt/models/gemma.py +6 -6
  15. sglang/srt/models/gemma2.py +6 -6
  16. sglang/srt/models/gpt_bigcode.py +6 -6
  17. sglang/srt/models/grok.py +6 -6
  18. sglang/srt/models/internlm2.py +6 -6
  19. sglang/srt/models/llama.py +6 -6
  20. sglang/srt/models/llama_classification.py +1 -1
  21. sglang/srt/models/llava.py +1 -1
  22. sglang/srt/models/llavavid.py +1 -1
  23. sglang/srt/models/minicpm.py +6 -6
  24. sglang/srt/models/minicpm3.py +1 -1
  25. sglang/srt/models/mixtral.py +6 -6
  26. sglang/srt/models/mixtral_quant.py +6 -6
  27. sglang/srt/models/olmoe.py +1 -1
  28. sglang/srt/models/qwen.py +6 -6
  29. sglang/srt/models/qwen2.py +6 -6
  30. sglang/srt/models/qwen2_moe.py +7 -7
  31. sglang/srt/models/stablelm.py +6 -6
  32. sglang/srt/models/xverse.py +1 -1
  33. sglang/srt/models/xverse_moe.py +1 -1
  34. sglang/srt/models/yivl.py +1 -1
  35. sglang/srt/utils.py +21 -1
  36. sglang/test/test_utils.py +4 -2
  37. sglang/version.py +1 -1
  38. {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/METADATA +3 -2
  39. {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/RECORD +42 -39
  40. {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/LICENSE +0 -0
  41. {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/WHEEL +0 -0
  42. {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/top_level.txt +0 -0
@@ -29,12 +29,6 @@ from vllm.distributed import (
29
29
  get_tensor_model_parallel_world_size,
30
30
  tensor_model_parallel_all_reduce,
31
31
  )
32
- from vllm.model_executor.layers.linear import (
33
- QKVParallelLinear,
34
- ReplicatedLinear,
35
- RowParallelLinear,
36
- )
37
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
38
32
  from vllm.model_executor.layers.rotary_embedding import get_rope
39
33
  from vllm.model_executor.layers.vocab_parallel_embedding import (
40
34
  ParallelLMHead,
@@ -43,7 +37,13 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
43
37
  from vllm.model_executor.model_loader.weight_utils import default_weight_loader
44
38
 
45
39
  from sglang.srt.layers.layernorm import RMSNorm
40
+ from sglang.srt.layers.linear import (
41
+ QKVParallelLinear,
42
+ ReplicatedLinear,
43
+ RowParallelLinear,
44
+ )
46
45
  from sglang.srt.layers.logits_processor import LogitsProcessor
46
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
47
47
  from sglang.srt.layers.radix_attention import RadixAttention
48
48
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
49
49
 
@@ -35,7 +35,6 @@ from vllm.model_executor.layers.linear import (
35
35
  ReplicatedLinear,
36
36
  RowParallelLinear,
37
37
  )
38
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
39
38
  from vllm.model_executor.layers.rotary_embedding import get_rope
40
39
  from vllm.model_executor.layers.vocab_parallel_embedding import (
41
40
  ParallelLMHead,
@@ -47,6 +46,7 @@ from vllm.utils import print_warning_once
47
46
  from sglang.srt.layers.activation import SiluAndMul
48
47
  from sglang.srt.layers.layernorm import RMSNorm
49
48
  from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
49
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
50
50
  from sglang.srt.layers.radix_attention import RadixAttention
51
51
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
52
52
 
sglang/srt/models/qwen.py CHANGED
@@ -22,12 +22,6 @@ from torch import nn
22
22
  from transformers import PretrainedConfig
23
23
  from vllm.config import CacheConfig
24
24
  from vllm.distributed import get_tensor_model_parallel_world_size
25
- from vllm.model_executor.layers.linear import (
26
- MergedColumnParallelLinear,
27
- QKVParallelLinear,
28
- RowParallelLinear,
29
- )
30
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
31
25
  from vllm.model_executor.layers.rotary_embedding import get_rope
32
26
  from vllm.model_executor.layers.vocab_parallel_embedding import (
33
27
  ParallelLMHead,
@@ -37,7 +31,13 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
37
31
 
38
32
  from sglang.srt.layers.activation import SiluAndMul
39
33
  from sglang.srt.layers.layernorm import RMSNorm
34
+ from sglang.srt.layers.linear import (
35
+ MergedColumnParallelLinear,
36
+ QKVParallelLinear,
37
+ RowParallelLinear,
38
+ )
40
39
  from sglang.srt.layers.logits_processor import LogitsProcessor
40
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
41
41
  from sglang.srt.layers.radix_attention import RadixAttention
42
42
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
43
43
 
@@ -22,12 +22,6 @@ import torch
22
22
  from torch import nn
23
23
  from vllm.config import CacheConfig
24
24
  from vllm.distributed import get_tensor_model_parallel_world_size
25
- from vllm.model_executor.layers.linear import (
26
- MergedColumnParallelLinear,
27
- QKVParallelLinear,
28
- RowParallelLinear,
29
- )
30
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
31
25
  from vllm.model_executor.layers.rotary_embedding import get_rope
32
26
  from vllm.model_executor.layers.vocab_parallel_embedding import (
33
27
  ParallelLMHead,
@@ -37,8 +31,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
37
31
 
38
32
  from sglang.srt.layers.activation import SiluAndMul
39
33
  from sglang.srt.layers.layernorm import RMSNorm
34
+ from sglang.srt.layers.linear import (
35
+ MergedColumnParallelLinear,
36
+ QKVParallelLinear,
37
+ RowParallelLinear,
38
+ )
40
39
  from sglang.srt.layers.logits_processor import LogitsProcessor
41
40
  from sglang.srt.layers.pooler import Pooler, PoolingType
41
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
42
42
  from sglang.srt.layers.radix_attention import RadixAttention
43
43
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
44
44
 
@@ -29,13 +29,6 @@ from vllm.distributed import (
29
29
  tensor_model_parallel_all_reduce,
30
30
  )
31
31
  from vllm.model_executor.layers.fused_moe import FusedMoE
32
- from vllm.model_executor.layers.linear import (
33
- MergedColumnParallelLinear,
34
- QKVParallelLinear,
35
- ReplicatedLinear,
36
- RowParallelLinear,
37
- )
38
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
39
32
  from vllm.model_executor.layers.rotary_embedding import get_rope
40
33
  from vllm.model_executor.layers.vocab_parallel_embedding import (
41
34
  ParallelLMHead,
@@ -45,7 +38,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
45
38
 
46
39
  from sglang.srt.layers.activation import SiluAndMul
47
40
  from sglang.srt.layers.layernorm import RMSNorm
41
+ from sglang.srt.layers.linear import (
42
+ MergedColumnParallelLinear,
43
+ QKVParallelLinear,
44
+ ReplicatedLinear,
45
+ RowParallelLinear,
46
+ )
48
47
  from sglang.srt.layers.logits_processor import LogitsProcessor
48
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
49
49
  from sglang.srt.layers.radix_attention import RadixAttention
50
50
  from sglang.srt.layers.torchao_utils import apply_torchao_config_
51
51
  from sglang.srt.managers.schedule_batch import global_server_args_dict
@@ -24,12 +24,6 @@ from torch import nn
24
24
  from transformers import PretrainedConfig
25
25
  from vllm.config import CacheConfig
26
26
  from vllm.distributed import get_tensor_model_parallel_world_size
27
- from vllm.model_executor.layers.linear import (
28
- MergedColumnParallelLinear,
29
- QKVParallelLinear,
30
- RowParallelLinear,
31
- )
32
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
33
27
  from vllm.model_executor.layers.rotary_embedding import get_rope
34
28
  from vllm.model_executor.layers.vocab_parallel_embedding import (
35
29
  ParallelLMHead,
@@ -38,7 +32,13 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
38
32
  from vllm.model_executor.model_loader.weight_utils import default_weight_loader
39
33
 
40
34
  from sglang.srt.layers.activation import SiluAndMul
35
+ from sglang.srt.layers.linear import (
36
+ MergedColumnParallelLinear,
37
+ QKVParallelLinear,
38
+ RowParallelLinear,
39
+ )
41
40
  from sglang.srt.layers.logits_processor import LogitsProcessor
41
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
42
42
  from sglang.srt.layers.radix_attention import RadixAttention
43
43
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
44
44
 
@@ -31,7 +31,6 @@ from vllm.model_executor.layers.linear import (
31
31
  QKVParallelLinear,
32
32
  RowParallelLinear,
33
33
  )
34
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
35
34
  from vllm.model_executor.layers.rotary_embedding import get_rope
36
35
  from vllm.model_executor.layers.vocab_parallel_embedding import (
37
36
  ParallelLMHead,
@@ -40,6 +39,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
40
39
  from vllm.model_executor.model_loader.weight_utils import default_weight_loader
41
40
 
42
41
  from sglang.srt.layers.logits_processor import LogitsProcessor
42
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
43
43
  from sglang.srt.layers.radix_attention import RadixAttention
44
44
  from sglang.srt.model_executor.model_runner import InputMetadata
45
45
 
@@ -34,7 +34,6 @@ from vllm.model_executor.layers.linear import (
34
34
  ReplicatedLinear,
35
35
  RowParallelLinear,
36
36
  )
37
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
38
37
  from vllm.model_executor.layers.rotary_embedding import get_rope
39
38
  from vllm.model_executor.layers.vocab_parallel_embedding import (
40
39
  ParallelLMHead,
@@ -43,6 +42,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
43
42
  from vllm.model_executor.model_loader.weight_utils import default_weight_loader
44
43
 
45
44
  from sglang.srt.layers.logits_processor import LogitsProcessor
45
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
46
46
  from sglang.srt.layers.radix_attention import RadixAttention
47
47
  from sglang.srt.model_executor.forward_batch_info import InputMetadata
48
48
 
sglang/srt/models/yivl.py CHANGED
@@ -21,9 +21,9 @@ import torch
21
21
  import torch.nn as nn
22
22
  from transformers import CLIPVisionModel, LlavaConfig
23
23
  from vllm.config import CacheConfig
24
- from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
25
24
  from vllm.model_executor.model_loader.weight_utils import default_weight_loader
26
25
 
26
+ from sglang.srt.layers.quantization.base_config import QuantizationConfig
27
27
  from sglang.srt.models.llava import LlavaLlamaForCausalLM
28
28
 
29
29
 
sglang/srt/utils.py CHANGED
@@ -26,7 +26,7 @@ import struct
26
26
  import time
27
27
  from importlib.metadata import PackageNotFoundError, version
28
28
  from io import BytesIO
29
- from typing import List, Optional, Union
29
+ from typing import Any, Dict, List, Optional, Union
30
30
 
31
31
  import numpy as np
32
32
  import psutil
@@ -682,3 +682,23 @@ def replace_submodule(
682
682
  target_name = module_name.split(".")[-1]
683
683
  setattr(parent, target_name, new_module)
684
684
  return new_module
685
+
686
+
687
+ def set_weight_attrs(
688
+ weight: torch.Tensor,
689
+ weight_attrs: Optional[Dict[str, Any]],
690
+ ):
691
+ """Set attributes on a weight tensor.
692
+
693
+ This method is used to set attributes on a weight tensor. This method
694
+ will not overwrite existing attributes.
695
+
696
+ Args:
697
+ weight: The weight tensor.
698
+ weight_attrs: A dictionary of attributes to set on the weight tensor.
699
+ """
700
+ if weight_attrs is None:
701
+ return
702
+ for key, value in weight_attrs.items():
703
+ assert not hasattr(weight, key), f"Overwriting existing tensor attribute: {key}"
704
+ setattr(weight, key, value)
sglang/test/test_utils.py CHANGED
@@ -25,11 +25,13 @@ from sglang.utils import get_exception_traceback
25
25
  DEFAULT_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/Meta-Llama-3.1-8B-FP8"
26
26
  DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
27
27
  DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
28
+ DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
28
29
  DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
29
30
  DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Meta-Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it"
30
- DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct"
31
+ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
31
32
  DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8"
32
- DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
33
+ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
34
+ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
33
35
 
34
36
 
35
37
  def is_in_ci():
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.1.post2"
1
+ __version__ = "0.3.1.post3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.1.post2
3
+ Version: 0.3.1.post3
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
318
318
  ### Method 2: From source
319
319
  ```
320
320
  # Use the last release branch
321
- git clone -b v0.3.1.post2 https://github.com/sgl-project/sglang.git
321
+ git clone -b v0.3.1.post3 https://github.com/sgl-project/sglang.git
322
322
  cd sglang
323
323
 
324
324
  pip install --upgrade pip
@@ -499,6 +499,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
499
499
  - Llama / Llama 2 / Llama 3 / Llama 3.1
500
500
  - Mistral / Mixtral / Mistral NeMo
501
501
  - Gemma / Gemma 2
502
+ - OLMoE
502
503
  - Qwen / Qwen 2 / Qwen 2 MoE
503
504
  - DeepSeek / DeepSeek 2
504
505
  - [LLaVA-OneVision](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/)
@@ -1,6 +1,6 @@
1
1
  sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
2
2
  sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
3
- sglang/bench_latency.py,sha256=bA50iUYOxEnLjzY2S4AgwxtSAqujUbGfQFwbLZj5XNc,17160
3
+ sglang/bench_latency.py,sha256=lyA_AwlhDbLMrH9Ca5_X3NUYQdwbHn_vpNbMyvqOZic,17342
4
4
  sglang/bench_server_latency.py,sha256=KvFJgKQTSons7KOG0CBqnnOOx1gW29bBM1Z3GQO_6-E,5599
5
5
  sglang/bench_serving.py,sha256=3gIJ1O2x51Fwd4wYJjgwluTbWKXL-azckQte7YC5zIc,36261
6
6
  sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
@@ -8,7 +8,7 @@ sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
8
8
  sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
9
9
  sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
10
10
  sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
11
- sglang/version.py,sha256=U9F0UlFDynnYN5dX-kxehylWCwXo9a6E6W4FfDusfRg,28
11
+ sglang/version.py,sha256=vtapUd7gvia5JFNpZOX5Q2A4TqgNWABeKFK66x_VeZU,28
12
12
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
14
14
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -28,7 +28,7 @@ sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVv
28
28
  sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
29
29
  sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
30
30
  sglang/srt/server_args.py,sha256=3XjDt6SSjTfbOe0HSXA--2aUvrpWSnQmAHYwmeS1-M0,23159
31
- sglang/srt/utils.py,sha256=8yxiMRttCcfswynkNPWD3yZFNAGFz2P1PzSuxHCBGns,22340
31
+ sglang/srt/utils.py,sha256=Vly46zMM_rz__DaU15vbidYtS0Gh2s7TnAMj4WLyAO4,22954
32
32
  sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
33
33
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
34
34
  sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
@@ -36,10 +36,11 @@ sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5
36
36
  sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
37
37
  sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
38
38
  sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
39
- sglang/srt/layers/activation.py,sha256=i3omgj3GdUIZBqJNUjpdJsMc2UM3Lx07FT2J1WICrqA,5171
40
- sglang/srt/layers/attention_backend.py,sha256=lqMsY4VaOO_szIWoTAinXf1DnP2UsbF32kzvwFySz9w,18119
39
+ sglang/srt/layers/activation.py,sha256=tRWHxIjcIopkOremkb5Jy5O0rgdB1PAhHfIEONfyj6Y,5166
40
+ sglang/srt/layers/attention_backend.py,sha256=TMxsN1HwgqAURD1i77c-TN-3Xy53H9Kbg6HgpRHHoj0,18167
41
41
  sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
42
42
  sglang/srt/layers/layernorm.py,sha256=p_7bnmSpJ_slpoP0Gk5wQPpHtLllUu3imSIRBqGqTP0,3737
43
+ sglang/srt/layers/linear.py,sha256=9rjCiSb_QOn5RgpVjIhEKdReRvSYVfcTSjbWBEbApLI,45173
43
44
  sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
44
45
  sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
45
46
  sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
@@ -48,6 +49,8 @@ sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJ
48
49
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
49
50
  sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
50
51
  sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
52
+ sglang/srt/layers/quantization/__init__.py,sha256=wl9mIOeA6mtKIaW1LWUJABWPdqOb-2uZ-kSijWoxLtU,3095
53
+ sglang/srt/layers/quantization/base_config.py,sha256=vlpSPvSrFmUe65ETg4SoPocQ9bVNY6As3QuHdr_3Dr4,4023
51
54
  sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
52
55
  sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
53
56
  sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
@@ -70,36 +73,36 @@ sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5M
70
73
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=gZ0Wukqz6u67MMIj4MC8JET9jcHdh0rotYzpuPlHruY,10512
71
74
  sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
72
75
  sglang/srt/model_executor/model_runner.py,sha256=X7AG1k9AI_kqS8q1i5Bfv-kFysIdqJAVWMGGZoAPThY,22726
73
- sglang/srt/models/baichuan.py,sha256=NrG1rMJXhemkrUCEf8xKOSDQVsOD-nN8RQz6MWHOg84,15124
74
- sglang/srt/models/chatglm.py,sha256=KwxLHBEvK02McXDvBS0gnRxfIvOAu2QP7lgibrj9Nbc,13371
75
- sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
76
- sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
77
- sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
78
- sglang/srt/models/deepseek_v2.py,sha256=1J0pt1jZRcBBGYbgt1wGiuxPcrdpfTEUEaGFqju6TVA,28431
79
- sglang/srt/models/exaone.py,sha256=3I5ZoiLotf7U-8c9QJRubpgf6JDx9I_z-ViXQlCC-x8,13087
80
- sglang/srt/models/gemma.py,sha256=GkwgGFHgGlXgBZN7s7Wooz5tMyCp1YtgLahU2NOo66M,12273
81
- sglang/srt/models/gemma2.py,sha256=sFfCNEm0_OOWElRSTDuroRv8wNMX8v_81Uko9m546KA,14923
82
- sglang/srt/models/gpt_bigcode.py,sha256=kzHYogeGXZF4KHpkXA-RGqvs016mA-6klWxD2QJTi9E,10195
83
- sglang/srt/models/grok.py,sha256=6I4OwQwNyAbh5GF24_SRm12XYBvM9iGWB-T4TSTJ0wU,14929
84
- sglang/srt/models/internlm2.py,sha256=6j7JH0p3yib8GZDH8Cmrs-pgwfH3eOlAK6V3Cq64O7w,12202
85
- sglang/srt/models/llama.py,sha256=nbJwRcG9DnurVNSGLKJjnmBmTXP1_5WZpudth_0PVpw,15216
86
- sglang/srt/models/llama_classification.py,sha256=HF-69J9qIYdfX0R5wEtIgvafMzprKcXdvF3W_orl_kA,3394
76
+ sglang/srt/models/baichuan.py,sha256=d2PFmyLBXjzS7X7FL9uz139_CpBPb5WYhzcHgF--gRE,15115
77
+ sglang/srt/models/chatglm.py,sha256=chDkgLTRU3bPxTUilhW_FGnsUWj_2fkvulCi9pdDxBY,13353
78
+ sglang/srt/models/commandr.py,sha256=FspSRkMRAXUjD3xzAkxkMiGiRg91czn9T5bagrf3l9M,14136
79
+ sglang/srt/models/dbrx.py,sha256=UmpbTCuf8rYe2Grut7YUPU1gEwsDhgNIs8vW4DNiaf0,14634
80
+ sglang/srt/models/deepseek.py,sha256=TWwfwKYvZZyu2UbimvimeyU_7u7HyIYZlRdlPtOCTfo,15988
81
+ sglang/srt/models/deepseek_v2.py,sha256=36iH4HrObMasOY801Tacub_40BR_0ImdqdKcJ6nHOD8,28413
82
+ sglang/srt/models/exaone.py,sha256=0OTgeAzyi_xvoQTx4TwYkCxRq8sMa-4EYL0_KJRmiAU,13069
83
+ sglang/srt/models/gemma.py,sha256=qo-4F602DKuv33zp4i4dayteFoVhnTYgVbFWKYms5Og,12255
84
+ sglang/srt/models/gemma2.py,sha256=8wGqNQPaPjuTtgHiKsUP4nowOukPvXwRywD4lkAW9Dg,14905
85
+ sglang/srt/models/gpt_bigcode.py,sha256=k_pZa4Sg5GEsr4ln0kjP765moGUPNs5a6iANPjE2W8U,10177
86
+ sglang/srt/models/grok.py,sha256=71Zx-4Q3wggNMtRYlXuPMA-auK-sHBYukI1Usn8LVrE,14911
87
+ sglang/srt/models/internlm2.py,sha256=nEr6MSHFkTjPLvWl1jQQdGFO7iOHex6YtE-I4rYuLao,12184
88
+ sglang/srt/models/llama.py,sha256=bdIt9IfZBgsg6CoZT3lvB-dqXhfxempdRHLkY3Su_VU,15198
89
+ sglang/srt/models/llama_classification.py,sha256=UpwYsgNVS1065t7Yjmi2XGbk9Or8bq2cF82zH1Yx2Mg,3385
87
90
  sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2zDEyLW4g,3458
88
- sglang/srt/models/llava.py,sha256=O4XGdl70Hh4tM_OHapFGHbReC82mbe9xLw6GELKWKhU,24881
89
- sglang/srt/models/llavavid.py,sha256=ou5uIuskBoBo0lXvqFFfDLBYYVfehx27n-Lu8X9gpLs,11992
90
- sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
91
- sglang/srt/models/minicpm3.py,sha256=McPWyy2fQqfHUhi9Nk36rkvvPAS8RmLOY7Vh4ah5c1w,25216
91
+ sglang/srt/models/llava.py,sha256=1MG1JDDQb7xc67BSimDo98Gmvza6PmrHQHmKybsDui4,24872
92
+ sglang/srt/models/llavavid.py,sha256=RqOUFROt-gqTlFYqnySAVBXJO9g-NMU2yke-AW5cV6o,11983
93
+ sglang/srt/models/minicpm.py,sha256=Xvy99mkfwzRZCLOe3BhfmNSuJyDhGjjAJq0YOpepu_Q,13807
94
+ sglang/srt/models/minicpm3.py,sha256=yuiwWNfJeWvfUgwkbEfpuc9_uPB6odqBCbdYj8t9aDQ,25207
92
95
  sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
93
- sglang/srt/models/mixtral.py,sha256=oRC7mKBrPJhvzkWSabrbeQQQac-jtF4EV6H2Sgjc5JY,13897
94
- sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
95
- sglang/srt/models/olmoe.py,sha256=d0ECpU-IXXwGYg9tkVeMARUbqVcqEnWfpH3rrNiGKA0,15336
96
- sglang/srt/models/qwen.py,sha256=nqSRzkiZzpRVG6WGQ1MBUclQnXyw8jlvoOq-euM8j5s,9954
97
- sglang/srt/models/qwen2.py,sha256=9_M-VkHN1_T1XN-gsl_L636QMQ9BLF2WqvTcx_1L6aw,12432
98
- sglang/srt/models/qwen2_moe.py,sha256=s7b5XnSvsBYtZZUkjPp442m59CqPJ3HxGUIwXBVWsXw,17153
99
- sglang/srt/models/stablelm.py,sha256=30ngpc0Xq3VxzXJlf6svP1oax8Q3krMJkxM8PVKtZWU,11359
100
- sglang/srt/models/xverse.py,sha256=L3g32-je_7JmzF2-hztaIVshHYCIv7jOM3oFs-fb2MY,13658
101
- sglang/srt/models/xverse_moe.py,sha256=CgDD9cR83UVfTsPU6WcbHVYBrkYKv_kTdwncTIx7Q7U,15842
102
- sglang/srt/models/yivl.py,sha256=B6MELthWIm5KdSzX3o2tbbpApY8XdjUdmcQSD4dQe_I,4835
96
+ sglang/srt/models/mixtral.py,sha256=QzWIhjk8gW9DquTvgQsWK3VK0ccdTMT0hCDDHI03KPI,13879
97
+ sglang/srt/models/mixtral_quant.py,sha256=e2x1AykUSVRqEVw6Pg7uKW1Uj8xyn4jZSfLJL4Kl5o8,14054
98
+ sglang/srt/models/olmoe.py,sha256=hGh2IlCg9kr1WIeGyRWwNpa1CfyZH163vq7eSx5d598,15327
99
+ sglang/srt/models/qwen.py,sha256=Vs6f8Jn1TswEzgiPS0G9qxeDU_DdC60JnhDeRDTH3FQ,9936
100
+ sglang/srt/models/qwen2.py,sha256=pamZrETUcaXbWN4tVTjObFPNjqaMu49-8g267NzxkFI,12414
101
+ sglang/srt/models/qwen2_moe.py,sha256=2BFsp1oPs7o_3uc8xvIGfGRNNU2TKkmKZY9P1qtgtlQ,17135
102
+ sglang/srt/models/stablelm.py,sha256=v67JM1SHb-LinrsX598WMsLVeyzjoKquW6G5G30X5fQ,11341
103
+ sglang/srt/models/xverse.py,sha256=VThXXKg3DzepcEP1JHcqSyhRBvq6yL14oh4uj5TJOEM,13649
104
+ sglang/srt/models/xverse_moe.py,sha256=BqmV-uk9ipp4nrj6-lnFfvkwUcuKmV7yfGAYB6Ob-UQ,15833
105
+ sglang/srt/models/yivl.py,sha256=N3noJ5M-FiZS-E_zfaJs4prQOu_ineRt11MWloYgOR8,4826
103
106
  sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
104
107
  sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
105
108
  sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
@@ -122,10 +125,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
122
125
  sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
123
126
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
124
127
  sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
125
- sglang/test/test_utils.py,sha256=dsHRd1xLzcjlarxUnDIz2XEHfut7HvqVPwx2Fn7vf10,17179
128
+ sglang/test/test_utils.py,sha256=OnAFpTA94GmQCHCV5XpaYImn11U7Cg4yfSw0nC17GRs,17504
126
129
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
127
- sglang-0.3.1.post2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
128
- sglang-0.3.1.post2.dist-info/METADATA,sha256=WxMy8Ur_rjPxqVOoWSFoM3eBHWt0cKGyrtwOUfWL-Vc,38114
129
- sglang-0.3.1.post2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
130
- sglang-0.3.1.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
131
- sglang-0.3.1.post2.dist-info/RECORD,,
130
+ sglang-0.3.1.post3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
131
+ sglang-0.3.1.post3.dist-info/METADATA,sha256=uhvB-z9UZsAafHaPfU9qYU6oKxrC6BLcyBspbtoFAY8,38122
132
+ sglang-0.3.1.post3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
133
+ sglang-0.3.1.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
134
+ sglang-0.3.1.post3.dist-info/RECORD,,