sglang 0.3.1.post2__py3-none-any.whl → 0.3.1.post3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_latency.py +8 -1
- sglang/srt/layers/activation.py +3 -2
- sglang/srt/layers/attention_backend.py +3 -1
- sglang/srt/layers/linear.py +1133 -0
- sglang/srt/layers/quantization/__init__.py +76 -0
- sglang/srt/layers/quantization/base_config.py +122 -0
- sglang/srt/models/baichuan.py +1 -1
- sglang/srt/models/chatglm.py +6 -6
- sglang/srt/models/commandr.py +7 -7
- sglang/srt/models/dbrx.py +7 -7
- sglang/srt/models/deepseek.py +7 -7
- sglang/srt/models/deepseek_v2.py +7 -7
- sglang/srt/models/exaone.py +6 -6
- sglang/srt/models/gemma.py +6 -6
- sglang/srt/models/gemma2.py +6 -6
- sglang/srt/models/gpt_bigcode.py +6 -6
- sglang/srt/models/grok.py +6 -6
- sglang/srt/models/internlm2.py +6 -6
- sglang/srt/models/llama.py +6 -6
- sglang/srt/models/llama_classification.py +1 -1
- sglang/srt/models/llava.py +1 -1
- sglang/srt/models/llavavid.py +1 -1
- sglang/srt/models/minicpm.py +6 -6
- sglang/srt/models/minicpm3.py +1 -1
- sglang/srt/models/mixtral.py +6 -6
- sglang/srt/models/mixtral_quant.py +6 -6
- sglang/srt/models/olmoe.py +1 -1
- sglang/srt/models/qwen.py +6 -6
- sglang/srt/models/qwen2.py +6 -6
- sglang/srt/models/qwen2_moe.py +7 -7
- sglang/srt/models/stablelm.py +6 -6
- sglang/srt/models/xverse.py +1 -1
- sglang/srt/models/xverse_moe.py +1 -1
- sglang/srt/models/yivl.py +1 -1
- sglang/srt/utils.py +21 -1
- sglang/test/test_utils.py +4 -2
- sglang/version.py +1 -1
- {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/METADATA +3 -2
- {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/RECORD +42 -39
- {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/LICENSE +0 -0
- {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/WHEEL +0 -0
- {sglang-0.3.1.post2.dist-info → sglang-0.3.1.post3.dist-info}/top_level.txt +0 -0
@@ -29,12 +29,6 @@ from vllm.distributed import (
|
|
29
29
|
get_tensor_model_parallel_world_size,
|
30
30
|
tensor_model_parallel_all_reduce,
|
31
31
|
)
|
32
|
-
from vllm.model_executor.layers.linear import (
|
33
|
-
QKVParallelLinear,
|
34
|
-
ReplicatedLinear,
|
35
|
-
RowParallelLinear,
|
36
|
-
)
|
37
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
38
32
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
39
33
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
40
34
|
ParallelLMHead,
|
@@ -43,7 +37,13 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|
43
37
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
44
38
|
|
45
39
|
from sglang.srt.layers.layernorm import RMSNorm
|
40
|
+
from sglang.srt.layers.linear import (
|
41
|
+
QKVParallelLinear,
|
42
|
+
ReplicatedLinear,
|
43
|
+
RowParallelLinear,
|
44
|
+
)
|
46
45
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
46
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
47
47
|
from sglang.srt.layers.radix_attention import RadixAttention
|
48
48
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
49
49
|
|
sglang/srt/models/olmoe.py
CHANGED
@@ -35,7 +35,6 @@ from vllm.model_executor.layers.linear import (
|
|
35
35
|
ReplicatedLinear,
|
36
36
|
RowParallelLinear,
|
37
37
|
)
|
38
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
39
38
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
40
39
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
41
40
|
ParallelLMHead,
|
@@ -47,6 +46,7 @@ from vllm.utils import print_warning_once
|
|
47
46
|
from sglang.srt.layers.activation import SiluAndMul
|
48
47
|
from sglang.srt.layers.layernorm import RMSNorm
|
49
48
|
from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
|
49
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
50
50
|
from sglang.srt.layers.radix_attention import RadixAttention
|
51
51
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
52
52
|
|
sglang/srt/models/qwen.py
CHANGED
@@ -22,12 +22,6 @@ from torch import nn
|
|
22
22
|
from transformers import PretrainedConfig
|
23
23
|
from vllm.config import CacheConfig
|
24
24
|
from vllm.distributed import get_tensor_model_parallel_world_size
|
25
|
-
from vllm.model_executor.layers.linear import (
|
26
|
-
MergedColumnParallelLinear,
|
27
|
-
QKVParallelLinear,
|
28
|
-
RowParallelLinear,
|
29
|
-
)
|
30
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
31
25
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
32
26
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
33
27
|
ParallelLMHead,
|
@@ -37,7 +31,13 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
|
37
31
|
|
38
32
|
from sglang.srt.layers.activation import SiluAndMul
|
39
33
|
from sglang.srt.layers.layernorm import RMSNorm
|
34
|
+
from sglang.srt.layers.linear import (
|
35
|
+
MergedColumnParallelLinear,
|
36
|
+
QKVParallelLinear,
|
37
|
+
RowParallelLinear,
|
38
|
+
)
|
40
39
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
40
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
41
41
|
from sglang.srt.layers.radix_attention import RadixAttention
|
42
42
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
43
43
|
|
sglang/srt/models/qwen2.py
CHANGED
@@ -22,12 +22,6 @@ import torch
|
|
22
22
|
from torch import nn
|
23
23
|
from vllm.config import CacheConfig
|
24
24
|
from vllm.distributed import get_tensor_model_parallel_world_size
|
25
|
-
from vllm.model_executor.layers.linear import (
|
26
|
-
MergedColumnParallelLinear,
|
27
|
-
QKVParallelLinear,
|
28
|
-
RowParallelLinear,
|
29
|
-
)
|
30
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
31
25
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
32
26
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
33
27
|
ParallelLMHead,
|
@@ -37,8 +31,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
|
37
31
|
|
38
32
|
from sglang.srt.layers.activation import SiluAndMul
|
39
33
|
from sglang.srt.layers.layernorm import RMSNorm
|
34
|
+
from sglang.srt.layers.linear import (
|
35
|
+
MergedColumnParallelLinear,
|
36
|
+
QKVParallelLinear,
|
37
|
+
RowParallelLinear,
|
38
|
+
)
|
40
39
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
41
40
|
from sglang.srt.layers.pooler import Pooler, PoolingType
|
41
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
42
42
|
from sglang.srt.layers.radix_attention import RadixAttention
|
43
43
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
44
44
|
|
sglang/srt/models/qwen2_moe.py
CHANGED
@@ -29,13 +29,6 @@ from vllm.distributed import (
|
|
29
29
|
tensor_model_parallel_all_reduce,
|
30
30
|
)
|
31
31
|
from vllm.model_executor.layers.fused_moe import FusedMoE
|
32
|
-
from vllm.model_executor.layers.linear import (
|
33
|
-
MergedColumnParallelLinear,
|
34
|
-
QKVParallelLinear,
|
35
|
-
ReplicatedLinear,
|
36
|
-
RowParallelLinear,
|
37
|
-
)
|
38
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
39
32
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
40
33
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
41
34
|
ParallelLMHead,
|
@@ -45,7 +38,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
|
45
38
|
|
46
39
|
from sglang.srt.layers.activation import SiluAndMul
|
47
40
|
from sglang.srt.layers.layernorm import RMSNorm
|
41
|
+
from sglang.srt.layers.linear import (
|
42
|
+
MergedColumnParallelLinear,
|
43
|
+
QKVParallelLinear,
|
44
|
+
ReplicatedLinear,
|
45
|
+
RowParallelLinear,
|
46
|
+
)
|
48
47
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
48
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
49
49
|
from sglang.srt.layers.radix_attention import RadixAttention
|
50
50
|
from sglang.srt.layers.torchao_utils import apply_torchao_config_
|
51
51
|
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
sglang/srt/models/stablelm.py
CHANGED
@@ -24,12 +24,6 @@ from torch import nn
|
|
24
24
|
from transformers import PretrainedConfig
|
25
25
|
from vllm.config import CacheConfig
|
26
26
|
from vllm.distributed import get_tensor_model_parallel_world_size
|
27
|
-
from vllm.model_executor.layers.linear import (
|
28
|
-
MergedColumnParallelLinear,
|
29
|
-
QKVParallelLinear,
|
30
|
-
RowParallelLinear,
|
31
|
-
)
|
32
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
33
27
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
34
28
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
35
29
|
ParallelLMHead,
|
@@ -38,7 +32,13 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|
38
32
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
39
33
|
|
40
34
|
from sglang.srt.layers.activation import SiluAndMul
|
35
|
+
from sglang.srt.layers.linear import (
|
36
|
+
MergedColumnParallelLinear,
|
37
|
+
QKVParallelLinear,
|
38
|
+
RowParallelLinear,
|
39
|
+
)
|
41
40
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
41
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
42
42
|
from sglang.srt.layers.radix_attention import RadixAttention
|
43
43
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
44
44
|
|
sglang/srt/models/xverse.py
CHANGED
@@ -31,7 +31,6 @@ from vllm.model_executor.layers.linear import (
|
|
31
31
|
QKVParallelLinear,
|
32
32
|
RowParallelLinear,
|
33
33
|
)
|
34
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
35
34
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
36
35
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
37
36
|
ParallelLMHead,
|
@@ -40,6 +39,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|
40
39
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
41
40
|
|
42
41
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
42
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
43
43
|
from sglang.srt.layers.radix_attention import RadixAttention
|
44
44
|
from sglang.srt.model_executor.model_runner import InputMetadata
|
45
45
|
|
sglang/srt/models/xverse_moe.py
CHANGED
@@ -34,7 +34,6 @@ from vllm.model_executor.layers.linear import (
|
|
34
34
|
ReplicatedLinear,
|
35
35
|
RowParallelLinear,
|
36
36
|
)
|
37
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
38
37
|
from vllm.model_executor.layers.rotary_embedding import get_rope
|
39
38
|
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
40
39
|
ParallelLMHead,
|
@@ -43,6 +42,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
|
|
43
42
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
44
43
|
|
45
44
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
45
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
46
46
|
from sglang.srt.layers.radix_attention import RadixAttention
|
47
47
|
from sglang.srt.model_executor.forward_batch_info import InputMetadata
|
48
48
|
|
sglang/srt/models/yivl.py
CHANGED
@@ -21,9 +21,9 @@ import torch
|
|
21
21
|
import torch.nn as nn
|
22
22
|
from transformers import CLIPVisionModel, LlavaConfig
|
23
23
|
from vllm.config import CacheConfig
|
24
|
-
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
|
25
24
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
26
25
|
|
26
|
+
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
27
27
|
from sglang.srt.models.llava import LlavaLlamaForCausalLM
|
28
28
|
|
29
29
|
|
sglang/srt/utils.py
CHANGED
@@ -26,7 +26,7 @@ import struct
|
|
26
26
|
import time
|
27
27
|
from importlib.metadata import PackageNotFoundError, version
|
28
28
|
from io import BytesIO
|
29
|
-
from typing import List, Optional, Union
|
29
|
+
from typing import Any, Dict, List, Optional, Union
|
30
30
|
|
31
31
|
import numpy as np
|
32
32
|
import psutil
|
@@ -682,3 +682,23 @@ def replace_submodule(
|
|
682
682
|
target_name = module_name.split(".")[-1]
|
683
683
|
setattr(parent, target_name, new_module)
|
684
684
|
return new_module
|
685
|
+
|
686
|
+
|
687
|
+
def set_weight_attrs(
|
688
|
+
weight: torch.Tensor,
|
689
|
+
weight_attrs: Optional[Dict[str, Any]],
|
690
|
+
):
|
691
|
+
"""Set attributes on a weight tensor.
|
692
|
+
|
693
|
+
This method is used to set attributes on a weight tensor. This method
|
694
|
+
will not overwrite existing attributes.
|
695
|
+
|
696
|
+
Args:
|
697
|
+
weight: The weight tensor.
|
698
|
+
weight_attrs: A dictionary of attributes to set on the weight tensor.
|
699
|
+
"""
|
700
|
+
if weight_attrs is None:
|
701
|
+
return
|
702
|
+
for key, value in weight_attrs.items():
|
703
|
+
assert not hasattr(weight, key), f"Overwriting existing tensor attribute: {key}"
|
704
|
+
setattr(weight, key, value)
|
sglang/test/test_utils.py
CHANGED
@@ -25,11 +25,13 @@ from sglang.utils import get_exception_traceback
|
|
25
25
|
DEFAULT_FP8_MODEL_NAME_FOR_TEST = "neuralmagic/Meta-Llama-3.1-8B-FP8"
|
26
26
|
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
27
27
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
28
|
+
DEFAULT_MLA_MODEL_NAME_FOR_TEST = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
|
28
29
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600
|
29
30
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Meta-Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it"
|
30
|
-
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct"
|
31
|
+
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
|
31
32
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8"
|
32
|
-
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
|
33
|
+
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
|
34
|
+
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
33
35
|
|
34
36
|
|
35
37
|
def is_in_ci():
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.3.1.post2"
|
1
|
+
__version__ = "0.3.1.post3"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.3.1.post2
|
3
|
+
Version: 0.3.1.post3
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
|
|
318
318
|
### Method 2: From source
|
319
319
|
```
|
320
320
|
# Use the last release branch
|
321
|
-
git clone -b v0.3.1.post2 https://github.com/sgl-project/sglang.git
|
321
|
+
git clone -b v0.3.1.post3 https://github.com/sgl-project/sglang.git
|
322
322
|
cd sglang
|
323
323
|
|
324
324
|
pip install --upgrade pip
|
@@ -499,6 +499,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
|
499
499
|
- Llama / Llama 2 / Llama 3 / Llama 3.1
|
500
500
|
- Mistral / Mixtral / Mistral NeMo
|
501
501
|
- Gemma / Gemma 2
|
502
|
+
- OLMoE
|
502
503
|
- Qwen / Qwen 2 / Qwen 2 MoE
|
503
504
|
- DeepSeek / DeepSeek 2
|
504
505
|
- [LLaVA-OneVision](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
|
2
2
|
sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
|
3
|
-
sglang/bench_latency.py,sha256=
|
3
|
+
sglang/bench_latency.py,sha256=lyA_AwlhDbLMrH9Ca5_X3NUYQdwbHn_vpNbMyvqOZic,17342
|
4
4
|
sglang/bench_server_latency.py,sha256=KvFJgKQTSons7KOG0CBqnnOOx1gW29bBM1Z3GQO_6-E,5599
|
5
5
|
sglang/bench_serving.py,sha256=3gIJ1O2x51Fwd4wYJjgwluTbWKXL-azckQte7YC5zIc,36261
|
6
6
|
sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
|
@@ -8,7 +8,7 @@ sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
|
|
8
8
|
sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
|
9
9
|
sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
|
10
10
|
sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
|
11
|
-
sglang/version.py,sha256=
|
11
|
+
sglang/version.py,sha256=vtapUd7gvia5JFNpZOX5Q2A4TqgNWABeKFK66x_VeZU,28
|
12
12
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
|
14
14
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
@@ -28,7 +28,7 @@ sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVv
|
|
28
28
|
sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
|
29
29
|
sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
|
30
30
|
sglang/srt/server_args.py,sha256=3XjDt6SSjTfbOe0HSXA--2aUvrpWSnQmAHYwmeS1-M0,23159
|
31
|
-
sglang/srt/utils.py,sha256=
|
31
|
+
sglang/srt/utils.py,sha256=Vly46zMM_rz__DaU15vbidYtS0Gh2s7TnAMj4WLyAO4,22954
|
32
32
|
sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
|
33
33
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
34
34
|
sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
|
@@ -36,10 +36,11 @@ sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5
|
|
36
36
|
sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
|
37
37
|
sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
|
38
38
|
sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
|
39
|
-
sglang/srt/layers/activation.py,sha256=
|
40
|
-
sglang/srt/layers/attention_backend.py,sha256=
|
39
|
+
sglang/srt/layers/activation.py,sha256=tRWHxIjcIopkOremkb5Jy5O0rgdB1PAhHfIEONfyj6Y,5166
|
40
|
+
sglang/srt/layers/attention_backend.py,sha256=TMxsN1HwgqAURD1i77c-TN-3Xy53H9Kbg6HgpRHHoj0,18167
|
41
41
|
sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
|
42
42
|
sglang/srt/layers/layernorm.py,sha256=p_7bnmSpJ_slpoP0Gk5wQPpHtLllUu3imSIRBqGqTP0,3737
|
43
|
+
sglang/srt/layers/linear.py,sha256=9rjCiSb_QOn5RgpVjIhEKdReRvSYVfcTSjbWBEbApLI,45173
|
43
44
|
sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
|
44
45
|
sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
|
45
46
|
sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
|
@@ -48,6 +49,8 @@ sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJ
|
|
48
49
|
sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
|
49
50
|
sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
|
50
51
|
sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
|
52
|
+
sglang/srt/layers/quantization/__init__.py,sha256=wl9mIOeA6mtKIaW1LWUJABWPdqOb-2uZ-kSijWoxLtU,3095
|
53
|
+
sglang/srt/layers/quantization/base_config.py,sha256=vlpSPvSrFmUe65ETg4SoPocQ9bVNY6As3QuHdr_3Dr4,4023
|
51
54
|
sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
|
52
55
|
sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
|
53
56
|
sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
|
@@ -70,36 +73,36 @@ sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5M
|
|
70
73
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=gZ0Wukqz6u67MMIj4MC8JET9jcHdh0rotYzpuPlHruY,10512
|
71
74
|
sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
|
72
75
|
sglang/srt/model_executor/model_runner.py,sha256=X7AG1k9AI_kqS8q1i5Bfv-kFysIdqJAVWMGGZoAPThY,22726
|
73
|
-
sglang/srt/models/baichuan.py,sha256=
|
74
|
-
sglang/srt/models/chatglm.py,sha256=
|
75
|
-
sglang/srt/models/commandr.py,sha256=
|
76
|
-
sglang/srt/models/dbrx.py,sha256=
|
77
|
-
sglang/srt/models/deepseek.py,sha256=
|
78
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
79
|
-
sglang/srt/models/exaone.py,sha256=
|
80
|
-
sglang/srt/models/gemma.py,sha256=
|
81
|
-
sglang/srt/models/gemma2.py,sha256=
|
82
|
-
sglang/srt/models/gpt_bigcode.py,sha256=
|
83
|
-
sglang/srt/models/grok.py,sha256=
|
84
|
-
sglang/srt/models/internlm2.py,sha256=
|
85
|
-
sglang/srt/models/llama.py,sha256=
|
86
|
-
sglang/srt/models/llama_classification.py,sha256=
|
76
|
+
sglang/srt/models/baichuan.py,sha256=d2PFmyLBXjzS7X7FL9uz139_CpBPb5WYhzcHgF--gRE,15115
|
77
|
+
sglang/srt/models/chatglm.py,sha256=chDkgLTRU3bPxTUilhW_FGnsUWj_2fkvulCi9pdDxBY,13353
|
78
|
+
sglang/srt/models/commandr.py,sha256=FspSRkMRAXUjD3xzAkxkMiGiRg91czn9T5bagrf3l9M,14136
|
79
|
+
sglang/srt/models/dbrx.py,sha256=UmpbTCuf8rYe2Grut7YUPU1gEwsDhgNIs8vW4DNiaf0,14634
|
80
|
+
sglang/srt/models/deepseek.py,sha256=TWwfwKYvZZyu2UbimvimeyU_7u7HyIYZlRdlPtOCTfo,15988
|
81
|
+
sglang/srt/models/deepseek_v2.py,sha256=36iH4HrObMasOY801Tacub_40BR_0ImdqdKcJ6nHOD8,28413
|
82
|
+
sglang/srt/models/exaone.py,sha256=0OTgeAzyi_xvoQTx4TwYkCxRq8sMa-4EYL0_KJRmiAU,13069
|
83
|
+
sglang/srt/models/gemma.py,sha256=qo-4F602DKuv33zp4i4dayteFoVhnTYgVbFWKYms5Og,12255
|
84
|
+
sglang/srt/models/gemma2.py,sha256=8wGqNQPaPjuTtgHiKsUP4nowOukPvXwRywD4lkAW9Dg,14905
|
85
|
+
sglang/srt/models/gpt_bigcode.py,sha256=k_pZa4Sg5GEsr4ln0kjP765moGUPNs5a6iANPjE2W8U,10177
|
86
|
+
sglang/srt/models/grok.py,sha256=71Zx-4Q3wggNMtRYlXuPMA-auK-sHBYukI1Usn8LVrE,14911
|
87
|
+
sglang/srt/models/internlm2.py,sha256=nEr6MSHFkTjPLvWl1jQQdGFO7iOHex6YtE-I4rYuLao,12184
|
88
|
+
sglang/srt/models/llama.py,sha256=bdIt9IfZBgsg6CoZT3lvB-dqXhfxempdRHLkY3Su_VU,15198
|
89
|
+
sglang/srt/models/llama_classification.py,sha256=UpwYsgNVS1065t7Yjmi2XGbk9Or8bq2cF82zH1Yx2Mg,3385
|
87
90
|
sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2zDEyLW4g,3458
|
88
|
-
sglang/srt/models/llava.py,sha256=
|
89
|
-
sglang/srt/models/llavavid.py,sha256=
|
90
|
-
sglang/srt/models/minicpm.py,sha256=
|
91
|
-
sglang/srt/models/minicpm3.py,sha256=
|
91
|
+
sglang/srt/models/llava.py,sha256=1MG1JDDQb7xc67BSimDo98Gmvza6PmrHQHmKybsDui4,24872
|
92
|
+
sglang/srt/models/llavavid.py,sha256=RqOUFROt-gqTlFYqnySAVBXJO9g-NMU2yke-AW5cV6o,11983
|
93
|
+
sglang/srt/models/minicpm.py,sha256=Xvy99mkfwzRZCLOe3BhfmNSuJyDhGjjAJq0YOpepu_Q,13807
|
94
|
+
sglang/srt/models/minicpm3.py,sha256=yuiwWNfJeWvfUgwkbEfpuc9_uPB6odqBCbdYj8t9aDQ,25207
|
92
95
|
sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
|
93
|
-
sglang/srt/models/mixtral.py,sha256=
|
94
|
-
sglang/srt/models/mixtral_quant.py,sha256=
|
95
|
-
sglang/srt/models/olmoe.py,sha256=
|
96
|
-
sglang/srt/models/qwen.py,sha256=
|
97
|
-
sglang/srt/models/qwen2.py,sha256=
|
98
|
-
sglang/srt/models/qwen2_moe.py,sha256=
|
99
|
-
sglang/srt/models/stablelm.py,sha256=
|
100
|
-
sglang/srt/models/xverse.py,sha256=
|
101
|
-
sglang/srt/models/xverse_moe.py,sha256=
|
102
|
-
sglang/srt/models/yivl.py,sha256=
|
96
|
+
sglang/srt/models/mixtral.py,sha256=QzWIhjk8gW9DquTvgQsWK3VK0ccdTMT0hCDDHI03KPI,13879
|
97
|
+
sglang/srt/models/mixtral_quant.py,sha256=e2x1AykUSVRqEVw6Pg7uKW1Uj8xyn4jZSfLJL4Kl5o8,14054
|
98
|
+
sglang/srt/models/olmoe.py,sha256=hGh2IlCg9kr1WIeGyRWwNpa1CfyZH163vq7eSx5d598,15327
|
99
|
+
sglang/srt/models/qwen.py,sha256=Vs6f8Jn1TswEzgiPS0G9qxeDU_DdC60JnhDeRDTH3FQ,9936
|
100
|
+
sglang/srt/models/qwen2.py,sha256=pamZrETUcaXbWN4tVTjObFPNjqaMu49-8g267NzxkFI,12414
|
101
|
+
sglang/srt/models/qwen2_moe.py,sha256=2BFsp1oPs7o_3uc8xvIGfGRNNU2TKkmKZY9P1qtgtlQ,17135
|
102
|
+
sglang/srt/models/stablelm.py,sha256=v67JM1SHb-LinrsX598WMsLVeyzjoKquW6G5G30X5fQ,11341
|
103
|
+
sglang/srt/models/xverse.py,sha256=VThXXKg3DzepcEP1JHcqSyhRBvq6yL14oh4uj5TJOEM,13649
|
104
|
+
sglang/srt/models/xverse_moe.py,sha256=BqmV-uk9ipp4nrj6-lnFfvkwUcuKmV7yfGAYB6Ob-UQ,15833
|
105
|
+
sglang/srt/models/yivl.py,sha256=N3noJ5M-FiZS-E_zfaJs4prQOu_ineRt11MWloYgOR8,4826
|
103
106
|
sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
|
104
107
|
sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
|
105
108
|
sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
|
@@ -122,10 +125,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
122
125
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
123
126
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
124
127
|
sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
|
125
|
-
sglang/test/test_utils.py,sha256=
|
128
|
+
sglang/test/test_utils.py,sha256=OnAFpTA94GmQCHCV5XpaYImn11U7Cg4yfSw0nC17GRs,17504
|
126
129
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
|
127
|
-
sglang-0.3.1.post2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
128
|
-
sglang-0.3.1.
|
129
|
-
sglang-0.3.1.post2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
130
|
-
sglang-0.3.1.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
131
|
-
sglang-0.3.1.post2.dist-info/RECORD,,
|
130
|
+
sglang-0.3.1.post3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
131
|
+
sglang-0.3.1.post3.dist-info/METADATA,sha256=uhvB-z9UZsAafHaPfU9qYU6oKxrC6BLcyBspbtoFAY8,38122
|
132
|
+
sglang-0.3.1.post3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
133
|
+
sglang-0.3.1.post3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
134
|
+
sglang-0.3.1.post3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|