sglang 0.3.1__py3-none-any.whl → 0.3.1.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_latency.py +7 -2
- sglang/global_config.py +5 -13
- sglang/lang/interpreter.py +0 -3
- sglang/srt/constrained/fsm_cache.py +5 -1
- sglang/srt/layers/activation.py +12 -0
- sglang/srt/layers/attention_backend.py +12 -12
- sglang/srt/layers/fused_moe/layer.py +27 -7
- sglang/srt/layers/layernorm.py +12 -0
- sglang/srt/layers/sampler.py +32 -97
- sglang/srt/lora/lora_manager.py +11 -8
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/managers/tp_worker.py +8 -7
- sglang/srt/model_executor/cuda_graph_runner.py +12 -1
- sglang/srt/model_executor/model_runner.py +24 -41
- sglang/srt/models/deepseek_v2.py +6 -1
- sglang/srt/models/minicpm3.py +5 -1
- sglang/srt/models/olmoe.py +415 -0
- sglang/srt/sampling/sampling_batch_info.py +3 -50
- sglang/srt/server.py +6 -1
- sglang/srt/server_args.py +34 -1
- sglang/srt/utils.py +7 -51
- sglang/test/test_utils.py +0 -1
- sglang/version.py +1 -1
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/METADATA +2 -2
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/RECORD +28 -27
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/WHEEL +1 -1
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/LICENSE +0 -0
- {sglang-0.3.1.dist-info → sglang-0.3.1.post1.dist-info}/top_level.txt +0 -0
sglang/srt/server.py
CHANGED
@@ -78,6 +78,7 @@ from sglang.srt.utils import (
     assert_pkg_version,
     configure_logger,
     enable_show_time_cost,
+    is_hip,
     kill_child_process,
     maybe_set_triton_cache_manager,
     prepare_model,
@@ -152,7 +153,7 @@ async def flush_cache():
 async def update_weights(obj: UpdateWeightReqInput, request: Request):
 
     success, message = await tokenizer_manager.update_weights(obj, request)
-    content = {"
+    content = {"success": success, "message": message}
     if success:
         return JSONResponse(
             content,
@@ -434,6 +435,10 @@ def _set_envs_and_config(server_args: ServerArgs):
             "at https://docs.flashinfer.ai/installation.html.",
         )
 
+    if is_hip():
+        # to figure out a better method of not using fork later
+        mp.set_start_method("spawn", force=True)
+
 
 def _wait_and_warmup(server_args, pipe_finish_writer, pid):
     headers = {}
sglang/srt/server_args.py
CHANGED
@@ -21,9 +21,22 @@ import logging
 import random
 from typing import List, Optional, Union
 
+from sglang.srt.utils import is_hip
+
 logger = logging.getLogger(__name__)
 
 
+class LoRAPathAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, {})
+        for lora_path in values:
+            if "=" in lora_path:
+                name, path = lora_path.split("=", 1)
+                getattr(namespace, self.dest)[name] = path
+            else:
+                getattr(namespace, self.dest)[lora_path] = lora_path
+
+
 @dataclasses.dataclass
 class ServerArgs:
     # Model and tokenizer
@@ -59,6 +72,7 @@ class ServerArgs:
     tp_size: int = 1
     stream_interval: int = 1
     random_seed: Optional[int] = None
+    constrained_json_whitespace_pattern: Optional[str] = None
 
     # Logging
     log_level: str = "info"
@@ -96,6 +110,7 @@ class ServerArgs:
     disable_custom_all_reduce: bool = False
     enable_mixed_chunk: bool = False
     enable_torch_compile: bool = False
+    max_torch_compile_bs: int = 32
     torchao_config: str = ""
     enable_p2p_check: bool = False
     enable_mla: bool = False
@@ -152,6 +167,11 @@ class ServerArgs:
             )
             self.sampling_backend = "pytorch"
 
+        # ROCm: flashinfer available later
+        if is_hip():
+            self.attention_backend = "triton"
+            self.sampling_backend = "pytorch"
+
         # Default kernel backends
         if self.enable_mla:
             logger.info("MLA optimization is tunred on. Use triton backend.")
@@ -359,6 +379,12 @@ class ServerArgs:
             default=ServerArgs.random_seed,
             help="The random seed.",
         )
+        parser.add_argument(
+            "--constrained-json-whitespace-pattern",
+            type=str,
+            default=ServerArgs.constrained_json_whitespace_pattern,
+            help=r"Regex pattern for syntactic whitespaces allowed in JSON constrained output. For example, to allow the model generate consecutive whitespaces, set the pattern to [\n\t ]*",
+        )
         parser.add_argument(
             "--log-level",
             type=str,
@@ -498,6 +524,12 @@ class ServerArgs:
             action="store_true",
             help="Optimize the model with torch.compile. Experimental feature.",
         )
+        parser.add_argument(
+            "--max-torch-compile-bs",
+            type=int,
+            default=ServerArgs.max_torch_compile_bs,
+            help="Set the maximum batch size when using torch compile.",
+        )
         parser.add_argument(
             "--torchao-config",
             type=str,
@@ -532,7 +564,8 @@ class ServerArgs:
             type=str,
             nargs="*",
             default=None,
-
+            action=LoRAPathAction,
+            help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}",
         )
         parser.add_argument(
             "--max-loras-per-batch",
sglang/srt/utils.py
CHANGED
@@ -51,6 +51,11 @@ show_time_cost = False
 time_infos = {}
 
 
+# torch flag AMD GPU
+def is_hip() -> bool:
+    return torch.version.hip is not None
+
+
 def enable_show_time_cost():
     global show_time_cost
     show_time_cost = True
@@ -187,7 +192,7 @@ def allocate_init_ports(
         cur_port += 1
 
     if port is not None and ret_ports[0] != port:
-        logger.
+        logger.warning(
             f"WARNING: Port {port} is not available. Use port {ret_ports[0]} instead."
         )
 
@@ -623,56 +628,7 @@ def set_ulimit(target_soft_limit=65535):
         try:
             resource.setrlimit(resource_type, (target_soft_limit, current_hard))
         except ValueError as e:
-            logger.
-
-
-def is_llama3_405b_fp8_head_16(model_config):
-    """Return whether the model is meta-llama/Meta-Llama-3.1-405B-FP8 with 16 kv heads."""
-    if (
-        model_config.hf_config.architectures[0] == "LlamaForCausalLM"
-        and model_config.hf_config.hidden_size == 16384
-        and model_config.hf_config.intermediate_size == 53248
-        and model_config.hf_config.num_hidden_layers == 126
-        and model_config.hf_config.num_key_value_heads == 16
-        and hasattr(model_config.hf_config, "quantization_config")
-        and model_config.hf_config.quantization_config["quant_method"] == "fbgemm_fp8"
-    ):
-        return True
-    return False
-
-
-def monkey_patch_vllm_qvk_linear_loader():
-    """A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints."""
-    from vllm.model_executor.layers.linear import QKVParallelLinear
-
-    origin_weight_loader = QKVParallelLinear.weight_loader
-
-    def get_original_weight(loaded_weight, head_dim):
-        n_kv_head = loaded_weight.shape[0] // (2 * head_dim)
-        dim = loaded_weight.shape[1]
-        for i in range(n_kv_head):
-            loaded_weight[i * head_dim : (i + 1) * head_dim, :] = loaded_weight[
-                2 * i * head_dim : (2 * i + 1) * head_dim, :
-            ]
-        original_kv_weight = loaded_weight[: n_kv_head * head_dim, :]
-        assert original_kv_weight.shape == (n_kv_head * head_dim, dim)
-        return original_kv_weight
-
-    def weight_loader_srt(
-        self,
-        param: Parameter,
-        loaded_weight: torch.Tensor,
-        loaded_shard_id: Optional[str] = None,
-    ):
-        if (
-            loaded_shard_id in ["k", "v"]
-            and loaded_weight.shape[0] == self.head_size * self.total_num_kv_heads * 2
-        ):
-            loaded_weight = get_original_weight(loaded_weight, self.head_size)
-
-        origin_weight_loader(self, param, loaded_weight, loaded_shard_id)
-
-    setattr(QKVParallelLinear, "weight_loader", weight_loader_srt)
+            logger.warning(f"Fail to set RLIMIT_NOFILE: {e}")
 
 
 def add_api_key_middleware(app, api_key: str):
sglang/test/test_utils.py
CHANGED
@@ -304,7 +304,6 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
 def select_sglang_backend(args: argparse.Namespace):
     if args.backend.startswith("srt"):
         if args.backend == "srt-no-parallel":
-            global_config.enable_parallel_decoding = False
             global_config.enable_parallel_encoding = False
         backend = RuntimeEndpoint(f"{args.host}:{args.port}")
     elif args.backend.startswith("gpt-"):
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.1.post1"
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.3.1.post1
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -318,7 +318,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.3.1 https://github.com/sgl-project/sglang.git
+git clone -b v0.3.1.post1 https://github.com/sgl-project/sglang.git
 cd sglang
 
 pip install --upgrade pip
@@ -1,18 +1,18 @@
|
|
1
1
|
sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
|
2
2
|
sglang/api.py,sha256=pH4CjwOXUweL5MF1sIkFMddDxfnF7PyUxEHC5kvNVbI,6468
|
3
|
-
sglang/bench_latency.py,sha256=
|
3
|
+
sglang/bench_latency.py,sha256=CDMrch4QwIyb2DTH2kBIgQ6Q8sGHwtrx3Cz49qZNfpU,17078
|
4
4
|
sglang/bench_serving.py,sha256=6OM5JIDuoxJDg-VLE4ijGGcS8-6ViaidV05lIrZmSzo,36239
|
5
5
|
sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
|
6
|
-
sglang/global_config.py,sha256=
|
6
|
+
sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
|
7
7
|
sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
|
8
8
|
sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
|
9
9
|
sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
|
10
|
-
sglang/version.py,sha256=
|
10
|
+
sglang/version.py,sha256=83xK6WSmRR5ba-i5fDLUmoJT83Eg_dpsWgwcnsUhMpA,28
|
11
11
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
|
13
13
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
14
14
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
15
|
-
sglang/lang/interpreter.py,sha256=
|
15
|
+
sglang/lang/interpreter.py,sha256=rOquFbMzxry7IItZlAn5TwtQfxMy718JPxOkiXO-yrg,30234
|
16
16
|
sglang/lang/ir.py,sha256=W3UfZikcGeT86PDDjDjw-yNzrKY2e2UYO4DTatMCfm0,17704
|
17
17
|
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
18
18
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -25,56 +25,56 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
|
|
25
25
|
sglang/srt/conversation.py,sha256=S5w5V6G1xigNxa3UQoSxRcMpQLWWDT9EPBoHBvHkSAk,19663
|
26
26
|
sglang/srt/hf_transformers_utils.py,sha256=6HlqcmGPIvnSGaEEICeuzwag1QylSoSGbXRVvUdIMDo,6016
|
27
27
|
sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
|
28
|
-
sglang/srt/server.py,sha256=
|
29
|
-
sglang/srt/server_args.py,sha256=
|
30
|
-
sglang/srt/utils.py,sha256=
|
28
|
+
sglang/srt/server.py,sha256=n4QRn36_t-HAH-lSME3tiZSCUGRQwqMUckgs0paHq5g,20179
|
29
|
+
sglang/srt/server_args.py,sha256=M1Bm9u2JRsEptne-kw-D-B_29Q-M6V4UpAM7K-JxXAc,23309
|
30
|
+
sglang/srt/utils.py,sha256=8yxiMRttCcfswynkNPWD3yZFNAGFz2P1PzSuxHCBGns,22340
|
31
31
|
sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
|
32
32
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
33
33
|
sglang/srt/configs/model_config.py,sha256=OqHrucJQHbH-wxgkGj-Dcx_B888uUGASpLRjz40HaLY,6651
|
34
34
|
sglang/srt/constrained/__init__.py,sha256=ze8awDPvwAzdeMwzJ-25kXOQ4nVWoaP55jBDt5UOS_4,2070
|
35
35
|
sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
|
36
|
-
sglang/srt/constrained/fsm_cache.py,sha256=
|
36
|
+
sglang/srt/constrained/fsm_cache.py,sha256=k7DRUAaiLTEX5UarfJ17gEYQ-QWQAGfvykjYFkM_Y2U,2982
|
37
37
|
sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
|
38
|
-
sglang/srt/layers/activation.py,sha256=
|
39
|
-
sglang/srt/layers/attention_backend.py,sha256=
|
38
|
+
sglang/srt/layers/activation.py,sha256=awcwOODYcVdUtC2JxJ1TGsV8Tru0eACKcxYN6cWHbl4,5148
|
39
|
+
sglang/srt/layers/attention_backend.py,sha256=lqMsY4VaOO_szIWoTAinXf1DnP2UsbF32kzvwFySz9w,18119
|
40
40
|
sglang/srt/layers/flashinfer_utils.py,sha256=jyaO7XiEisFZg_dfaCbfRCHSHSKYoM1wOzfHa0h1q14,7413
|
41
|
-
sglang/srt/layers/layernorm.py,sha256
|
41
|
+
sglang/srt/layers/layernorm.py,sha256=-9Yph4nnMZYX_Q31MUGAimLajNclHXjgDkswpU2BTos,3694
|
42
42
|
sglang/srt/layers/logits_processor.py,sha256=Js2qSk1Z3uPL2cYO1ARai51f2i8OedV3qdwByQVSJtI,12439
|
43
43
|
sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
|
44
44
|
sglang/srt/layers/radix_attention.py,sha256=EcVO0fUSmgvE_9R-MlpgJq0O_uT8ACuHzbMi19bANYc,1874
|
45
|
-
sglang/srt/layers/sampler.py,sha256=
|
45
|
+
sglang/srt/layers/sampler.py,sha256=Q4u46oYu66e34rBNzr50VoXO8FM-assYiCoROolq3Zs,3661
|
46
46
|
sglang/srt/layers/torchao_utils.py,sha256=rTECwKSXhj_ylh_iSzfbopz9_lZOFHatquQrNJNLZlE,2703
|
47
47
|
sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
|
48
48
|
sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
|
49
|
-
sglang/srt/layers/fused_moe/layer.py,sha256=
|
49
|
+
sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
|
50
50
|
sglang/srt/layers/triton_attention/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
|
51
51
|
sglang/srt/layers/triton_attention/extend_attention.py,sha256=XTUTMrE-5jfMEufQUifZ-8NJQABSPcF47qhnNT5Z1iI,11050
|
52
52
|
sglang/srt/layers/triton_attention/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
|
53
53
|
sglang/srt/lora/lora.py,sha256=ksj866lgDul6zxO30Jm7Nrjv-mFAMrzdvP8sez3Pl6U,14938
|
54
54
|
sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
|
55
|
-
sglang/srt/lora/lora_manager.py,sha256=
|
55
|
+
sglang/srt/lora/lora_manager.py,sha256=7J7cGmyy1Ph4HCvLdM-ViAizAbV1snZqD-S7JLWXasI,9561
|
56
56
|
sglang/srt/managers/controller_multi.py,sha256=KolZDso2WqH1ZhQw9p1eTmlFRgo4bcvzBxE44_sNE_o,6300
|
57
57
|
sglang/srt/managers/controller_single.py,sha256=DiZALP_iIPZQMRx09a-LwT5_Dg7p-WU8HXyMoxJ9sRA,4955
|
58
58
|
sglang/srt/managers/detokenizer_manager.py,sha256=yQkL5gLomLiy1qc6e9HNz8hcj7JQFHm1AfIrzpXaWJE,6852
|
59
59
|
sglang/srt/managers/io_struct.py,sha256=bqmL3NDPLqOn6Au3WLF0NOe8Dh7ECMN7BTHCkEZ_Edk,11247
|
60
60
|
sglang/srt/managers/policy_scheduler.py,sha256=tiBUi2GJU5eQEBK6HfsO1_YjWtFkougo40954DIp4dM,13026
|
61
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
61
|
+
sglang/srt/managers/schedule_batch.py,sha256=ppHYK65GP0dtuCEzpSbGm9uAne5rEoRmW8osLknXJpI,27384
|
62
62
|
sglang/srt/managers/tokenizer_manager.py,sha256=ql-sObjl1oRigJwnLtqqTaaw-i7gPTDMoNXDEMftr40,29643
|
63
|
-
sglang/srt/managers/tp_worker.py,sha256=
|
63
|
+
sglang/srt/managers/tp_worker.py,sha256=4Hhla9rfGYEdQtzGmxlIEqxt_WVkn2dkLLNQZHgpkf0,39270
|
64
64
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
|
65
65
|
sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
|
66
66
|
sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
|
67
67
|
sglang/srt/mem_cache/memory_pool.py,sha256=4br3Ea2bfA-YsF_sPOVHlF2zQzYGd8fVaYTp197yZsE,7871
|
68
68
|
sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
|
69
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
69
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=ZeO-8Mg4Tf0iP-L9FXcyhHfNzGWpTPEDGeUoC2lzHTE,10418
|
70
70
|
sglang/srt/model_executor/forward_batch_info.py,sha256=yvkhayY9Zu6gysoojcGT73lADGOtfHKkFKWdJLRyACI,6141
|
71
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
71
|
+
sglang/srt/model_executor/model_runner.py,sha256=LoQ7OFVwOiK_BfdpRfitss1TfJ8qrysHgWM-xXu7n2Y,22433
|
72
72
|
sglang/srt/models/baichuan.py,sha256=NrG1rMJXhemkrUCEf8xKOSDQVsOD-nN8RQz6MWHOg84,15124
|
73
73
|
sglang/srt/models/chatglm.py,sha256=KwxLHBEvK02McXDvBS0gnRxfIvOAu2QP7lgibrj9Nbc,13371
|
74
74
|
sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
|
75
75
|
sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
|
76
76
|
sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
|
77
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
77
|
+
sglang/srt/models/deepseek_v2.py,sha256=bPaGRL8ieBCXKIf-KY7-D9Rus7Qj3VGvvtERzAXAZWs,28421
|
78
78
|
sglang/srt/models/exaone.py,sha256=3I5ZoiLotf7U-8c9QJRubpgf6JDx9I_z-ViXQlCC-x8,13087
|
79
79
|
sglang/srt/models/gemma.py,sha256=GkwgGFHgGlXgBZN7s7Wooz5tMyCp1YtgLahU2NOo66M,12273
|
80
80
|
sglang/srt/models/gemma2.py,sha256=sFfCNEm0_OOWElRSTDuroRv8wNMX8v_81Uko9m546KA,14923
|
@@ -87,10 +87,11 @@ sglang/srt/models/llama_embedding.py,sha256=RI2mpYheP5WwhuTINU-6IrU61usuMyCK9h2z
|
|
87
87
|
sglang/srt/models/llava.py,sha256=O4XGdl70Hh4tM_OHapFGHbReC82mbe9xLw6GELKWKhU,24881
|
88
88
|
sglang/srt/models/llavavid.py,sha256=ou5uIuskBoBo0lXvqFFfDLBYYVfehx27n-Lu8X9gpLs,11992
|
89
89
|
sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
|
90
|
-
sglang/srt/models/minicpm3.py,sha256=
|
90
|
+
sglang/srt/models/minicpm3.py,sha256=_C96kO3qGK0KRctXZf8LBR9s0sEW0QXWSGU0Vf6OrI8,25206
|
91
91
|
sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
|
92
92
|
sglang/srt/models/mixtral.py,sha256=oRC7mKBrPJhvzkWSabrbeQQQac-jtF4EV6H2Sgjc5JY,13897
|
93
93
|
sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
|
94
|
+
sglang/srt/models/olmoe.py,sha256=d0ECpU-IXXwGYg9tkVeMARUbqVcqEnWfpH3rrNiGKA0,15336
|
94
95
|
sglang/srt/models/qwen.py,sha256=nqSRzkiZzpRVG6WGQ1MBUclQnXyw8jlvoOq-euM8j5s,9954
|
95
96
|
sglang/srt/models/qwen2.py,sha256=9_M-VkHN1_T1XN-gsl_L636QMQ9BLF2WqvTcx_1L6aw,12432
|
96
97
|
sglang/srt/models/qwen2_moe.py,sha256=s7b5XnSvsBYtZZUkjPp442m59CqPJ3HxGUIwXBVWsXw,17153
|
@@ -100,7 +101,7 @@ sglang/srt/models/xverse_moe.py,sha256=YR--WZ33G7XEMsS7ZJl1cQ62Q8PDo9gWqpvJBY_cb
|
|
100
101
|
sglang/srt/models/yivl.py,sha256=B6MELthWIm5KdSzX3o2tbbpApY8XdjUdmcQSD4dQe_I,4835
|
101
102
|
sglang/srt/openai_api/adapter.py,sha256=CJ47YftRHAip1FMcHIhtCorBtzlIkv7F0Wz_JUcI4T4,51032
|
102
103
|
sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
|
103
|
-
sglang/srt/sampling/sampling_batch_info.py,sha256=
|
104
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=GewqyxCrW2PFwuzGHaCR59Pvw6j0n2dKGrlJWYQWwW4,6149
|
104
105
|
sglang/srt/sampling/sampling_params.py,sha256=ggOXxafqfCD-xrGYcM57byLZ79CIeBP4AD5F44L_CW0,5635
|
105
106
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
106
107
|
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
|
@@ -120,10 +121,10 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
120
121
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
121
122
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
122
123
|
sglang/test/test_programs.py,sha256=3-XKnppQdCNWjaJb6jwib5Z9OSpgKvH8SFLJbE4J9qI,17001
|
123
|
-
sglang/test/test_utils.py,sha256=
|
124
|
+
sglang/test/test_utils.py,sha256=NLiJqFRWnCeQ-gdCBe0ubNFCsig1CPb1EU-Ay9CtSfU,17109
|
124
125
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
|
125
|
-
sglang-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
126
|
-
sglang-0.3.1.dist-info/METADATA,sha256=
|
127
|
-
sglang-0.3.1.dist-info/WHEEL,sha256=
|
128
|
-
sglang-0.3.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
129
|
-
sglang-0.3.1.dist-info/RECORD,,
|
126
|
+
sglang-0.3.1.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
127
|
+
sglang-0.3.1.post1.dist-info/METADATA,sha256=zswdq5UTi5aLVmpEyjnc7SzIi60yc4w2hlMhckdxmcU,38137
|
128
|
+
sglang-0.3.1.post1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
129
|
+
sglang-0.3.1.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
130
|
+
sglang-0.3.1.post1.dist-info/RECORD,,
|
File without changes
|
File without changes
|