sglang 0.4.3__py3-none-any.whl → 0.4.3.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +17 -2
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/layers/attention/flashinfer_backend.py +101 -30
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/model_executor/model_runner.py +1 -0
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +9 -3
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/METADATA +3 -3
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/RECORD +43 -27
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/top_level.txt +0 -0
sglang/srt/models/qwen2_vl.py
CHANGED
@@ -31,8 +31,9 @@ import torch
|
|
31
31
|
import torch.nn as nn
|
32
32
|
import torch.nn.functional as F
|
33
33
|
from einops import rearrange
|
34
|
+
from transformers import Qwen2VLConfig
|
35
|
+
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
|
34
36
|
|
35
|
-
from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
|
36
37
|
from sglang.srt.hf_transformers_utils import get_processor
|
37
38
|
from sglang.srt.layers.activation import QuickGELU
|
38
39
|
from sglang.srt.layers.attention.vision import VisionAttention
|
sglang/srt/openai_api/adapter.py
CHANGED
@@ -20,12 +20,14 @@ import os
|
|
20
20
|
import time
|
21
21
|
import uuid
|
22
22
|
from http import HTTPStatus
|
23
|
-
from typing import Dict, List
|
23
|
+
from typing import Dict, List
|
24
24
|
|
25
25
|
from fastapi import HTTPException, Request, UploadFile
|
26
26
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
27
27
|
from pydantic import ValidationError
|
28
28
|
|
29
|
+
from sglang.lang.chat_template import get_chat_template_by_model_path
|
30
|
+
|
29
31
|
try:
|
30
32
|
from outlines.fsm.json_schema import convert_json_schema_to_str
|
31
33
|
except ImportError:
|
@@ -92,7 +94,6 @@ file_id_response: Dict[str, FileResponse] = {}
|
|
92
94
|
# map file id to file path in SGLang backend
|
93
95
|
file_id_storage: Dict[str, str] = {}
|
94
96
|
|
95
|
-
|
96
97
|
# backend storage directory
|
97
98
|
storage_dir = None
|
98
99
|
|
@@ -116,12 +117,13 @@ def create_streaming_error_response(
|
|
116
117
|
return json_str
|
117
118
|
|
118
119
|
|
119
|
-
def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
|
120
|
+
def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg, model_path):
|
120
121
|
global chat_template_name
|
121
122
|
|
122
123
|
logger.info(
|
123
124
|
f"Use chat template for the OpenAI-compatible API server: {chat_template_arg}"
|
124
125
|
)
|
126
|
+
|
125
127
|
if not chat_template_exists(chat_template_arg):
|
126
128
|
if not os.path.exists(chat_template_arg):
|
127
129
|
raise RuntimeError(
|
@@ -163,6 +165,18 @@ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
|
|
163
165
|
else:
|
164
166
|
chat_template_name = chat_template_arg
|
165
167
|
|
168
|
+
# check chat-template
|
169
|
+
chat_template = get_chat_template_by_model_path(model_path)
|
170
|
+
if chat_template is not None:
|
171
|
+
official_chat_template = chat_template.name
|
172
|
+
used_chat_template = chat_template_name
|
173
|
+
if official_chat_template != used_chat_template:
|
174
|
+
logger.warning(
|
175
|
+
f"Using a chat_template: '{used_chat_template}', "
|
176
|
+
f"which is different from official chat template: '{official_chat_template}', "
|
177
|
+
f"This discrepancy may lead to performance degradation."
|
178
|
+
)
|
179
|
+
|
166
180
|
|
167
181
|
async def v1_files_create(file: UploadFile, purpose: str, file_storage_pth: str = None):
|
168
182
|
try:
|
sglang/srt/server_args.py
CHANGED
@@ -262,14 +262,17 @@ class ServerArgs:
|
|
262
262
|
)
|
263
263
|
|
264
264
|
# Speculative Decoding
|
265
|
-
if self.speculative_algorithm == "EAGLE":
|
265
|
+
if (
|
266
|
+
self.speculative_algorithm == "EAGLE"
|
267
|
+
or self.speculative_algorithm == "NEXTN"
|
268
|
+
):
|
266
269
|
self.prefill_only_one_req = True
|
267
270
|
self.disable_cuda_graph_padding = True
|
268
271
|
self.disable_radix_cache = True
|
269
272
|
self.disable_overlap_schedule = True
|
270
273
|
self.chunked_prefill_size = -1
|
271
274
|
logger.info(
|
272
|
-
"The radix cache, chunked prefill, and overlap scheduler are disabled because of using eagle speculative decoding."
|
275
|
+
f"The radix cache, chunked prefill, and overlap scheduler are disabled because of using {self.speculative_algorithm} speculative decoding."
|
273
276
|
)
|
274
277
|
|
275
278
|
# GGUF
|
@@ -705,7 +708,7 @@ class ServerArgs:
|
|
705
708
|
parser.add_argument(
|
706
709
|
"--speculative-algorithm",
|
707
710
|
type=str,
|
708
|
-
choices=["EAGLE"],
|
711
|
+
choices=["EAGLE", "NEXTN"],
|
709
712
|
help="Speculative algorithm.",
|
710
713
|
)
|
711
714
|
parser.add_argument(
|
@@ -24,6 +24,7 @@ from sglang.srt.speculative.eagle_utils import (
|
|
24
24
|
fast_topk,
|
25
25
|
select_top_k_tokens,
|
26
26
|
)
|
27
|
+
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
27
28
|
|
28
29
|
logger = logging.getLogger(__name__)
|
29
30
|
|
@@ -57,11 +58,15 @@ class EAGLEWorker(TpModelWorker):
|
|
57
58
|
# Parse arguments
|
58
59
|
self.topk = server_args.speculative_eagle_topk
|
59
60
|
self.speculative_num_steps = server_args.speculative_num_steps
|
61
|
+
self.speculative_algorithm = SpeculativeAlgorithm.from_string(
|
62
|
+
server_args.speculative_algorithm
|
63
|
+
)
|
60
64
|
self.server_args = server_args
|
61
65
|
|
62
66
|
# Share the embedding and lm_head
|
63
|
-
|
64
|
-
|
67
|
+
if not self.speculative_algorithm.is_nextn():
|
68
|
+
embed, head = self.target_worker.model_runner.model.get_embed_and_head()
|
69
|
+
self.model_runner.model.set_embed_and_head(embed, head)
|
65
70
|
self.model_runner.server_args.disable_cuda_graph = backup_disable_cuda_graph
|
66
71
|
|
67
72
|
# Create multi-step attn backends and cuda graph runners
|
@@ -5,18 +5,28 @@ class SpeculativeAlgorithm(IntEnum):
|
|
5
5
|
NONE = auto()
|
6
6
|
EAGLE = auto()
|
7
7
|
|
8
|
+
# NEXTN spec decoding is for DeepSeek V3/R1
|
9
|
+
# currently it's implemented based on EAGLE
|
10
|
+
NEXTN = auto()
|
11
|
+
|
8
12
|
def is_none(self):
|
9
13
|
return self == SpeculativeAlgorithm.NONE
|
10
14
|
|
11
15
|
def is_eagle(self):
|
12
|
-
return self == SpeculativeAlgorithm.EAGLE
|
16
|
+
return self == SpeculativeAlgorithm.EAGLE or self == SpeculativeAlgorithm.NEXTN
|
17
|
+
|
18
|
+
def is_nextn(self):
|
19
|
+
return self == SpeculativeAlgorithm.NEXTN
|
13
20
|
|
14
21
|
@staticmethod
|
15
22
|
def from_string(name: str):
|
16
23
|
name_map = {
|
17
24
|
"EAGLE": SpeculativeAlgorithm.EAGLE,
|
25
|
+
"NEXTN": SpeculativeAlgorithm.NEXTN,
|
18
26
|
None: SpeculativeAlgorithm.NONE,
|
19
27
|
}
|
28
|
+
if name is not None:
|
29
|
+
name = name.upper()
|
20
30
|
return name_map[name]
|
21
31
|
|
22
32
|
|
sglang/utils.py
CHANGED
@@ -306,22 +306,112 @@ def download_and_cache_file(url: str, filename: Optional[str] = None):
|
|
306
306
|
return filename
|
307
307
|
|
308
308
|
|
309
|
-
|
309
|
+
import fcntl
|
310
|
+
|
311
|
+
|
312
|
+
def is_in_ci():
|
313
|
+
from sglang.test.test_utils import is_in_ci
|
314
|
+
|
315
|
+
return is_in_ci()
|
316
|
+
|
317
|
+
|
318
|
+
LOCKFILE = os.path.expanduser("~/.sglang_port_lock")
|
319
|
+
PORT_REGISTRY = os.path.expanduser("~/.sglang_port_registry.json")
|
320
|
+
|
321
|
+
if not os.path.exists(LOCKFILE):
|
322
|
+
with open(LOCKFILE, "w") as f:
|
323
|
+
pass
|
324
|
+
|
325
|
+
if not os.path.exists(PORT_REGISTRY):
|
326
|
+
with open(PORT_REGISTRY, "w") as f:
|
327
|
+
json.dump([], f)
|
328
|
+
|
329
|
+
|
330
|
+
def print_highlight(html_content: str):
|
331
|
+
if is_in_ci():
|
332
|
+
html_content = str(html_content).replace("\n", "<br>")
|
333
|
+
display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
|
334
|
+
else:
|
335
|
+
print(html_content)
|
336
|
+
|
337
|
+
|
338
|
+
def init_port_registry():
|
339
|
+
"""Initialize the port registry file if it doesn't exist."""
|
340
|
+
if not os.path.exists(PORT_REGISTRY):
|
341
|
+
with open(PORT_REGISTRY, "w") as f:
|
342
|
+
json.dump([], f)
|
343
|
+
|
344
|
+
|
345
|
+
def reserve_port(start=30000, end=40000):
|
346
|
+
"""
|
347
|
+
Reserve an available port using a file lock and a registry.
|
348
|
+
Returns the allocated port.
|
310
349
|
"""
|
311
|
-
|
350
|
+
init_port_registry()
|
351
|
+
with open(LOCKFILE, "w") as lock:
|
352
|
+
fcntl.flock(lock, fcntl.LOCK_EX)
|
353
|
+
try:
|
354
|
+
with open(PORT_REGISTRY, "r") as f:
|
355
|
+
used = json.load(f)
|
356
|
+
except Exception:
|
357
|
+
used = []
|
358
|
+
for port in range(start, end):
|
359
|
+
if port not in used:
|
360
|
+
used.append(port)
|
361
|
+
with open(PORT_REGISTRY, "w") as f:
|
362
|
+
json.dump(used, f)
|
363
|
+
return port
|
364
|
+
raise RuntimeError("No free port available")
|
365
|
+
|
366
|
+
|
367
|
+
def release_port(port):
|
368
|
+
"""Release the reserved port by removing it from the registry."""
|
369
|
+
with open(LOCKFILE, "w") as lock:
|
370
|
+
fcntl.flock(lock, fcntl.LOCK_EX)
|
371
|
+
try:
|
372
|
+
with open(PORT_REGISTRY, "r") as f:
|
373
|
+
used = json.load(f)
|
374
|
+
except Exception:
|
375
|
+
used = []
|
376
|
+
if port in used:
|
377
|
+
used.remove(port)
|
378
|
+
with open(PORT_REGISTRY, "w") as f:
|
379
|
+
json.dump(used, f)
|
312
380
|
|
313
|
-
|
314
|
-
|
315
|
-
Returns:
|
316
|
-
subprocess.Popen: Process handle
|
381
|
+
|
382
|
+
def execute_shell_command(command: str) -> subprocess.Popen:
|
317
383
|
"""
|
318
|
-
|
384
|
+
Execute a shell command and return its process handle.
|
385
|
+
"""
|
386
|
+
# Replace newline continuations and split the command string.
|
319
387
|
command = command.replace("\\\n", " ").replace("\\", " ")
|
320
388
|
parts = command.split()
|
321
|
-
|
322
389
|
return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)
|
323
390
|
|
324
391
|
|
392
|
+
def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
|
393
|
+
"""
|
394
|
+
Launch the server using the given command.
|
395
|
+
If no port is specified, a free port is reserved.
|
396
|
+
"""
|
397
|
+
if port is None:
|
398
|
+
port = reserve_port()
|
399
|
+
full_command = f"{command} --port {port}"
|
400
|
+
process = execute_shell_command(full_command)
|
401
|
+
return process, port
|
402
|
+
|
403
|
+
|
404
|
+
def terminate_process(process, port=None):
|
405
|
+
"""
|
406
|
+
Terminate the process and, if a port was reserved, release it.
|
407
|
+
"""
|
408
|
+
from sglang.srt.utils import kill_process_tree
|
409
|
+
|
410
|
+
kill_process_tree(process.pid)
|
411
|
+
if port is not None:
|
412
|
+
release_port(port)
|
413
|
+
|
414
|
+
|
325
415
|
def wait_for_server(base_url: str, timeout: int = None) -> None:
|
326
416
|
"""Wait for the server to be ready by polling the /v1/models endpoint.
|
327
417
|
|
@@ -343,6 +433,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
|
|
343
433
|
NOTE: Typically, the server runs in a separate terminal.
|
344
434
|
In this notebook, we run the server and notebook code together, so their outputs are combined.
|
345
435
|
To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
|
436
|
+
We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.
|
346
437
|
"""
|
347
438
|
)
|
348
439
|
break
|
@@ -353,17 +444,6 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
|
|
353
444
|
time.sleep(1)
|
354
445
|
|
355
446
|
|
356
|
-
def terminate_process(process):
|
357
|
-
from sglang.srt.utils import kill_process_tree
|
358
|
-
|
359
|
-
kill_process_tree(process.pid)
|
360
|
-
|
361
|
-
|
362
|
-
def print_highlight(html_content: str):
|
363
|
-
html_content = str(html_content).replace("\n", "<br>")
|
364
|
-
display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
|
365
|
-
|
366
|
-
|
367
447
|
class TypeBasedDispatcher:
|
368
448
|
def __init__(self, mapping: List[Tuple[Type, Callable]]):
|
369
449
|
self._mapping = mapping
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.3"
|
1
|
+
__version__ = "0.4.3.post2"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.3
|
3
|
+
Version: 0.4.3.post2
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -235,7 +235,7 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
|
|
235
235
|
Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
|
236
236
|
Requires-Dist: uvicorn; extra == "runtime-common"
|
237
237
|
Requires-Dist: uvloop; extra == "runtime-common"
|
238
|
-
Requires-Dist: xgrammar
|
238
|
+
Requires-Dist: xgrammar==0.1.10; extra == "runtime-common"
|
239
239
|
Requires-Dist: ninja; extra == "runtime-common"
|
240
240
|
Provides-Extra: srt
|
241
241
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
@@ -243,7 +243,7 @@ Requires-Dist: cuda-python; extra == "srt"
|
|
243
243
|
Requires-Dist: sgl-kernel>=0.0.3.post6; extra == "srt"
|
244
244
|
Requires-Dist: torch; extra == "srt"
|
245
245
|
Requires-Dist: vllm<=0.7.2,>=0.6.4.post1; extra == "srt"
|
246
|
-
Requires-Dist: flashinfer_python>=0.2.1.
|
246
|
+
Requires-Dist: flashinfer_python>=0.2.1.post2; extra == "srt"
|
247
247
|
Requires-Dist: outlines<=0.1.11,>=0.0.44; extra == "srt"
|
248
248
|
Provides-Extra: srt-hip
|
249
249
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
@@ -9,20 +9,20 @@ sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
|
|
9
9
|
sglang/global_config.py,sha256=crt5cernXnDa1iQ8kGOq_ScTFclRlTQbJ-atFHM7I5I,1330
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
12
|
-
sglang/utils.py,sha256=
|
13
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/utils.py,sha256=9fm5ghtYPXqsWKjUzlQKJIoH5iFit6Rz21RhyaC3YL4,15673
|
13
|
+
sglang/version.py,sha256=AJcJAUlaCr4igIr4vVaRiJeU678q5BdTdso-33Tq98k,28
|
14
14
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
sglang/lang/chat_template.py,sha256=
|
15
|
+
sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
|
16
16
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
17
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
18
|
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
19
|
-
sglang/lang/ir.py,sha256=
|
19
|
+
sglang/lang/ir.py,sha256=YQlEX2eYMAVHG12xJ2Jds6S6el45_O-udsXJumpEoEQ,18552
|
20
20
|
sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
|
21
21
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
23
23
|
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
24
24
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
25
|
-
sglang/lang/backend/openai.py,sha256=
|
25
|
+
sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
|
26
26
|
sglang/lang/backend/runtime_endpoint.py,sha256=gM97bi8Kv8sLzCDJnH5ZZTQ9I6t31CeVUve7qdTsopo,16755
|
27
27
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
28
28
|
sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
|
@@ -30,21 +30,21 @@ sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
|
30
30
|
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
31
31
|
sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
|
32
32
|
sglang/srt/function_call_parser.py,sha256=YmagXt1BIuTbeiWmSleZwJFCFR5r5EFqVQqKnJDYXiE,19568
|
33
|
-
sglang/srt/hf_transformers_utils.py,sha256=
|
33
|
+
sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
|
34
34
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
35
35
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
36
36
|
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
37
|
-
sglang/srt/server_args.py,sha256=
|
37
|
+
sglang/srt/server_args.py,sha256=C7zyFuYidgt__ZaqK8tNV9zPByQNaLyUNMOogBzBjXM,41128
|
38
38
|
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
39
39
|
sglang/srt/utils.py,sha256=RVU-OORgeVQICMPzj17KHxbDdSYGOKFBnNR4dZejP9A,46780
|
40
|
-
sglang/srt/configs/__init__.py,sha256=
|
40
|
+
sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
|
41
41
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
42
42
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
43
43
|
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
44
44
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
45
45
|
sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoHgQQ0,3107
|
46
|
-
sglang/srt/configs/model_config.py,sha256=
|
47
|
-
sglang/srt/configs/
|
46
|
+
sglang/srt/configs/model_config.py,sha256=MPC1XJox6wo0Ut1LJ-05flKWlA95ZuzVKaDP9il4hD4,17023
|
47
|
+
sglang/srt/configs/qwen2_5_vl_config.py,sha256=J8jq6QwseIOgqXQ3nuEX_yRVMNbyYjleZbf4nEhniGk,48184
|
48
48
|
sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
|
49
49
|
sglang/srt/constrained/outlines_backend.py,sha256=yPYgz44n-rSCStGGkS1lGazFiQzN7gqwSvpJ2YG0co4,7081
|
50
50
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
@@ -61,7 +61,7 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
|
|
61
61
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
62
62
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
63
63
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
64
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
64
|
+
sglang/srt/entrypoints/engine.py,sha256=t2UhSOnr22BBCyoIhDJZIcmthQOQcL1iaB06LZqqAnU,17555
|
65
65
|
sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
|
66
66
|
sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
|
67
67
|
sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
|
@@ -77,7 +77,7 @@ sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSa
|
|
77
77
|
sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
|
78
78
|
sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
|
79
79
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=4mVyFPfZxPTwkQHGNCfI_4hQ8CbsWXJfxz-IQW77gAc,9143
|
80
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
80
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=MnQCTvOOPXy4RXvvZNSMjm1nPHXoysQ1cWGVf_yilQU,50518
|
81
81
|
sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
|
82
82
|
sglang/srt/layers/attention/triton_backend.py,sha256=mbYaYKHYrUyL2zEXrPtDRIcvVNe6L-bmcdLhKF92V-0,21292
|
83
83
|
sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
|
@@ -124,6 +124,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
|
|
124
124
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
|
125
125
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
|
126
126
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
127
|
+
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
|
127
128
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
|
128
129
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
|
129
130
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
|
@@ -210,24 +211,30 @@ sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttr
|
|
210
211
|
sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
|
211
212
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
212
213
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=CPo1WRF0HgsQMPBkvpoImElQMrfwpJLhEvL86e6fkPU,3247
|
214
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9qdyh6ki9LAyq7VDO9WMRmBOPWKSrZhU-I7z1E9bTKA,550
|
213
215
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
214
216
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
|
215
217
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9ya9f1Nt0g0RD-6sRRBZOaIPLSpSFZCz7jNvqTPrgFE,3732
|
216
218
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
217
219
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=UZljnxxCSjwnZlX3OgKWZJGXCf5BWF_agEpNX8I4Zxc,3248
|
218
220
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
221
|
+
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6POXzQZHdNwcBDv1w6BJKbLMRDt0jbFUuMsMNf-ToEs,549
|
219
222
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
220
223
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3matoCzEe4aexwoe7YTmkjyE4NA8khWXjL5EySuNwzA,3254
|
221
224
|
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
|
225
|
+
"sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=JeXNLkbMAjdDKV-WpzQy87SXN06towo3xUofLtvYCQI,551
|
222
226
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
|
223
227
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9f8Ib4gLEFSfdNpO8IL8uiONImvqnlPbJrZ0HM3OB-o,3247
|
224
228
|
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
|
229
|
+
"sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Boa83ZSPZ0LvzqtfLGvois5QK4TmJfwjA2n96c9ET58,549
|
225
230
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
|
226
231
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
227
232
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tG5_iVeRBHTgHX-liOf79nWRjj_lUZ-NQWTbBrBgORQ,3246
|
228
233
|
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
234
|
+
"sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9tCZxJ0eAD7AYMH7OqS3AGppJUllKnJLNvMq7FMXdsA,552
|
229
235
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
230
236
|
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
|
237
|
+
"sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=QMVfMXS0Yjgob8_9xps1xuZi6KnY5l2MeKxXLRjTeg4,548
|
231
238
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
232
239
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
|
233
240
|
"sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bSxTaptdcgj27mQGmdUmQtYTn4V_8EcmtRaVNigKjLA,3730
|
@@ -240,6 +247,7 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
240
247
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
|
241
248
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=B0lo3SuoQXhBEnojH2TwpVeurvlKD8yI8kQrJ5ORhWU,3249
|
242
249
|
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
|
250
|
+
"sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZYZ03ziPGwG2sCasEYLj7ZIP7vNO8UNBR5qNTmKgRMs,549
|
243
251
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
|
244
252
|
"sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
|
245
253
|
"sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
|
@@ -266,22 +274,28 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
|
|
266
274
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
267
275
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
|
268
276
|
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
277
|
+
"sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FcuzcmKFf2RbaUpAaAsuObUefcGMgNPMDbVdHXRkoGY,549
|
269
278
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
270
279
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=h32lCTFilLlyKbMeuJvNWG1v0yJJzNj93kwSvlrHfaY,3249
|
271
280
|
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
|
281
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kjQ_kvF38bZGcmaeJGSJsSR0NcUjUOh3LZ2-5c4kPvE,550
|
272
282
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
|
273
283
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0jX-z2lTgVw7ABLmWsIsQdqW4EjmbXKRDHye_XPLCAE,3245
|
274
284
|
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
|
285
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=mZGU39sobtUqNYKjtyIGjhOZyCOQFJMF3MinA1zjTJA,550
|
275
286
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
|
276
287
|
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_exM3wJ3FMmGHweBcH-8IxwZBzaOmPaF3ScMM6KDpiY,3253
|
288
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ue2oWml2ouUTZelYx5Nt5pgCmY-ib3mLV1reJL9ZudE,550
|
277
289
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
|
278
290
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
|
279
291
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENRWYdUwI0ooHb6IwcHliupRWOPnw-7-WtxZB-qQGJI,3245
|
280
292
|
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
293
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Nv9KP_KLGsRJdJF755dZBvbTws37u1GM2UigMRlAtl0,552
|
281
294
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
|
282
295
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
283
296
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TdWuE2RIsIyr4Im24MuWK3XyiNtbhO_hAiAXDz5gNUk,3246
|
284
297
|
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
|
298
|
+
"sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZOWEo__oUy8AhJiAlRCuGNAZNdNweFdWBFptJYkwxs8,552
|
285
299
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
286
300
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
|
287
301
|
"sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sTvaJ0RiCaQem4F1z7oES6RVRJ2gKgBuccX13S1SqGc,3733
|
@@ -318,9 +332,9 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
|
|
318
332
|
sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
|
319
333
|
sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
|
320
334
|
sglang/srt/managers/detokenizer_manager.py,sha256=XC2INyykOgwmIrFEGc-zf6LGZ5mMt6oPZt1YRXW_cbY,9650
|
321
|
-
sglang/srt/managers/image_processor.py,sha256=
|
335
|
+
sglang/srt/managers/image_processor.py,sha256=AWtCjl_zCbcn5LD4Hp4NXmsu225lQE0gWixIhQuUMpE,23872
|
322
336
|
sglang/srt/managers/io_struct.py,sha256=9jhu794cc_BljFmVL6kQseTHGZNwEzONdlGEy_wjAcA,18357
|
323
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
337
|
+
sglang/srt/managers/schedule_batch.py,sha256=70smg65ed4xbotRegd8NzRu1YKA6M0oKn2Q69i0qWgA,49246
|
324
338
|
sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
|
325
339
|
sglang/srt/managers/scheduler.py,sha256=w0FPjiU5MoyP58UdJoPBr-hf-WmlWPpqb-5TSJDJBLo,71908
|
326
340
|
sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
|
@@ -336,8 +350,8 @@ sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll5
|
|
336
350
|
sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
|
337
351
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
338
352
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=hH646E_c4UlclGEawPDjg4KHgTUEk70WrPl6C7nnltM,18774
|
339
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
340
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
353
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=cTyRuJVBTBmkP4LAfScRSRrpjLCq7UfmUKoXuU5LZUw,15098
|
354
|
+
sglang/srt/model_executor/model_runner.py,sha256=FNi5BTrpQ5VQ-VsMHujaAuSno3Y3DYYjymzPJLJoOIM,33609
|
341
355
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
342
356
|
sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
|
343
357
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
@@ -347,7 +361,8 @@ sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,
|
|
347
361
|
sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
|
348
362
|
sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
|
349
363
|
sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
|
350
|
-
sglang/srt/models/
|
364
|
+
sglang/srt/models/deepseek_nextn.py,sha256=QmzByVDFw8F5cJfBU4-VVryXovn4HxvGBwbBTfJavJg,11740
|
365
|
+
sglang/srt/models/deepseek_v2.py,sha256=vjY8BznqlEjX4P_mZQp1Syv24YxVee9Q258O8KWqE8I,39429
|
351
366
|
sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
|
352
367
|
sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
|
353
368
|
sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
|
@@ -363,7 +378,7 @@ sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJ
|
|
363
378
|
sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
|
364
379
|
sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
|
365
380
|
sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
|
366
|
-
sglang/srt/models/llava.py,sha256=
|
381
|
+
sglang/srt/models/llava.py,sha256=Qbh26DcC6djw5G8olq0AC0WqzkkRVsiuT8I6RPCpH0o,26384
|
367
382
|
sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
|
368
383
|
sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
|
369
384
|
sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
|
@@ -378,16 +393,17 @@ sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15
|
|
378
393
|
sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
|
379
394
|
sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
|
380
395
|
sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
|
396
|
+
sglang/srt/models/qwen2_5_vl.py,sha256=uSZEoCdyOlaANjnP21LxE7K_DqfG10JQ5sUkK6Ase2A,28045
|
381
397
|
sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
|
382
398
|
sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
|
383
|
-
sglang/srt/models/qwen2_vl.py,sha256=
|
399
|
+
sglang/srt/models/qwen2_vl.py,sha256=1LM4iyE4rHFRgP58hSFpKgZdaew_OSdwGRwwy3NiOzo,23523
|
384
400
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
385
401
|
sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
|
386
402
|
sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
|
387
403
|
sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
|
388
404
|
sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
|
389
405
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
390
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
406
|
+
sglang/srt/openai_api/adapter.py,sha256=tPsZ6cHlEofwJU7Cmfi3KtwSqvd3sv6EyeV6BfkdAcU,62349
|
391
407
|
sglang/srt/openai_api/protocol.py,sha256=UInFUKQqS8KWLrCzA6s5_uaNC6xAUAAJ4WepQzQ7xpo,11845
|
392
408
|
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
393
409
|
sglang/srt/sampling/sampling_batch_info.py,sha256=Ry1N79T9QQY_HJ8GjM50_W4tzKFxMtTfV4GccT7NQ0w,15129
|
@@ -401,8 +417,8 @@ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8
|
|
401
417
|
sglang/srt/speculative/build_eagle_tree.py,sha256=zWthboIgzPzSOXcGxDpDv0rBOQP55HYGrBKGqm2gWF0,20732
|
402
418
|
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FY4hcwd0Blx7AXbeX6quaXPNgWA8WGIqVcQiEgHyERk,8002
|
403
419
|
sglang/srt/speculative/eagle_utils.py,sha256=ypjVmVTVzCGclOVHRMJxdLUSPkf1-7bNXQS0oP6dn5U,25644
|
404
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
405
|
-
sglang/srt/speculative/spec_info.py,sha256=
|
420
|
+
sglang/srt/speculative/eagle_worker.py,sha256=w7sLcW-EeE_iWyMJQhBuSo5Zvq6iPe-3m73-OIP1b-E,13153
|
421
|
+
sglang/srt/speculative/spec_info.py,sha256=RWG4ik4Dah_V74mgP0gza6UaYFtN-BRV6aJZsHHGGtE,827
|
406
422
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
407
423
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
408
424
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
@@ -419,8 +435,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
|
|
419
435
|
sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
|
420
436
|
sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
|
421
437
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
422
|
-
sglang-0.4.3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
423
|
-
sglang-0.4.3.dist-info/METADATA,sha256=
|
424
|
-
sglang-0.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
425
|
-
sglang-0.4.3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
426
|
-
sglang-0.4.3.dist-info/RECORD,,
|
438
|
+
sglang-0.4.3.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
439
|
+
sglang-0.4.3.post2.dist-info/METADATA,sha256=kChnxBYuvq-HiSlLlTj6l3bOcIj_mLFuOsSQpUMGkZE,23821
|
440
|
+
sglang-0.4.3.post2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
441
|
+
sglang-0.4.3.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
442
|
+
sglang-0.4.3.post2.dist-info/RECORD,,
|