sglang 0.4.3__py3-none-any.whl → 0.4.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +16 -1
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +4 -1
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/METADATA +2 -2
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/RECORD +26 -24
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post1.dist-info}/top_level.txt +0 -0
sglang/srt/models/qwen2_vl.py
CHANGED
@@ -31,8 +31,9 @@ import torch
|
|
31
31
|
import torch.nn as nn
|
32
32
|
import torch.nn.functional as F
|
33
33
|
from einops import rearrange
|
34
|
+
from transformers import Qwen2VLConfig
|
35
|
+
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
|
34
36
|
|
35
|
-
from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
|
36
37
|
from sglang.srt.hf_transformers_utils import get_processor
|
37
38
|
from sglang.srt.layers.activation import QuickGELU
|
38
39
|
from sglang.srt.layers.attention.vision import VisionAttention
|
sglang/srt/openai_api/adapter.py
CHANGED
@@ -20,12 +20,14 @@ import os
|
|
20
20
|
import time
|
21
21
|
import uuid
|
22
22
|
from http import HTTPStatus
|
23
|
-
from typing import Dict, List
|
23
|
+
from typing import Dict, List
|
24
24
|
|
25
25
|
from fastapi import HTTPException, Request, UploadFile
|
26
26
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
27
27
|
from pydantic import ValidationError
|
28
28
|
|
29
|
+
from sglang.lang.chat_template import get_chat_template_by_model_path
|
30
|
+
|
29
31
|
try:
|
30
32
|
from outlines.fsm.json_schema import convert_json_schema_to_str
|
31
33
|
except ImportError:
|
@@ -92,7 +94,6 @@ file_id_response: Dict[str, FileResponse] = {}
|
|
92
94
|
# map file id to file path in SGLang backend
|
93
95
|
file_id_storage: Dict[str, str] = {}
|
94
96
|
|
95
|
-
|
96
97
|
# backend storage directory
|
97
98
|
storage_dir = None
|
98
99
|
|
@@ -116,12 +117,13 @@ def create_streaming_error_response(
|
|
116
117
|
return json_str
|
117
118
|
|
118
119
|
|
119
|
-
def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
|
120
|
+
def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg, model_path):
|
120
121
|
global chat_template_name
|
121
122
|
|
122
123
|
logger.info(
|
123
124
|
f"Use chat template for the OpenAI-compatible API server: {chat_template_arg}"
|
124
125
|
)
|
126
|
+
|
125
127
|
if not chat_template_exists(chat_template_arg):
|
126
128
|
if not os.path.exists(chat_template_arg):
|
127
129
|
raise RuntimeError(
|
@@ -163,6 +165,18 @@ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
|
|
163
165
|
else:
|
164
166
|
chat_template_name = chat_template_arg
|
165
167
|
|
168
|
+
# check chat-template
|
169
|
+
chat_template = get_chat_template_by_model_path(model_path)
|
170
|
+
if chat_template is not None:
|
171
|
+
official_chat_template = chat_template.name
|
172
|
+
used_chat_template = chat_template_name
|
173
|
+
if official_chat_template != used_chat_template:
|
174
|
+
logger.warning(
|
175
|
+
f"Using a chat_template: '{used_chat_template}', "
|
176
|
+
f"which is different from official chat template: '{official_chat_template}', "
|
177
|
+
f"This discrepancy may lead to performance degradation."
|
178
|
+
)
|
179
|
+
|
166
180
|
|
167
181
|
async def v1_files_create(file: UploadFile, purpose: str, file_storage_pth: str = None):
|
168
182
|
try:
|
sglang/srt/server_args.py
CHANGED
@@ -262,14 +262,17 @@ class ServerArgs:
|
|
262
262
|
)
|
263
263
|
|
264
264
|
# Speculative Decoding
|
265
|
-
if
|
265
|
+
if (
|
266
|
+
self.speculative_algorithm == "EAGLE"
|
267
|
+
or self.speculative_algorithm == "NEXTN"
|
268
|
+
):
|
266
269
|
self.prefill_only_one_req = True
|
267
270
|
self.disable_cuda_graph_padding = True
|
268
271
|
self.disable_radix_cache = True
|
269
272
|
self.disable_overlap_schedule = True
|
270
273
|
self.chunked_prefill_size = -1
|
271
274
|
logger.info(
|
272
|
-
"The radix cache, chunked prefill, and overlap scheduler are disabled because of using
|
275
|
+
f"The radix cache, chunked prefill, and overlap scheduler are disabled because of using {self.speculative_algorithm} speculative decoding."
|
273
276
|
)
|
274
277
|
|
275
278
|
# GGUF
|
@@ -705,7 +708,7 @@ class ServerArgs:
|
|
705
708
|
parser.add_argument(
|
706
709
|
"--speculative-algorithm",
|
707
710
|
type=str,
|
708
|
-
choices=["EAGLE"],
|
711
|
+
choices=["EAGLE", "NEXTN"],
|
709
712
|
help="Speculative algorithm.",
|
710
713
|
)
|
711
714
|
parser.add_argument(
|
@@ -24,6 +24,7 @@ from sglang.srt.speculative.eagle_utils import (
|
|
24
24
|
fast_topk,
|
25
25
|
select_top_k_tokens,
|
26
26
|
)
|
27
|
+
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
27
28
|
|
28
29
|
logger = logging.getLogger(__name__)
|
29
30
|
|
@@ -57,11 +58,15 @@ class EAGLEWorker(TpModelWorker):
|
|
57
58
|
# Parse arguments
|
58
59
|
self.topk = server_args.speculative_eagle_topk
|
59
60
|
self.speculative_num_steps = server_args.speculative_num_steps
|
61
|
+
self.speculative_algorithm = SpeculativeAlgorithm.from_string(
|
62
|
+
server_args.speculative_algorithm
|
63
|
+
)
|
60
64
|
self.server_args = server_args
|
61
65
|
|
62
66
|
# Share the embedding and lm_head
|
63
|
-
|
64
|
-
|
67
|
+
if not self.speculative_algorithm.is_nextn():
|
68
|
+
embed, head = self.target_worker.model_runner.model.get_embed_and_head()
|
69
|
+
self.model_runner.model.set_embed_and_head(embed, head)
|
65
70
|
self.model_runner.server_args.disable_cuda_graph = backup_disable_cuda_graph
|
66
71
|
|
67
72
|
# Create multi-step attn backends and cuda graph runners
|
@@ -5,18 +5,28 @@ class SpeculativeAlgorithm(IntEnum):
|
|
5
5
|
NONE = auto()
|
6
6
|
EAGLE = auto()
|
7
7
|
|
8
|
+
# NEXTN spec decoding is for DeepSeek V3/R1
|
9
|
+
# currently it's implemented based on EAGLE
|
10
|
+
NEXTN = auto()
|
11
|
+
|
8
12
|
def is_none(self):
|
9
13
|
return self == SpeculativeAlgorithm.NONE
|
10
14
|
|
11
15
|
def is_eagle(self):
|
12
|
-
return self == SpeculativeAlgorithm.EAGLE
|
16
|
+
return self == SpeculativeAlgorithm.EAGLE or self == SpeculativeAlgorithm.NEXTN
|
17
|
+
|
18
|
+
def is_nextn(self):
|
19
|
+
return self == SpeculativeAlgorithm.NEXTN
|
13
20
|
|
14
21
|
@staticmethod
|
15
22
|
def from_string(name: str):
|
16
23
|
name_map = {
|
17
24
|
"EAGLE": SpeculativeAlgorithm.EAGLE,
|
25
|
+
"NEXTN": SpeculativeAlgorithm.NEXTN,
|
18
26
|
None: SpeculativeAlgorithm.NONE,
|
19
27
|
}
|
28
|
+
if name is not None:
|
29
|
+
name = name.upper()
|
20
30
|
return name_map[name]
|
21
31
|
|
22
32
|
|
sglang/utils.py
CHANGED
@@ -306,22 +306,112 @@ def download_and_cache_file(url: str, filename: Optional[str] = None):
|
|
306
306
|
return filename
|
307
307
|
|
308
308
|
|
309
|
-
|
309
|
+
import fcntl
|
310
|
+
|
311
|
+
|
312
|
+
def is_in_ci():
|
313
|
+
from sglang.test.test_utils import is_in_ci
|
314
|
+
|
315
|
+
return is_in_ci()
|
316
|
+
|
317
|
+
|
318
|
+
LOCKFILE = os.path.expanduser("~/.sglang_port_lock")
|
319
|
+
PORT_REGISTRY = os.path.expanduser("~/.sglang_port_registry.json")
|
320
|
+
|
321
|
+
if not os.path.exists(LOCKFILE):
|
322
|
+
with open(LOCKFILE, "w") as f:
|
323
|
+
pass
|
324
|
+
|
325
|
+
if not os.path.exists(PORT_REGISTRY):
|
326
|
+
with open(PORT_REGISTRY, "w") as f:
|
327
|
+
json.dump([], f)
|
328
|
+
|
329
|
+
|
330
|
+
def print_highlight(html_content: str):
|
331
|
+
if is_in_ci():
|
332
|
+
html_content = str(html_content).replace("\n", "<br>")
|
333
|
+
display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
|
334
|
+
else:
|
335
|
+
print(html_content)
|
336
|
+
|
337
|
+
|
338
|
+
def init_port_registry():
|
339
|
+
"""Initialize the port registry file if it doesn't exist."""
|
340
|
+
if not os.path.exists(PORT_REGISTRY):
|
341
|
+
with open(PORT_REGISTRY, "w") as f:
|
342
|
+
json.dump([], f)
|
343
|
+
|
344
|
+
|
345
|
+
def reserve_port(start=30000, end=40000):
|
346
|
+
"""
|
347
|
+
Reserve an available port using a file lock and a registry.
|
348
|
+
Returns the allocated port.
|
310
349
|
"""
|
311
|
-
|
350
|
+
init_port_registry()
|
351
|
+
with open(LOCKFILE, "w") as lock:
|
352
|
+
fcntl.flock(lock, fcntl.LOCK_EX)
|
353
|
+
try:
|
354
|
+
with open(PORT_REGISTRY, "r") as f:
|
355
|
+
used = json.load(f)
|
356
|
+
except Exception:
|
357
|
+
used = []
|
358
|
+
for port in range(start, end):
|
359
|
+
if port not in used:
|
360
|
+
used.append(port)
|
361
|
+
with open(PORT_REGISTRY, "w") as f:
|
362
|
+
json.dump(used, f)
|
363
|
+
return port
|
364
|
+
raise RuntimeError("No free port available")
|
365
|
+
|
366
|
+
|
367
|
+
def release_port(port):
|
368
|
+
"""Release the reserved port by removing it from the registry."""
|
369
|
+
with open(LOCKFILE, "w") as lock:
|
370
|
+
fcntl.flock(lock, fcntl.LOCK_EX)
|
371
|
+
try:
|
372
|
+
with open(PORT_REGISTRY, "r") as f:
|
373
|
+
used = json.load(f)
|
374
|
+
except Exception:
|
375
|
+
used = []
|
376
|
+
if port in used:
|
377
|
+
used.remove(port)
|
378
|
+
with open(PORT_REGISTRY, "w") as f:
|
379
|
+
json.dump(used, f)
|
312
380
|
|
313
|
-
|
314
|
-
|
315
|
-
Returns:
|
316
|
-
subprocess.Popen: Process handle
|
381
|
+
|
382
|
+
def execute_shell_command(command: str) -> subprocess.Popen:
|
317
383
|
"""
|
318
|
-
|
384
|
+
Execute a shell command and return its process handle.
|
385
|
+
"""
|
386
|
+
# Replace newline continuations and split the command string.
|
319
387
|
command = command.replace("\\\n", " ").replace("\\", " ")
|
320
388
|
parts = command.split()
|
321
|
-
|
322
389
|
return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)
|
323
390
|
|
324
391
|
|
392
|
+
def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
|
393
|
+
"""
|
394
|
+
Launch the server using the given command.
|
395
|
+
If no port is specified, a free port is reserved.
|
396
|
+
"""
|
397
|
+
if port is None:
|
398
|
+
port = reserve_port()
|
399
|
+
full_command = f"{command} --port {port}"
|
400
|
+
process = execute_shell_command(full_command)
|
401
|
+
return process, port
|
402
|
+
|
403
|
+
|
404
|
+
def terminate_process(process, port=None):
|
405
|
+
"""
|
406
|
+
Terminate the process and, if a port was reserved, release it.
|
407
|
+
"""
|
408
|
+
from sglang.srt.utils import kill_process_tree
|
409
|
+
|
410
|
+
kill_process_tree(process.pid)
|
411
|
+
if port is not None:
|
412
|
+
release_port(port)
|
413
|
+
|
414
|
+
|
325
415
|
def wait_for_server(base_url: str, timeout: int = None) -> None:
|
326
416
|
"""Wait for the server to be ready by polling the /v1/models endpoint.
|
327
417
|
|
@@ -343,6 +433,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
|
|
343
433
|
NOTE: Typically, the server runs in a separate terminal.
|
344
434
|
In this notebook, we run the server and notebook code together, so their outputs are combined.
|
345
435
|
To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
|
436
|
+
We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.
|
346
437
|
"""
|
347
438
|
)
|
348
439
|
break
|
@@ -353,17 +444,6 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
|
|
353
444
|
time.sleep(1)
|
354
445
|
|
355
446
|
|
356
|
-
def terminate_process(process):
|
357
|
-
from sglang.srt.utils import kill_process_tree
|
358
|
-
|
359
|
-
kill_process_tree(process.pid)
|
360
|
-
|
361
|
-
|
362
|
-
def print_highlight(html_content: str):
|
363
|
-
html_content = str(html_content).replace("\n", "<br>")
|
364
|
-
display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
|
365
|
-
|
366
|
-
|
367
447
|
class TypeBasedDispatcher:
|
368
448
|
def __init__(self, mapping: List[Tuple[Type, Callable]]):
|
369
449
|
self._mapping = mapping
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.3"
|
1
|
+
__version__ = "0.4.3.post1"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.3
|
3
|
+
Version: 0.4.3.post1
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -235,7 +235,7 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
|
|
235
235
|
Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
|
236
236
|
Requires-Dist: uvicorn; extra == "runtime-common"
|
237
237
|
Requires-Dist: uvloop; extra == "runtime-common"
|
238
|
-
Requires-Dist: xgrammar
|
238
|
+
Requires-Dist: xgrammar==0.1.10; extra == "runtime-common"
|
239
239
|
Requires-Dist: ninja; extra == "runtime-common"
|
240
240
|
Provides-Extra: srt
|
241
241
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
@@ -9,20 +9,20 @@ sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
|
|
9
9
|
sglang/global_config.py,sha256=crt5cernXnDa1iQ8kGOq_ScTFclRlTQbJ-atFHM7I5I,1330
|
10
10
|
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
11
11
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
12
|
-
sglang/utils.py,sha256=
|
13
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/utils.py,sha256=9fm5ghtYPXqsWKjUzlQKJIoH5iFit6Rz21RhyaC3YL4,15673
|
13
|
+
sglang/version.py,sha256=rH9jaCKrx1Ahm1bUadSFX0yjfqoKnuKVlVyraMi28AU,28
|
14
14
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
sglang/lang/chat_template.py,sha256=
|
15
|
+
sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
|
16
16
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
17
17
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
18
18
|
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
19
|
-
sglang/lang/ir.py,sha256=
|
19
|
+
sglang/lang/ir.py,sha256=YQlEX2eYMAVHG12xJ2Jds6S6el45_O-udsXJumpEoEQ,18552
|
20
20
|
sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
|
21
21
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
23
23
|
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
24
24
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
25
|
-
sglang/lang/backend/openai.py,sha256=
|
25
|
+
sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
|
26
26
|
sglang/lang/backend/runtime_endpoint.py,sha256=gM97bi8Kv8sLzCDJnH5ZZTQ9I6t31CeVUve7qdTsopo,16755
|
27
27
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
28
28
|
sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
|
@@ -30,21 +30,21 @@ sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
|
30
30
|
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
31
31
|
sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
|
32
32
|
sglang/srt/function_call_parser.py,sha256=YmagXt1BIuTbeiWmSleZwJFCFR5r5EFqVQqKnJDYXiE,19568
|
33
|
-
sglang/srt/hf_transformers_utils.py,sha256=
|
33
|
+
sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
|
34
34
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
35
35
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
36
36
|
sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
|
37
|
-
sglang/srt/server_args.py,sha256=
|
37
|
+
sglang/srt/server_args.py,sha256=C7zyFuYidgt__ZaqK8tNV9zPByQNaLyUNMOogBzBjXM,41128
|
38
38
|
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
39
39
|
sglang/srt/utils.py,sha256=RVU-OORgeVQICMPzj17KHxbDdSYGOKFBnNR4dZejP9A,46780
|
40
|
-
sglang/srt/configs/__init__.py,sha256=
|
40
|
+
sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
|
41
41
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
42
42
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
43
43
|
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
44
44
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
45
45
|
sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoHgQQ0,3107
|
46
|
-
sglang/srt/configs/model_config.py,sha256=
|
47
|
-
sglang/srt/configs/
|
46
|
+
sglang/srt/configs/model_config.py,sha256=MPC1XJox6wo0Ut1LJ-05flKWlA95ZuzVKaDP9il4hD4,17023
|
47
|
+
sglang/srt/configs/qwen2_5_vl_config.py,sha256=J8jq6QwseIOgqXQ3nuEX_yRVMNbyYjleZbf4nEhniGk,48184
|
48
48
|
sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
|
49
49
|
sglang/srt/constrained/outlines_backend.py,sha256=yPYgz44n-rSCStGGkS1lGazFiQzN7gqwSvpJ2YG0co4,7081
|
50
50
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
@@ -61,7 +61,7 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
|
|
61
61
|
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
62
62
|
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
63
63
|
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
64
|
-
sglang/srt/entrypoints/engine.py,sha256=
|
64
|
+
sglang/srt/entrypoints/engine.py,sha256=cEVosKgOTKF8dKX7wA1vaVOdUP0qjFlZ-X9I4PJ_Ta0,17555
|
65
65
|
sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
|
66
66
|
sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
|
67
67
|
sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
|
@@ -318,7 +318,7 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
|
|
318
318
|
sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
|
319
319
|
sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
|
320
320
|
sglang/srt/managers/detokenizer_manager.py,sha256=XC2INyykOgwmIrFEGc-zf6LGZ5mMt6oPZt1YRXW_cbY,9650
|
321
|
-
sglang/srt/managers/image_processor.py,sha256=
|
321
|
+
sglang/srt/managers/image_processor.py,sha256=AWtCjl_zCbcn5LD4Hp4NXmsu225lQE0gWixIhQuUMpE,23872
|
322
322
|
sglang/srt/managers/io_struct.py,sha256=9jhu794cc_BljFmVL6kQseTHGZNwEzONdlGEy_wjAcA,18357
|
323
323
|
sglang/srt/managers/schedule_batch.py,sha256=smqDrzohvA8j76CLgI53CvpduheW1m__26S0O8HcCf0,49187
|
324
324
|
sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
|
@@ -336,7 +336,7 @@ sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll5
|
|
336
336
|
sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
|
337
337
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
338
338
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=hH646E_c4UlclGEawPDjg4KHgTUEk70WrPl6C7nnltM,18774
|
339
|
-
sglang/srt/model_executor/forward_batch_info.py,sha256=
|
339
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=cTyRuJVBTBmkP4LAfScRSRrpjLCq7UfmUKoXuU5LZUw,15098
|
340
340
|
sglang/srt/model_executor/model_runner.py,sha256=uohQ2n2R1HcVyaHwbdwM6xDvFxZSLgxacjMSrrogLpw,33537
|
341
341
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
342
342
|
sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
|
@@ -347,7 +347,8 @@ sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,
|
|
347
347
|
sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
|
348
348
|
sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
|
349
349
|
sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
|
350
|
-
sglang/srt/models/
|
350
|
+
sglang/srt/models/deepseek_nextn.py,sha256=QmzByVDFw8F5cJfBU4-VVryXovn4HxvGBwbBTfJavJg,11740
|
351
|
+
sglang/srt/models/deepseek_v2.py,sha256=Er72pYPVxs6hpms9yJL4iSQou7J6kA7mCsmapX9_LJQ,39248
|
351
352
|
sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
|
352
353
|
sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
|
353
354
|
sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
|
@@ -363,7 +364,7 @@ sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJ
|
|
363
364
|
sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
|
364
365
|
sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
|
365
366
|
sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
|
366
|
-
sglang/srt/models/llava.py,sha256=
|
367
|
+
sglang/srt/models/llava.py,sha256=Qbh26DcC6djw5G8olq0AC0WqzkkRVsiuT8I6RPCpH0o,26384
|
367
368
|
sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
|
368
369
|
sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
|
369
370
|
sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
|
@@ -378,16 +379,17 @@ sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15
|
|
378
379
|
sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
|
379
380
|
sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
|
380
381
|
sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
|
382
|
+
sglang/srt/models/qwen2_5_vl.py,sha256=uSZEoCdyOlaANjnP21LxE7K_DqfG10JQ5sUkK6Ase2A,28045
|
381
383
|
sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
|
382
384
|
sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
|
383
|
-
sglang/srt/models/qwen2_vl.py,sha256=
|
385
|
+
sglang/srt/models/qwen2_vl.py,sha256=1LM4iyE4rHFRgP58hSFpKgZdaew_OSdwGRwwy3NiOzo,23523
|
384
386
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
385
387
|
sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
|
386
388
|
sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
|
387
389
|
sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
|
388
390
|
sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
|
389
391
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
390
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
392
|
+
sglang/srt/openai_api/adapter.py,sha256=tPsZ6cHlEofwJU7Cmfi3KtwSqvd3sv6EyeV6BfkdAcU,62349
|
391
393
|
sglang/srt/openai_api/protocol.py,sha256=UInFUKQqS8KWLrCzA6s5_uaNC6xAUAAJ4WepQzQ7xpo,11845
|
392
394
|
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
393
395
|
sglang/srt/sampling/sampling_batch_info.py,sha256=Ry1N79T9QQY_HJ8GjM50_W4tzKFxMtTfV4GccT7NQ0w,15129
|
@@ -401,8 +403,8 @@ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8
|
|
401
403
|
sglang/srt/speculative/build_eagle_tree.py,sha256=zWthboIgzPzSOXcGxDpDv0rBOQP55HYGrBKGqm2gWF0,20732
|
402
404
|
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FY4hcwd0Blx7AXbeX6quaXPNgWA8WGIqVcQiEgHyERk,8002
|
403
405
|
sglang/srt/speculative/eagle_utils.py,sha256=ypjVmVTVzCGclOVHRMJxdLUSPkf1-7bNXQS0oP6dn5U,25644
|
404
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
405
|
-
sglang/srt/speculative/spec_info.py,sha256=
|
406
|
+
sglang/srt/speculative/eagle_worker.py,sha256=w7sLcW-EeE_iWyMJQhBuSo5Zvq6iPe-3m73-OIP1b-E,13153
|
407
|
+
sglang/srt/speculative/spec_info.py,sha256=RWG4ik4Dah_V74mgP0gza6UaYFtN-BRV6aJZsHHGGtE,827
|
406
408
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
407
409
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
408
410
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
@@ -419,8 +421,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
|
|
419
421
|
sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
|
420
422
|
sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
|
421
423
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
422
|
-
sglang-0.4.3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
423
|
-
sglang-0.4.3.dist-info/METADATA,sha256=
|
424
|
-
sglang-0.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
425
|
-
sglang-0.4.3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
426
|
-
sglang-0.4.3.dist-info/RECORD,,
|
424
|
+
sglang-0.4.3.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
425
|
+
sglang-0.4.3.post1.dist-info/METADATA,sha256=TypZMxQ7xbJ3Xh34H0HYZV4bZ8qrID2KMbtggp7j3mQ,23821
|
426
|
+
sglang-0.4.3.post1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
427
|
+
sglang-0.4.3.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
428
|
+
sglang-0.4.3.post1.dist-info/RECORD,,
|
sglang/srt/configs/qwen2vl.py
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
# coding=utf-8
|
2
|
-
# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
|
3
|
-
# All rights reserved.
|
4
|
-
#
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
-
# you may not use this file except in compliance with the License.
|
7
|
-
# You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing, software
|
12
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
-
# See the License for the specific language governing permissions and
|
15
|
-
# limitations under the License.
|
16
|
-
"""Qwen2VL model configuration"""
|
17
|
-
|
18
|
-
import os
|
19
|
-
from typing import Union
|
20
|
-
|
21
|
-
from transformers import PretrainedConfig
|
22
|
-
|
23
|
-
|
24
|
-
class Qwen2VLVisionConfig(PretrainedConfig):
|
25
|
-
model_type = "qwen2_vl"
|
26
|
-
|
27
|
-
def __init__(
|
28
|
-
self,
|
29
|
-
depth=32,
|
30
|
-
embed_dim=1280,
|
31
|
-
hidden_size=3584,
|
32
|
-
hidden_act="quick_gelu",
|
33
|
-
mlp_ratio=4,
|
34
|
-
num_heads=16,
|
35
|
-
in_channels=3,
|
36
|
-
patch_size=14,
|
37
|
-
spatial_merge_size=2,
|
38
|
-
temporal_patch_size=2,
|
39
|
-
**kwargs,
|
40
|
-
):
|
41
|
-
super().__init__(**kwargs)
|
42
|
-
|
43
|
-
self.depth = depth
|
44
|
-
self.embed_dim = embed_dim
|
45
|
-
self.hidden_size = hidden_size
|
46
|
-
self.hidden_act = hidden_act
|
47
|
-
self.mlp_ratio = mlp_ratio
|
48
|
-
self.num_heads = num_heads
|
49
|
-
self.in_channels = in_channels
|
50
|
-
self.patch_size = patch_size
|
51
|
-
self.spatial_merge_size = spatial_merge_size
|
52
|
-
self.temporal_patch_size = temporal_patch_size
|
53
|
-
|
54
|
-
@classmethod
|
55
|
-
def from_pretrained(
|
56
|
-
cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
|
57
|
-
) -> "PretrainedConfig":
|
58
|
-
cls._set_token_in_kwargs(kwargs)
|
59
|
-
|
60
|
-
config_dict, kwargs = cls.get_config_dict(
|
61
|
-
pretrained_model_name_or_path, **kwargs
|
62
|
-
)
|
63
|
-
|
64
|
-
if config_dict.get("model_type") == "qwen2_vl":
|
65
|
-
config_dict = config_dict["vision_config"]
|
66
|
-
|
67
|
-
return cls.from_dict(config_dict, **kwargs)
|
68
|
-
|
69
|
-
|
70
|
-
class Qwen2VLConfig(PretrainedConfig):
|
71
|
-
model_type = "qwen2_vl"
|
72
|
-
|
73
|
-
def __init__(
|
74
|
-
self,
|
75
|
-
vocab_size=152064,
|
76
|
-
hidden_size=8192,
|
77
|
-
intermediate_size=29568,
|
78
|
-
num_hidden_layers=80,
|
79
|
-
num_attention_heads=64,
|
80
|
-
num_key_value_heads=8,
|
81
|
-
hidden_act="silu",
|
82
|
-
max_position_embeddings=32768,
|
83
|
-
initializer_range=0.02,
|
84
|
-
rms_norm_eps=1e-05,
|
85
|
-
use_cache=True,
|
86
|
-
tie_word_embeddings=False,
|
87
|
-
rope_theta=1000000.0,
|
88
|
-
use_sliding_window=False,
|
89
|
-
sliding_window=4096,
|
90
|
-
max_window_layers=80,
|
91
|
-
attention_dropout=0.0,
|
92
|
-
vision_config=None,
|
93
|
-
rope_scaling=None,
|
94
|
-
**kwargs,
|
95
|
-
):
|
96
|
-
if isinstance(vision_config, dict):
|
97
|
-
self.vision_config = Qwen2VLVisionConfig(**vision_config)
|
98
|
-
elif vision_config is None:
|
99
|
-
self.vision_config = Qwen2VLVisionConfig()
|
100
|
-
|
101
|
-
self.vocab_size = vocab_size
|
102
|
-
self.max_position_embeddings = max_position_embeddings
|
103
|
-
self.hidden_size = hidden_size
|
104
|
-
self.intermediate_size = intermediate_size
|
105
|
-
self.num_hidden_layers = num_hidden_layers
|
106
|
-
self.num_attention_heads = num_attention_heads
|
107
|
-
self.use_sliding_window = use_sliding_window
|
108
|
-
self.sliding_window = sliding_window
|
109
|
-
self.max_window_layers = max_window_layers
|
110
|
-
|
111
|
-
# for backward compatibility
|
112
|
-
if num_key_value_heads is None:
|
113
|
-
num_key_value_heads = num_attention_heads
|
114
|
-
|
115
|
-
self.num_key_value_heads = num_key_value_heads
|
116
|
-
self.hidden_act = hidden_act
|
117
|
-
self.initializer_range = initializer_range
|
118
|
-
self.rms_norm_eps = rms_norm_eps
|
119
|
-
self.use_cache = use_cache
|
120
|
-
self.rope_theta = rope_theta
|
121
|
-
self.attention_dropout = attention_dropout
|
122
|
-
self.rope_scaling = rope_scaling
|
123
|
-
|
124
|
-
# NOTE(HandH1998): This is necessary for configuring the `rope_type`` of qwen2vl models after removing dependencies on vllm.
|
125
|
-
if self.rope_scaling is not None and "type" in self.rope_scaling:
|
126
|
-
if self.rope_scaling["type"] == "mrope":
|
127
|
-
self.rope_scaling["type"] = "default"
|
128
|
-
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
|
129
|
-
|
130
|
-
super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
|
File without changes
|
File without changes
|
File without changes
|