sglang 0.4.3__py3-none-any.whl → 0.4.3.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,8 +31,9 @@ import torch
31
31
  import torch.nn as nn
32
32
  import torch.nn.functional as F
33
33
  from einops import rearrange
34
+ from transformers import Qwen2VLConfig
35
+ from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
34
36
 
35
- from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
36
37
  from sglang.srt.hf_transformers_utils import get_processor
37
38
  from sglang.srt.layers.activation import QuickGELU
38
39
  from sglang.srt.layers.attention.vision import VisionAttention
@@ -20,12 +20,14 @@ import os
20
20
  import time
21
21
  import uuid
22
22
  from http import HTTPStatus
23
- from typing import Dict, List, Optional
23
+ from typing import Dict, List
24
24
 
25
25
  from fastapi import HTTPException, Request, UploadFile
26
26
  from fastapi.responses import ORJSONResponse, StreamingResponse
27
27
  from pydantic import ValidationError
28
28
 
29
+ from sglang.lang.chat_template import get_chat_template_by_model_path
30
+
29
31
  try:
30
32
  from outlines.fsm.json_schema import convert_json_schema_to_str
31
33
  except ImportError:
@@ -92,7 +94,6 @@ file_id_response: Dict[str, FileResponse] = {}
92
94
  # map file id to file path in SGLang backend
93
95
  file_id_storage: Dict[str, str] = {}
94
96
 
95
-
96
97
  # backend storage directory
97
98
  storage_dir = None
98
99
 
@@ -116,12 +117,13 @@ def create_streaming_error_response(
116
117
  return json_str
117
118
 
118
119
 
119
- def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
120
+ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg, model_path):
120
121
  global chat_template_name
121
122
 
122
123
  logger.info(
123
124
  f"Use chat template for the OpenAI-compatible API server: {chat_template_arg}"
124
125
  )
126
+
125
127
  if not chat_template_exists(chat_template_arg):
126
128
  if not os.path.exists(chat_template_arg):
127
129
  raise RuntimeError(
@@ -163,6 +165,18 @@ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
163
165
  else:
164
166
  chat_template_name = chat_template_arg
165
167
 
168
+ # check chat-template
169
+ chat_template = get_chat_template_by_model_path(model_path)
170
+ if chat_template is not None:
171
+ official_chat_template = chat_template.name
172
+ used_chat_template = chat_template_name
173
+ if official_chat_template != used_chat_template:
174
+ logger.warning(
175
+ f"Using a chat_template: '{used_chat_template}', "
176
+ f"which is different from official chat template: '{official_chat_template}', "
177
+ f"This discrepancy may lead to performance degradation."
178
+ )
179
+
166
180
 
167
181
  async def v1_files_create(file: UploadFile, purpose: str, file_storage_pth: str = None):
168
182
  try:
sglang/srt/server_args.py CHANGED
@@ -262,14 +262,17 @@ class ServerArgs:
262
262
  )
263
263
 
264
264
  # Speculative Decoding
265
- if self.speculative_algorithm == "EAGLE":
265
+ if (
266
+ self.speculative_algorithm == "EAGLE"
267
+ or self.speculative_algorithm == "NEXTN"
268
+ ):
266
269
  self.prefill_only_one_req = True
267
270
  self.disable_cuda_graph_padding = True
268
271
  self.disable_radix_cache = True
269
272
  self.disable_overlap_schedule = True
270
273
  self.chunked_prefill_size = -1
271
274
  logger.info(
272
- "The radix cache, chunked prefill, and overlap scheduler are disabled because of using eagle speculative decoding."
275
+ f"The radix cache, chunked prefill, and overlap scheduler are disabled because of using {self.speculative_algorithm} speculative decoding."
273
276
  )
274
277
 
275
278
  # GGUF
@@ -705,7 +708,7 @@ class ServerArgs:
705
708
  parser.add_argument(
706
709
  "--speculative-algorithm",
707
710
  type=str,
708
- choices=["EAGLE"],
711
+ choices=["EAGLE", "NEXTN"],
709
712
  help="Speculative algorithm.",
710
713
  )
711
714
  parser.add_argument(
@@ -24,6 +24,7 @@ from sglang.srt.speculative.eagle_utils import (
24
24
  fast_topk,
25
25
  select_top_k_tokens,
26
26
  )
27
+ from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
27
28
 
28
29
  logger = logging.getLogger(__name__)
29
30
 
@@ -57,11 +58,15 @@ class EAGLEWorker(TpModelWorker):
57
58
  # Parse arguments
58
59
  self.topk = server_args.speculative_eagle_topk
59
60
  self.speculative_num_steps = server_args.speculative_num_steps
61
+ self.speculative_algorithm = SpeculativeAlgorithm.from_string(
62
+ server_args.speculative_algorithm
63
+ )
60
64
  self.server_args = server_args
61
65
 
62
66
  # Share the embedding and lm_head
63
- embed, head = self.target_worker.model_runner.model.get_embed_and_head()
64
- self.model_runner.model.set_embed_and_head(embed, head)
67
+ if not self.speculative_algorithm.is_nextn():
68
+ embed, head = self.target_worker.model_runner.model.get_embed_and_head()
69
+ self.model_runner.model.set_embed_and_head(embed, head)
65
70
  self.model_runner.server_args.disable_cuda_graph = backup_disable_cuda_graph
66
71
 
67
72
  # Create multi-step attn backends and cuda graph runners
@@ -5,18 +5,28 @@ class SpeculativeAlgorithm(IntEnum):
5
5
  NONE = auto()
6
6
  EAGLE = auto()
7
7
 
8
+ # NEXTN spec decoding is for DeepSeek V3/R1
9
+ # currently it's implemented based on EAGLE
10
+ NEXTN = auto()
11
+
8
12
  def is_none(self):
9
13
  return self == SpeculativeAlgorithm.NONE
10
14
 
11
15
  def is_eagle(self):
12
- return self == SpeculativeAlgorithm.EAGLE
16
+ return self == SpeculativeAlgorithm.EAGLE or self == SpeculativeAlgorithm.NEXTN
17
+
18
+ def is_nextn(self):
19
+ return self == SpeculativeAlgorithm.NEXTN
13
20
 
14
21
  @staticmethod
15
22
  def from_string(name: str):
16
23
  name_map = {
17
24
  "EAGLE": SpeculativeAlgorithm.EAGLE,
25
+ "NEXTN": SpeculativeAlgorithm.NEXTN,
18
26
  None: SpeculativeAlgorithm.NONE,
19
27
  }
28
+ if name is not None:
29
+ name = name.upper()
20
30
  return name_map[name]
21
31
 
22
32
 
sglang/utils.py CHANGED
@@ -306,22 +306,112 @@ def download_and_cache_file(url: str, filename: Optional[str] = None):
306
306
  return filename
307
307
 
308
308
 
309
- def execute_shell_command(command: str) -> subprocess.Popen:
309
+ import fcntl
310
+
311
+
312
+ def is_in_ci():
313
+ from sglang.test.test_utils import is_in_ci
314
+
315
+ return is_in_ci()
316
+
317
+
318
+ LOCKFILE = os.path.expanduser("~/.sglang_port_lock")
319
+ PORT_REGISTRY = os.path.expanduser("~/.sglang_port_registry.json")
320
+
321
+ if not os.path.exists(LOCKFILE):
322
+ with open(LOCKFILE, "w") as f:
323
+ pass
324
+
325
+ if not os.path.exists(PORT_REGISTRY):
326
+ with open(PORT_REGISTRY, "w") as f:
327
+ json.dump([], f)
328
+
329
+
330
+ def print_highlight(html_content: str):
331
+ if is_in_ci():
332
+ html_content = str(html_content).replace("\n", "<br>")
333
+ display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
334
+ else:
335
+ print(html_content)
336
+
337
+
338
+ def init_port_registry():
339
+ """Initialize the port registry file if it doesn't exist."""
340
+ if not os.path.exists(PORT_REGISTRY):
341
+ with open(PORT_REGISTRY, "w") as f:
342
+ json.dump([], f)
343
+
344
+
345
+ def reserve_port(start=30000, end=40000):
346
+ """
347
+ Reserve an available port using a file lock and a registry.
348
+ Returns the allocated port.
310
349
  """
311
- Execute a shell command and return the process handle
350
+ init_port_registry()
351
+ with open(LOCKFILE, "w") as lock:
352
+ fcntl.flock(lock, fcntl.LOCK_EX)
353
+ try:
354
+ with open(PORT_REGISTRY, "r") as f:
355
+ used = json.load(f)
356
+ except Exception:
357
+ used = []
358
+ for port in range(start, end):
359
+ if port not in used:
360
+ used.append(port)
361
+ with open(PORT_REGISTRY, "w") as f:
362
+ json.dump(used, f)
363
+ return port
364
+ raise RuntimeError("No free port available")
365
+
366
+
367
+ def release_port(port):
368
+ """Release the reserved port by removing it from the registry."""
369
+ with open(LOCKFILE, "w") as lock:
370
+ fcntl.flock(lock, fcntl.LOCK_EX)
371
+ try:
372
+ with open(PORT_REGISTRY, "r") as f:
373
+ used = json.load(f)
374
+ except Exception:
375
+ used = []
376
+ if port in used:
377
+ used.remove(port)
378
+ with open(PORT_REGISTRY, "w") as f:
379
+ json.dump(used, f)
312
380
 
313
- Args:
314
- command: Shell command as a string (can include \\ line continuations)
315
- Returns:
316
- subprocess.Popen: Process handle
381
+
382
+ def execute_shell_command(command: str) -> subprocess.Popen:
317
383
  """
318
- # Replace \ newline with space and split
384
+ Execute a shell command and return its process handle.
385
+ """
386
+ # Replace newline continuations and split the command string.
319
387
  command = command.replace("\\\n", " ").replace("\\", " ")
320
388
  parts = command.split()
321
-
322
389
  return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)
323
390
 
324
391
 
392
+ def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
393
+ """
394
+ Launch the server using the given command.
395
+ If no port is specified, a free port is reserved.
396
+ """
397
+ if port is None:
398
+ port = reserve_port()
399
+ full_command = f"{command} --port {port}"
400
+ process = execute_shell_command(full_command)
401
+ return process, port
402
+
403
+
404
+ def terminate_process(process, port=None):
405
+ """
406
+ Terminate the process and, if a port was reserved, release it.
407
+ """
408
+ from sglang.srt.utils import kill_process_tree
409
+
410
+ kill_process_tree(process.pid)
411
+ if port is not None:
412
+ release_port(port)
413
+
414
+
325
415
  def wait_for_server(base_url: str, timeout: int = None) -> None:
326
416
  """Wait for the server to be ready by polling the /v1/models endpoint.
327
417
 
@@ -343,6 +433,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
343
433
  NOTE: Typically, the server runs in a separate terminal.
344
434
  In this notebook, we run the server and notebook code together, so their outputs are combined.
345
435
  To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
436
+ We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.
346
437
  """
347
438
  )
348
439
  break
@@ -353,17 +444,6 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
353
444
  time.sleep(1)
354
445
 
355
446
 
356
- def terminate_process(process):
357
- from sglang.srt.utils import kill_process_tree
358
-
359
- kill_process_tree(process.pid)
360
-
361
-
362
- def print_highlight(html_content: str):
363
- html_content = str(html_content).replace("\n", "<br>")
364
- display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
365
-
366
-
367
447
  class TypeBasedDispatcher:
368
448
  def __init__(self, mapping: List[Tuple[Type, Callable]]):
369
449
  self._mapping = mapping
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.3"
1
+ __version__ = "0.4.3.post1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: sglang
3
- Version: 0.4.3
3
+ Version: 0.4.3.post1
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -235,7 +235,7 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
235
235
  Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
236
236
  Requires-Dist: uvicorn; extra == "runtime-common"
237
237
  Requires-Dist: uvloop; extra == "runtime-common"
238
- Requires-Dist: xgrammar>=0.1.10; extra == "runtime-common"
238
+ Requires-Dist: xgrammar==0.1.10; extra == "runtime-common"
239
239
  Requires-Dist: ninja; extra == "runtime-common"
240
240
  Provides-Extra: srt
241
241
  Requires-Dist: sglang[runtime_common]; extra == "srt"
@@ -9,20 +9,20 @@ sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
9
9
  sglang/global_config.py,sha256=crt5cernXnDa1iQ8kGOq_ScTFclRlTQbJ-atFHM7I5I,1330
10
10
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
12
- sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
13
- sglang/version.py,sha256=Nyg0pmk5ea9-SLCAFEIF96ByFx4-TJFtrqYPN-Zn6g4,22
12
+ sglang/utils.py,sha256=9fm5ghtYPXqsWKjUzlQKJIoH5iFit6Rz21RhyaC3YL4,15673
13
+ sglang/version.py,sha256=rH9jaCKrx1Ahm1bUadSFX0yjfqoKnuKVlVyraMi28AU,28
14
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
15
+ sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
16
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
17
17
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
18
18
  sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
19
- sglang/lang/ir.py,sha256=dtA6rs5JIN0tMm3jhgRqdpRhH2Sckil-BMyLRMyBEIY,18494
19
+ sglang/lang/ir.py,sha256=YQlEX2eYMAVHG12xJ2Jds6S6el45_O-udsXJumpEoEQ,18552
20
20
  sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
21
21
  sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
23
23
  sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
24
24
  sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
25
- sglang/lang/backend/openai.py,sha256=ha9a2P6T80TmSgYlyIwB1qYawWkjcOgiOptkktkqa1U,15436
25
+ sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
26
26
  sglang/lang/backend/runtime_endpoint.py,sha256=gM97bi8Kv8sLzCDJnH5ZZTQ9I6t31CeVUve7qdTsopo,16755
27
27
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
28
28
  sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
@@ -30,21 +30,21 @@ sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
30
30
  sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
31
31
  sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
32
32
  sglang/srt/function_call_parser.py,sha256=YmagXt1BIuTbeiWmSleZwJFCFR5r5EFqVQqKnJDYXiE,19568
33
- sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
33
+ sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
35
35
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
36
36
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
37
- sglang/srt/server_args.py,sha256=97YAjIbttBjty9Rv9CQ-yGERprFRSu2Jq0zX8Yx-QH8,41018
37
+ sglang/srt/server_args.py,sha256=C7zyFuYidgt__ZaqK8tNV9zPByQNaLyUNMOogBzBjXM,41128
38
38
  sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
39
39
  sglang/srt/utils.py,sha256=RVU-OORgeVQICMPzj17KHxbDdSYGOKFBnNR4dZejP9A,46780
40
- sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
40
+ sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
41
41
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
42
42
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
43
43
  sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
44
44
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
45
45
  sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoHgQQ0,3107
46
- sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
47
- sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
46
+ sglang/srt/configs/model_config.py,sha256=MPC1XJox6wo0Ut1LJ-05flKWlA95ZuzVKaDP9il4hD4,17023
47
+ sglang/srt/configs/qwen2_5_vl_config.py,sha256=J8jq6QwseIOgqXQ3nuEX_yRVMNbyYjleZbf4nEhniGk,48184
48
48
  sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
49
49
  sglang/srt/constrained/outlines_backend.py,sha256=yPYgz44n-rSCStGGkS1lGazFiQzN7gqwSvpJ2YG0co4,7081
50
50
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
@@ -61,7 +61,7 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
61
61
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
62
62
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
63
63
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
64
- sglang/srt/entrypoints/engine.py,sha256=3JL7aj0K5UHCBsqCOG2MTLAv-8IpwtN2K-3_xB6fNqk,16888
64
+ sglang/srt/entrypoints/engine.py,sha256=cEVosKgOTKF8dKX7wA1vaVOdUP0qjFlZ-X9I4PJ_Ta0,17555
65
65
  sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
66
66
  sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
67
67
  sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
@@ -318,7 +318,7 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
318
318
  sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
319
319
  sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
320
320
  sglang/srt/managers/detokenizer_manager.py,sha256=XC2INyykOgwmIrFEGc-zf6LGZ5mMt6oPZt1YRXW_cbY,9650
321
- sglang/srt/managers/image_processor.py,sha256=s1QH9cSzT_nnitc6idzFjuGDp-pDnMTpbVZoQfzdSXU,20671
321
+ sglang/srt/managers/image_processor.py,sha256=AWtCjl_zCbcn5LD4Hp4NXmsu225lQE0gWixIhQuUMpE,23872
322
322
  sglang/srt/managers/io_struct.py,sha256=9jhu794cc_BljFmVL6kQseTHGZNwEzONdlGEy_wjAcA,18357
323
323
  sglang/srt/managers/schedule_batch.py,sha256=smqDrzohvA8j76CLgI53CvpduheW1m__26S0O8HcCf0,49187
324
324
  sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
@@ -336,7 +336,7 @@ sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll5
336
336
  sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
337
337
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
338
338
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=hH646E_c4UlclGEawPDjg4KHgTUEk70WrPl6C7nnltM,18774
339
- sglang/srt/model_executor/forward_batch_info.py,sha256=t1RlBgoeS-_Ikl28Xjvt-aouh1nNUc3eLM4iGY4_QqY,14988
339
+ sglang/srt/model_executor/forward_batch_info.py,sha256=cTyRuJVBTBmkP4LAfScRSRrpjLCq7UfmUKoXuU5LZUw,15098
340
340
  sglang/srt/model_executor/model_runner.py,sha256=uohQ2n2R1HcVyaHwbdwM6xDvFxZSLgxacjMSrrogLpw,33537
341
341
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
342
342
  sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
@@ -347,7 +347,8 @@ sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,
347
347
  sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
348
348
  sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
349
349
  sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
350
- sglang/srt/models/deepseek_v2.py,sha256=9_284fDT15WEmv8qfnH2EzKX8fxSZrNyiz4iQtgb0tI,39065
350
+ sglang/srt/models/deepseek_nextn.py,sha256=QmzByVDFw8F5cJfBU4-VVryXovn4HxvGBwbBTfJavJg,11740
351
+ sglang/srt/models/deepseek_v2.py,sha256=Er72pYPVxs6hpms9yJL4iSQou7J6kA7mCsmapX9_LJQ,39248
351
352
  sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
352
353
  sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
353
354
  sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
@@ -363,7 +364,7 @@ sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJ
363
364
  sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
364
365
  sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
365
366
  sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
366
- sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
367
+ sglang/srt/models/llava.py,sha256=Qbh26DcC6djw5G8olq0AC0WqzkkRVsiuT8I6RPCpH0o,26384
367
368
  sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
368
369
  sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
369
370
  sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
@@ -378,16 +379,17 @@ sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15
378
379
  sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
379
380
  sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
380
381
  sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
382
+ sglang/srt/models/qwen2_5_vl.py,sha256=uSZEoCdyOlaANjnP21LxE7K_DqfG10JQ5sUkK6Ase2A,28045
381
383
  sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
382
384
  sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
383
- sglang/srt/models/qwen2_vl.py,sha256=d8jCZyoJ6sMZ8-Pw9bHb3VbwiZlbFzJvffA4yYBRj0U,23466
385
+ sglang/srt/models/qwen2_vl.py,sha256=1LM4iyE4rHFRgP58hSFpKgZdaew_OSdwGRwwy3NiOzo,23523
384
386
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
385
387
  sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
386
388
  sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
387
389
  sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
388
390
  sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
389
391
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
390
- sglang/srt/openai_api/adapter.py,sha256=yQaAiZ43w3OIsNj8nC6BEjt9DPE5IZ-ovLAd0r7LbcY,61716
392
+ sglang/srt/openai_api/adapter.py,sha256=tPsZ6cHlEofwJU7Cmfi3KtwSqvd3sv6EyeV6BfkdAcU,62349
391
393
  sglang/srt/openai_api/protocol.py,sha256=UInFUKQqS8KWLrCzA6s5_uaNC6xAUAAJ4WepQzQ7xpo,11845
392
394
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
393
395
  sglang/srt/sampling/sampling_batch_info.py,sha256=Ry1N79T9QQY_HJ8GjM50_W4tzKFxMtTfV4GccT7NQ0w,15129
@@ -401,8 +403,8 @@ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8
401
403
  sglang/srt/speculative/build_eagle_tree.py,sha256=zWthboIgzPzSOXcGxDpDv0rBOQP55HYGrBKGqm2gWF0,20732
402
404
  sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FY4hcwd0Blx7AXbeX6quaXPNgWA8WGIqVcQiEgHyERk,8002
403
405
  sglang/srt/speculative/eagle_utils.py,sha256=ypjVmVTVzCGclOVHRMJxdLUSPkf1-7bNXQS0oP6dn5U,25644
404
- sglang/srt/speculative/eagle_worker.py,sha256=33zC6txEsNp9hD48iy-_67ov83Pf4iASulLg8GHLy5U,12898
405
- sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
406
+ sglang/srt/speculative/eagle_worker.py,sha256=w7sLcW-EeE_iWyMJQhBuSo5Zvq6iPe-3m73-OIP1b-E,13153
407
+ sglang/srt/speculative/spec_info.py,sha256=RWG4ik4Dah_V74mgP0gza6UaYFtN-BRV6aJZsHHGGtE,827
406
408
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
407
409
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
408
410
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
@@ -419,8 +421,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
419
421
  sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
420
422
  sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
421
423
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
422
- sglang-0.4.3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
423
- sglang-0.4.3.dist-info/METADATA,sha256=cpA0ecZd4jfaThOrStEpOGbXDTorUxqYdU4catzo2t4,23815
424
- sglang-0.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
425
- sglang-0.4.3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
426
- sglang-0.4.3.dist-info/RECORD,,
424
+ sglang-0.4.3.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
425
+ sglang-0.4.3.post1.dist-info/METADATA,sha256=TypZMxQ7xbJ3Xh34H0HYZV4bZ8qrID2KMbtggp7j3mQ,23821
426
+ sglang-0.4.3.post1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
427
+ sglang-0.4.3.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
428
+ sglang-0.4.3.post1.dist-info/RECORD,,
@@ -1,130 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
3
- # All rights reserved.
4
- #
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- """Qwen2VL model configuration"""
17
-
18
- import os
19
- from typing import Union
20
-
21
- from transformers import PretrainedConfig
22
-
23
-
24
- class Qwen2VLVisionConfig(PretrainedConfig):
25
- model_type = "qwen2_vl"
26
-
27
- def __init__(
28
- self,
29
- depth=32,
30
- embed_dim=1280,
31
- hidden_size=3584,
32
- hidden_act="quick_gelu",
33
- mlp_ratio=4,
34
- num_heads=16,
35
- in_channels=3,
36
- patch_size=14,
37
- spatial_merge_size=2,
38
- temporal_patch_size=2,
39
- **kwargs,
40
- ):
41
- super().__init__(**kwargs)
42
-
43
- self.depth = depth
44
- self.embed_dim = embed_dim
45
- self.hidden_size = hidden_size
46
- self.hidden_act = hidden_act
47
- self.mlp_ratio = mlp_ratio
48
- self.num_heads = num_heads
49
- self.in_channels = in_channels
50
- self.patch_size = patch_size
51
- self.spatial_merge_size = spatial_merge_size
52
- self.temporal_patch_size = temporal_patch_size
53
-
54
- @classmethod
55
- def from_pretrained(
56
- cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
57
- ) -> "PretrainedConfig":
58
- cls._set_token_in_kwargs(kwargs)
59
-
60
- config_dict, kwargs = cls.get_config_dict(
61
- pretrained_model_name_or_path, **kwargs
62
- )
63
-
64
- if config_dict.get("model_type") == "qwen2_vl":
65
- config_dict = config_dict["vision_config"]
66
-
67
- return cls.from_dict(config_dict, **kwargs)
68
-
69
-
70
- class Qwen2VLConfig(PretrainedConfig):
71
- model_type = "qwen2_vl"
72
-
73
- def __init__(
74
- self,
75
- vocab_size=152064,
76
- hidden_size=8192,
77
- intermediate_size=29568,
78
- num_hidden_layers=80,
79
- num_attention_heads=64,
80
- num_key_value_heads=8,
81
- hidden_act="silu",
82
- max_position_embeddings=32768,
83
- initializer_range=0.02,
84
- rms_norm_eps=1e-05,
85
- use_cache=True,
86
- tie_word_embeddings=False,
87
- rope_theta=1000000.0,
88
- use_sliding_window=False,
89
- sliding_window=4096,
90
- max_window_layers=80,
91
- attention_dropout=0.0,
92
- vision_config=None,
93
- rope_scaling=None,
94
- **kwargs,
95
- ):
96
- if isinstance(vision_config, dict):
97
- self.vision_config = Qwen2VLVisionConfig(**vision_config)
98
- elif vision_config is None:
99
- self.vision_config = Qwen2VLVisionConfig()
100
-
101
- self.vocab_size = vocab_size
102
- self.max_position_embeddings = max_position_embeddings
103
- self.hidden_size = hidden_size
104
- self.intermediate_size = intermediate_size
105
- self.num_hidden_layers = num_hidden_layers
106
- self.num_attention_heads = num_attention_heads
107
- self.use_sliding_window = use_sliding_window
108
- self.sliding_window = sliding_window
109
- self.max_window_layers = max_window_layers
110
-
111
- # for backward compatibility
112
- if num_key_value_heads is None:
113
- num_key_value_heads = num_attention_heads
114
-
115
- self.num_key_value_heads = num_key_value_heads
116
- self.hidden_act = hidden_act
117
- self.initializer_range = initializer_range
118
- self.rms_norm_eps = rms_norm_eps
119
- self.use_cache = use_cache
120
- self.rope_theta = rope_theta
121
- self.attention_dropout = attention_dropout
122
- self.rope_scaling = rope_scaling
123
-
124
- # NOTE(HandH1998): This is necessary for configuring the `rope_type`` of qwen2vl models after removing dependencies on vllm.
125
- if self.rope_scaling is not None and "type" in self.rope_scaling:
126
- if self.rope_scaling["type"] == "mrope":
127
- self.rope_scaling["type"] = "default"
128
- self.rope_scaling["rope_type"] = self.rope_scaling["type"]
129
-
130
- super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)