sglang 0.4.3__py3-none-any.whl → 0.4.3.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. sglang/lang/backend/openai.py +5 -0
  2. sglang/lang/chat_template.py +22 -7
  3. sglang/lang/ir.py +1 -0
  4. sglang/srt/configs/__init__.py +6 -3
  5. sglang/srt/configs/model_config.py +2 -0
  6. sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
  7. sglang/srt/entrypoints/engine.py +17 -2
  8. sglang/srt/hf_transformers_utils.py +2 -3
  9. sglang/srt/layers/attention/flashinfer_backend.py +101 -30
  10. sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  11. sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  12. sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  13. sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  14. sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  15. sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  16. sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  17. sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  18. sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  19. sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  20. sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  21. sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  22. sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  23. sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
  24. sglang/srt/managers/image_processor.py +217 -122
  25. sglang/srt/managers/schedule_batch.py +1 -0
  26. sglang/srt/model_executor/forward_batch_info.py +4 -1
  27. sglang/srt/model_executor/model_runner.py +1 -0
  28. sglang/srt/models/deepseek_nextn.py +295 -0
  29. sglang/srt/models/deepseek_v2.py +9 -3
  30. sglang/srt/models/llava.py +2 -1
  31. sglang/srt/models/qwen2_5_vl.py +722 -0
  32. sglang/srt/models/qwen2_vl.py +2 -1
  33. sglang/srt/openai_api/adapter.py +17 -3
  34. sglang/srt/server_args.py +6 -3
  35. sglang/srt/speculative/eagle_worker.py +7 -2
  36. sglang/srt/speculative/spec_info.py +11 -1
  37. sglang/utils.py +99 -19
  38. sglang/version.py +1 -1
  39. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/METADATA +3 -3
  40. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/RECORD +43 -27
  41. sglang/srt/configs/qwen2vl.py +0 -130
  42. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/LICENSE +0 -0
  43. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/WHEEL +0 -0
  44. {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/top_level.txt +0 -0
@@ -31,8 +31,9 @@ import torch
31
31
  import torch.nn as nn
32
32
  import torch.nn.functional as F
33
33
  from einops import rearrange
34
+ from transformers import Qwen2VLConfig
35
+ from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
34
36
 
35
- from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
36
37
  from sglang.srt.hf_transformers_utils import get_processor
37
38
  from sglang.srt.layers.activation import QuickGELU
38
39
  from sglang.srt.layers.attention.vision import VisionAttention
@@ -20,12 +20,14 @@ import os
20
20
  import time
21
21
  import uuid
22
22
  from http import HTTPStatus
23
- from typing import Dict, List, Optional
23
+ from typing import Dict, List
24
24
 
25
25
  from fastapi import HTTPException, Request, UploadFile
26
26
  from fastapi.responses import ORJSONResponse, StreamingResponse
27
27
  from pydantic import ValidationError
28
28
 
29
+ from sglang.lang.chat_template import get_chat_template_by_model_path
30
+
29
31
  try:
30
32
  from outlines.fsm.json_schema import convert_json_schema_to_str
31
33
  except ImportError:
@@ -92,7 +94,6 @@ file_id_response: Dict[str, FileResponse] = {}
92
94
  # map file id to file path in SGLang backend
93
95
  file_id_storage: Dict[str, str] = {}
94
96
 
95
-
96
97
  # backend storage directory
97
98
  storage_dir = None
98
99
 
@@ -116,12 +117,13 @@ def create_streaming_error_response(
116
117
  return json_str
117
118
 
118
119
 
119
- def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
120
+ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg, model_path):
120
121
  global chat_template_name
121
122
 
122
123
  logger.info(
123
124
  f"Use chat template for the OpenAI-compatible API server: {chat_template_arg}"
124
125
  )
126
+
125
127
  if not chat_template_exists(chat_template_arg):
126
128
  if not os.path.exists(chat_template_arg):
127
129
  raise RuntimeError(
@@ -163,6 +165,18 @@ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
163
165
  else:
164
166
  chat_template_name = chat_template_arg
165
167
 
168
+ # check chat-template
169
+ chat_template = get_chat_template_by_model_path(model_path)
170
+ if chat_template is not None:
171
+ official_chat_template = chat_template.name
172
+ used_chat_template = chat_template_name
173
+ if official_chat_template != used_chat_template:
174
+ logger.warning(
175
+ f"Using a chat_template: '{used_chat_template}', "
176
+ f"which is different from official chat template: '{official_chat_template}', "
177
+ f"This discrepancy may lead to performance degradation."
178
+ )
179
+
166
180
 
167
181
  async def v1_files_create(file: UploadFile, purpose: str, file_storage_pth: str = None):
168
182
  try:
sglang/srt/server_args.py CHANGED
@@ -262,14 +262,17 @@ class ServerArgs:
262
262
  )
263
263
 
264
264
  # Speculative Decoding
265
- if self.speculative_algorithm == "EAGLE":
265
+ if (
266
+ self.speculative_algorithm == "EAGLE"
267
+ or self.speculative_algorithm == "NEXTN"
268
+ ):
266
269
  self.prefill_only_one_req = True
267
270
  self.disable_cuda_graph_padding = True
268
271
  self.disable_radix_cache = True
269
272
  self.disable_overlap_schedule = True
270
273
  self.chunked_prefill_size = -1
271
274
  logger.info(
272
- "The radix cache, chunked prefill, and overlap scheduler are disabled because of using eagle speculative decoding."
275
+ f"The radix cache, chunked prefill, and overlap scheduler are disabled because of using {self.speculative_algorithm} speculative decoding."
273
276
  )
274
277
 
275
278
  # GGUF
@@ -705,7 +708,7 @@ class ServerArgs:
705
708
  parser.add_argument(
706
709
  "--speculative-algorithm",
707
710
  type=str,
708
- choices=["EAGLE"],
711
+ choices=["EAGLE", "NEXTN"],
709
712
  help="Speculative algorithm.",
710
713
  )
711
714
  parser.add_argument(
@@ -24,6 +24,7 @@ from sglang.srt.speculative.eagle_utils import (
24
24
  fast_topk,
25
25
  select_top_k_tokens,
26
26
  )
27
+ from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
27
28
 
28
29
  logger = logging.getLogger(__name__)
29
30
 
@@ -57,11 +58,15 @@ class EAGLEWorker(TpModelWorker):
57
58
  # Parse arguments
58
59
  self.topk = server_args.speculative_eagle_topk
59
60
  self.speculative_num_steps = server_args.speculative_num_steps
61
+ self.speculative_algorithm = SpeculativeAlgorithm.from_string(
62
+ server_args.speculative_algorithm
63
+ )
60
64
  self.server_args = server_args
61
65
 
62
66
  # Share the embedding and lm_head
63
- embed, head = self.target_worker.model_runner.model.get_embed_and_head()
64
- self.model_runner.model.set_embed_and_head(embed, head)
67
+ if not self.speculative_algorithm.is_nextn():
68
+ embed, head = self.target_worker.model_runner.model.get_embed_and_head()
69
+ self.model_runner.model.set_embed_and_head(embed, head)
65
70
  self.model_runner.server_args.disable_cuda_graph = backup_disable_cuda_graph
66
71
 
67
72
  # Create multi-step attn backends and cuda graph runners
@@ -5,18 +5,28 @@ class SpeculativeAlgorithm(IntEnum):
5
5
  NONE = auto()
6
6
  EAGLE = auto()
7
7
 
8
+ # NEXTN spec decoding is for DeepSeek V3/R1
9
+ # currently it's implemented based on EAGLE
10
+ NEXTN = auto()
11
+
8
12
  def is_none(self):
9
13
  return self == SpeculativeAlgorithm.NONE
10
14
 
11
15
  def is_eagle(self):
12
- return self == SpeculativeAlgorithm.EAGLE
16
+ return self == SpeculativeAlgorithm.EAGLE or self == SpeculativeAlgorithm.NEXTN
17
+
18
+ def is_nextn(self):
19
+ return self == SpeculativeAlgorithm.NEXTN
13
20
 
14
21
  @staticmethod
15
22
  def from_string(name: str):
16
23
  name_map = {
17
24
  "EAGLE": SpeculativeAlgorithm.EAGLE,
25
+ "NEXTN": SpeculativeAlgorithm.NEXTN,
18
26
  None: SpeculativeAlgorithm.NONE,
19
27
  }
28
+ if name is not None:
29
+ name = name.upper()
20
30
  return name_map[name]
21
31
 
22
32
 
sglang/utils.py CHANGED
@@ -306,22 +306,112 @@ def download_and_cache_file(url: str, filename: Optional[str] = None):
306
306
  return filename
307
307
 
308
308
 
309
- def execute_shell_command(command: str) -> subprocess.Popen:
309
+ import fcntl
310
+
311
+
312
+ def is_in_ci():
313
+ from sglang.test.test_utils import is_in_ci
314
+
315
+ return is_in_ci()
316
+
317
+
318
+ LOCKFILE = os.path.expanduser("~/.sglang_port_lock")
319
+ PORT_REGISTRY = os.path.expanduser("~/.sglang_port_registry.json")
320
+
321
+ if not os.path.exists(LOCKFILE):
322
+ with open(LOCKFILE, "w") as f:
323
+ pass
324
+
325
+ if not os.path.exists(PORT_REGISTRY):
326
+ with open(PORT_REGISTRY, "w") as f:
327
+ json.dump([], f)
328
+
329
+
330
+ def print_highlight(html_content: str):
331
+ if is_in_ci():
332
+ html_content = str(html_content).replace("\n", "<br>")
333
+ display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
334
+ else:
335
+ print(html_content)
336
+
337
+
338
+ def init_port_registry():
339
+ """Initialize the port registry file if it doesn't exist."""
340
+ if not os.path.exists(PORT_REGISTRY):
341
+ with open(PORT_REGISTRY, "w") as f:
342
+ json.dump([], f)
343
+
344
+
345
+ def reserve_port(start=30000, end=40000):
346
+ """
347
+ Reserve an available port using a file lock and a registry.
348
+ Returns the allocated port.
310
349
  """
311
- Execute a shell command and return the process handle
350
+ init_port_registry()
351
+ with open(LOCKFILE, "w") as lock:
352
+ fcntl.flock(lock, fcntl.LOCK_EX)
353
+ try:
354
+ with open(PORT_REGISTRY, "r") as f:
355
+ used = json.load(f)
356
+ except Exception:
357
+ used = []
358
+ for port in range(start, end):
359
+ if port not in used:
360
+ used.append(port)
361
+ with open(PORT_REGISTRY, "w") as f:
362
+ json.dump(used, f)
363
+ return port
364
+ raise RuntimeError("No free port available")
365
+
366
+
367
+ def release_port(port):
368
+ """Release the reserved port by removing it from the registry."""
369
+ with open(LOCKFILE, "w") as lock:
370
+ fcntl.flock(lock, fcntl.LOCK_EX)
371
+ try:
372
+ with open(PORT_REGISTRY, "r") as f:
373
+ used = json.load(f)
374
+ except Exception:
375
+ used = []
376
+ if port in used:
377
+ used.remove(port)
378
+ with open(PORT_REGISTRY, "w") as f:
379
+ json.dump(used, f)
312
380
 
313
- Args:
314
- command: Shell command as a string (can include \\ line continuations)
315
- Returns:
316
- subprocess.Popen: Process handle
381
+
382
+ def execute_shell_command(command: str) -> subprocess.Popen:
317
383
  """
318
- # Replace \ newline with space and split
384
+ Execute a shell command and return its process handle.
385
+ """
386
+ # Replace newline continuations and split the command string.
319
387
  command = command.replace("\\\n", " ").replace("\\", " ")
320
388
  parts = command.split()
321
-
322
389
  return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)
323
390
 
324
391
 
392
+ def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
393
+ """
394
+ Launch the server using the given command.
395
+ If no port is specified, a free port is reserved.
396
+ """
397
+ if port is None:
398
+ port = reserve_port()
399
+ full_command = f"{command} --port {port}"
400
+ process = execute_shell_command(full_command)
401
+ return process, port
402
+
403
+
404
+ def terminate_process(process, port=None):
405
+ """
406
+ Terminate the process and, if a port was reserved, release it.
407
+ """
408
+ from sglang.srt.utils import kill_process_tree
409
+
410
+ kill_process_tree(process.pid)
411
+ if port is not None:
412
+ release_port(port)
413
+
414
+
325
415
  def wait_for_server(base_url: str, timeout: int = None) -> None:
326
416
  """Wait for the server to be ready by polling the /v1/models endpoint.
327
417
 
@@ -343,6 +433,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
343
433
  NOTE: Typically, the server runs in a separate terminal.
344
434
  In this notebook, we run the server and notebook code together, so their outputs are combined.
345
435
  To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
436
+ We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.
346
437
  """
347
438
  )
348
439
  break
@@ -353,17 +444,6 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
353
444
  time.sleep(1)
354
445
 
355
446
 
356
- def terminate_process(process):
357
- from sglang.srt.utils import kill_process_tree
358
-
359
- kill_process_tree(process.pid)
360
-
361
-
362
- def print_highlight(html_content: str):
363
- html_content = str(html_content).replace("\n", "<br>")
364
- display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
365
-
366
-
367
447
  class TypeBasedDispatcher:
368
448
  def __init__(self, mapping: List[Tuple[Type, Callable]]):
369
449
  self._mapping = mapping
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.3"
1
+ __version__ = "0.4.3.post2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: sglang
3
- Version: 0.4.3
3
+ Version: 0.4.3.post2
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -235,7 +235,7 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
235
235
  Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
236
236
  Requires-Dist: uvicorn; extra == "runtime-common"
237
237
  Requires-Dist: uvloop; extra == "runtime-common"
238
- Requires-Dist: xgrammar>=0.1.10; extra == "runtime-common"
238
+ Requires-Dist: xgrammar==0.1.10; extra == "runtime-common"
239
239
  Requires-Dist: ninja; extra == "runtime-common"
240
240
  Provides-Extra: srt
241
241
  Requires-Dist: sglang[runtime_common]; extra == "srt"
@@ -243,7 +243,7 @@ Requires-Dist: cuda-python; extra == "srt"
243
243
  Requires-Dist: sgl-kernel>=0.0.3.post6; extra == "srt"
244
244
  Requires-Dist: torch; extra == "srt"
245
245
  Requires-Dist: vllm<=0.7.2,>=0.6.4.post1; extra == "srt"
246
- Requires-Dist: flashinfer_python>=0.2.1.post1; extra == "srt"
246
+ Requires-Dist: flashinfer_python>=0.2.1.post2; extra == "srt"
247
247
  Requires-Dist: outlines<=0.1.11,>=0.0.44; extra == "srt"
248
248
  Provides-Extra: srt-hip
249
249
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
@@ -9,20 +9,20 @@ sglang/check_env.py,sha256=lDVA3ybt1wOE33HIMpkkU7zGRgLWez1_ifRRJ8qxbtw,8445
9
9
  sglang/global_config.py,sha256=crt5cernXnDa1iQ8kGOq_ScTFclRlTQbJ-atFHM7I5I,1330
10
10
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
11
11
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
12
- sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
13
- sglang/version.py,sha256=Nyg0pmk5ea9-SLCAFEIF96ByFx4-TJFtrqYPN-Zn6g4,22
12
+ sglang/utils.py,sha256=9fm5ghtYPXqsWKjUzlQKJIoH5iFit6Rz21RhyaC3YL4,15673
13
+ sglang/version.py,sha256=AJcJAUlaCr4igIr4vVaRiJeU678q5BdTdso-33Tq98k,28
14
14
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
15
+ sglang/lang/chat_template.py,sha256=0tZX67LgtYGrWopnSuTeqWVdxaw2deJOFWOBJpd6htU,17547
16
16
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
17
17
  sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
18
18
  sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
19
- sglang/lang/ir.py,sha256=dtA6rs5JIN0tMm3jhgRqdpRhH2Sckil-BMyLRMyBEIY,18494
19
+ sglang/lang/ir.py,sha256=YQlEX2eYMAVHG12xJ2Jds6S6el45_O-udsXJumpEoEQ,18552
20
20
  sglang/lang/tracer.py,sha256=o-jLAPPSuy2vBfsGGrTAnbuWtORzQ50B4C_P5zvYkx8,8291
21
21
  sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
23
23
  sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
24
24
  sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
25
- sglang/lang/backend/openai.py,sha256=ha9a2P6T80TmSgYlyIwB1qYawWkjcOgiOptkktkqa1U,15436
25
+ sglang/lang/backend/openai.py,sha256=BQj1FHPXmSfFVQV-SIs7WW6v7tUDUckjtpvs9mhP8Ok,15645
26
26
  sglang/lang/backend/runtime_endpoint.py,sha256=gM97bi8Kv8sLzCDJnH5ZZTQ9I6t31CeVUve7qdTsopo,16755
27
27
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
28
28
  sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
@@ -30,21 +30,21 @@ sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
30
30
  sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
31
31
  sglang/srt/custom_op.py,sha256=M5oqlgh32vAVeStFCruydTUfi_blGFJihVTnQBEOvwo,1134
32
32
  sglang/srt/function_call_parser.py,sha256=YmagXt1BIuTbeiWmSleZwJFCFR5r5EFqVQqKnJDYXiE,19568
33
- sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
33
+ sglang/srt/hf_transformers_utils.py,sha256=ymMz_MjaeHirDwzzCWz5ktPEzWdIoP3K9DiZqNtjs6k,7737
34
34
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
35
35
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
36
36
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
37
- sglang/srt/server_args.py,sha256=97YAjIbttBjty9Rv9CQ-yGERprFRSu2Jq0zX8Yx-QH8,41018
37
+ sglang/srt/server_args.py,sha256=C7zyFuYidgt__ZaqK8tNV9zPByQNaLyUNMOogBzBjXM,41128
38
38
  sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
39
39
  sglang/srt/utils.py,sha256=RVU-OORgeVQICMPzj17KHxbDdSYGOKFBnNR4dZejP9A,46780
40
- sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
40
+ sglang/srt/configs/__init__.py,sha256=naCw3LwTLHOCsldy2UyRmxoIWrWfX3hgEP2Gt7frXaw,382
41
41
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
42
42
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
43
43
  sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
44
44
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
45
45
  sglang/srt/configs/load_config.py,sha256=la2ezNRcUZs7qiTYta2KEXqZ0U4TcmWW3U0sjoHgQQ0,3107
46
- sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
47
- sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
46
+ sglang/srt/configs/model_config.py,sha256=MPC1XJox6wo0Ut1LJ-05flKWlA95ZuzVKaDP9il4hD4,17023
47
+ sglang/srt/configs/qwen2_5_vl_config.py,sha256=J8jq6QwseIOgqXQ3nuEX_yRVMNbyYjleZbf4nEhniGk,48184
48
48
  sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
49
49
  sglang/srt/constrained/outlines_backend.py,sha256=yPYgz44n-rSCStGGkS1lGazFiQzN7gqwSvpJ2YG0co4,7081
50
50
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
@@ -61,7 +61,7 @@ sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--Zq
61
61
  sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
62
62
  sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
63
63
  sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
64
- sglang/srt/entrypoints/engine.py,sha256=3JL7aj0K5UHCBsqCOG2MTLAv-8IpwtN2K-3_xB6fNqk,16888
64
+ sglang/srt/entrypoints/engine.py,sha256=t2UhSOnr22BBCyoIhDJZIcmthQOQcL1iaB06LZqqAnU,17555
65
65
  sglang/srt/entrypoints/http_server.py,sha256=TJlekPuw01_AvfAhDUdD-DaxCmmW_uH_rWL2CNv2OGE,19545
66
66
  sglang/srt/layers/activation.py,sha256=f9KGwGi2znUx5SFKH_vO8htpBkfQ550VZZIycFDfPlk,5602
67
67
  sglang/srt/layers/dp_attention.py,sha256=LLUMHIdphhQy1rNR52uwIFl85oDFPAsogMwYF3d83PU,1910
@@ -77,7 +77,7 @@ sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSa
77
77
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
78
78
  sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
79
79
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=4mVyFPfZxPTwkQHGNCfI_4hQ8CbsWXJfxz-IQW77gAc,9143
80
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=8bEnIXFXBB8lv0ruH_paqT8uLIHsKYKYhQ9ytzDcsYI,47681
80
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=MnQCTvOOPXy4RXvvZNSMjm1nPHXoysQ1cWGVf_yilQU,50518
81
81
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
82
82
  sglang/srt/layers/attention/triton_backend.py,sha256=mbYaYKHYrUyL2zEXrPtDRIcvVNe6L-bmcdLhKF92V-0,21292
83
83
  sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
@@ -124,6 +124,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=RWBo3j5AzZls5eD2eaejetSfM
124
124
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=kklgf2qLI5CQYiJJ5e9Gxx2gAfGxcyMDYpdJnIXPV8E,2748
125
125
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=8e0tN_DHPwvh_HECVHx9oOF_4WWdaht4s6Nmd_K-aBU,2904
126
126
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
127
+ "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=XmKFaMheq7NNrsvYCJteul0w809l_l460ZiDQC9ToGs,3262
127
128
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
128
129
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
129
130
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=5ro9O8Nf-7MB8NGCQ3QV5kB2k3iSWe0rdTz2A4W72CA,3732
@@ -210,24 +211,30 @@ sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttr
210
211
  sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
211
212
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
212
213
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=CPo1WRF0HgsQMPBkvpoImElQMrfwpJLhEvL86e6fkPU,3247
214
+ "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9qdyh6ki9LAyq7VDO9WMRmBOPWKSrZhU-I7z1E9bTKA,550
213
215
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
214
216
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=xqJNygFgPCe4jDpkfVOWCXpuNMUjmssdD8oGhp2iXv8,3726
215
217
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9ya9f1Nt0g0RD-6sRRBZOaIPLSpSFZCz7jNvqTPrgFE,3732
216
218
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
217
219
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=UZljnxxCSjwnZlX3OgKWZJGXCf5BWF_agEpNX8I4Zxc,3248
218
220
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
221
+ "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6POXzQZHdNwcBDv1w6BJKbLMRDt0jbFUuMsMNf-ToEs,549
219
222
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
220
223
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=3matoCzEe4aexwoe7YTmkjyE4NA8khWXjL5EySuNwzA,3254
221
224
  "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
225
+ "sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=JeXNLkbMAjdDKV-WpzQy87SXN06towo3xUofLtvYCQI,551
222
226
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
223
227
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9f8Ib4gLEFSfdNpO8IL8uiONImvqnlPbJrZ0HM3OB-o,3247
224
228
  "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
229
+ "sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Boa83ZSPZ0LvzqtfLGvois5QK4TmJfwjA2n96c9ET58,549
225
230
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
226
231
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
227
232
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tG5_iVeRBHTgHX-liOf79nWRjj_lUZ-NQWTbBrBgORQ,3246
228
233
  "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
234
+ "sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=9tCZxJ0eAD7AYMH7OqS3AGppJUllKnJLNvMq7FMXdsA,552
229
235
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
230
236
  "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
237
+ "sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=QMVfMXS0Yjgob8_9xps1xuZi6KnY5l2MeKxXLRjTeg4,548
231
238
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
232
239
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=I6a5wQ0R3cBp8ix_PDamWZN2aJmV_1p0tg430L3Updg,3727
233
240
  "sglang/srt/layers/quantization/configs/N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bSxTaptdcgj27mQGmdUmQtYTn4V_8EcmtRaVNigKjLA,3730
@@ -240,6 +247,7 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
240
247
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
241
248
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=B0lo3SuoQXhBEnojH2TwpVeurvlKD8yI8kQrJ5ORhWU,3249
242
249
  "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
250
+ "sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZYZ03ziPGwG2sCasEYLj7ZIP7vNO8UNBR5qNTmKgRMs,549
243
251
  "sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
244
252
  "sglang/srt/layers/quantization/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
245
253
  "sglang/srt/layers/quantization/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6HvsrapGzLyCAZe_NE0VdcJTSJv1aztD1ZqFG7VODUA,3729
@@ -266,22 +274,28 @@ sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id
266
274
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
267
275
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=BpKweY4HfBx5xvqSvwNahy9x7R5FH-YK8j6rLFQdKwo,3248
268
276
  "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
277
+ "sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FcuzcmKFf2RbaUpAaAsuObUefcGMgNPMDbVdHXRkoGY,549
269
278
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
270
279
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=h32lCTFilLlyKbMeuJvNWG1v0yJJzNj93kwSvlrHfaY,3249
271
280
  "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
281
+ "sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kjQ_kvF38bZGcmaeJGSJsSR0NcUjUOh3LZ2-5c4kPvE,550
272
282
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
273
283
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=0jX-z2lTgVw7ABLmWsIsQdqW4EjmbXKRDHye_XPLCAE,3245
274
284
  "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
285
+ "sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=mZGU39sobtUqNYKjtyIGjhOZyCOQFJMF3MinA1zjTJA,550
275
286
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
276
287
  "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=_exM3wJ3FMmGHweBcH-8IxwZBzaOmPaF3ScMM6KDpiY,3253
288
+ "sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ue2oWml2ouUTZelYx5Nt5pgCmY-ib3mLV1reJL9ZudE,550
277
289
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
278
290
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
279
291
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ENRWYdUwI0ooHb6IwcHliupRWOPnw-7-WtxZB-qQGJI,3245
280
292
  "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
293
+ "sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Nv9KP_KLGsRJdJF755dZBvbTws37u1GM2UigMRlAtl0,552
281
294
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
282
295
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
283
296
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TdWuE2RIsIyr4Im24MuWK3XyiNtbhO_hAiAXDz5gNUk,3246
284
297
  "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
298
+ "sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=ZOWEo__oUy8AhJiAlRCuGNAZNdNweFdWBFptJYkwxs8,552
285
299
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
286
300
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sVbH4YRLTxBqvTh_6xbtXkj3orOrKytlwM-_4gtD6IY,3725
287
301
  "sglang/srt/layers/quantization/configs/N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=sTvaJ0RiCaQem4F1z7oES6RVRJ2gKgBuccX13S1SqGc,3733
@@ -318,9 +332,9 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
318
332
  sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
319
333
  sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
320
334
  sglang/srt/managers/detokenizer_manager.py,sha256=XC2INyykOgwmIrFEGc-zf6LGZ5mMt6oPZt1YRXW_cbY,9650
321
- sglang/srt/managers/image_processor.py,sha256=s1QH9cSzT_nnitc6idzFjuGDp-pDnMTpbVZoQfzdSXU,20671
335
+ sglang/srt/managers/image_processor.py,sha256=AWtCjl_zCbcn5LD4Hp4NXmsu225lQE0gWixIhQuUMpE,23872
322
336
  sglang/srt/managers/io_struct.py,sha256=9jhu794cc_BljFmVL6kQseTHGZNwEzONdlGEy_wjAcA,18357
323
- sglang/srt/managers/schedule_batch.py,sha256=smqDrzohvA8j76CLgI53CvpduheW1m__26S0O8HcCf0,49187
337
+ sglang/srt/managers/schedule_batch.py,sha256=70smg65ed4xbotRegd8NzRu1YKA6M0oKn2Q69i0qWgA,49246
324
338
  sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
325
339
  sglang/srt/managers/scheduler.py,sha256=w0FPjiU5MoyP58UdJoPBr-hf-WmlWPpqb-5TSJDJBLo,71908
326
340
  sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
@@ -336,8 +350,8 @@ sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll5
336
350
  sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
337
351
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
338
352
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=hH646E_c4UlclGEawPDjg4KHgTUEk70WrPl6C7nnltM,18774
339
- sglang/srt/model_executor/forward_batch_info.py,sha256=t1RlBgoeS-_Ikl28Xjvt-aouh1nNUc3eLM4iGY4_QqY,14988
340
- sglang/srt/model_executor/model_runner.py,sha256=uohQ2n2R1HcVyaHwbdwM6xDvFxZSLgxacjMSrrogLpw,33537
353
+ sglang/srt/model_executor/forward_batch_info.py,sha256=cTyRuJVBTBmkP4LAfScRSRrpjLCq7UfmUKoXuU5LZUw,15098
354
+ sglang/srt/model_executor/model_runner.py,sha256=FNi5BTrpQ5VQ-VsMHujaAuSno3Y3DYYjymzPJLJoOIM,33609
341
355
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
342
356
  sglang/srt/model_loader/loader.py,sha256=2d9fJNxC3Y6YWmQX4nVOB-b9Glc43ztlkJYJFX1_kxk,46811
343
357
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -347,7 +361,8 @@ sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,
347
361
  sglang/srt/models/commandr.py,sha256=y8DFUW0NKbkoY2DP6nhgJ1f7F_ysjaHEkEnZYZW2zdk,14523
348
362
  sglang/srt/models/dbrx.py,sha256=-L9QkUr_xuMuI6mn0AzG_VE1MqRXoaaFtD4r8UuAzkY,14789
349
363
  sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
350
- sglang/srt/models/deepseek_v2.py,sha256=9_284fDT15WEmv8qfnH2EzKX8fxSZrNyiz4iQtgb0tI,39065
364
+ sglang/srt/models/deepseek_nextn.py,sha256=QmzByVDFw8F5cJfBU4-VVryXovn4HxvGBwbBTfJavJg,11740
365
+ sglang/srt/models/deepseek_v2.py,sha256=vjY8BznqlEjX4P_mZQp1Syv24YxVee9Q258O8KWqE8I,39429
351
366
  sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
352
367
  sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
353
368
  sglang/srt/models/gemma2.py,sha256=cyQfby-kp2OZPsUACmBh3-jsXkYwQg9Tj6xqtZ7mTwM,15947
@@ -363,7 +378,7 @@ sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJ
363
378
  sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
364
379
  sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
365
380
  sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
366
- sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
381
+ sglang/srt/models/llava.py,sha256=Qbh26DcC6djw5G8olq0AC0WqzkkRVsiuT8I6RPCpH0o,26384
367
382
  sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
368
383
  sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
369
384
  sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
@@ -378,16 +393,17 @@ sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15
378
393
  sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
379
394
  sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
380
395
  sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
396
+ sglang/srt/models/qwen2_5_vl.py,sha256=uSZEoCdyOlaANjnP21LxE7K_DqfG10JQ5sUkK6Ase2A,28045
381
397
  sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
382
398
  sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
383
- sglang/srt/models/qwen2_vl.py,sha256=d8jCZyoJ6sMZ8-Pw9bHb3VbwiZlbFzJvffA4yYBRj0U,23466
399
+ sglang/srt/models/qwen2_vl.py,sha256=1LM4iyE4rHFRgP58hSFpKgZdaew_OSdwGRwwy3NiOzo,23523
384
400
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
385
401
  sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
386
402
  sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
387
403
  sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
388
404
  sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
389
405
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
390
- sglang/srt/openai_api/adapter.py,sha256=yQaAiZ43w3OIsNj8nC6BEjt9DPE5IZ-ovLAd0r7LbcY,61716
406
+ sglang/srt/openai_api/adapter.py,sha256=tPsZ6cHlEofwJU7Cmfi3KtwSqvd3sv6EyeV6BfkdAcU,62349
391
407
  sglang/srt/openai_api/protocol.py,sha256=UInFUKQqS8KWLrCzA6s5_uaNC6xAUAAJ4WepQzQ7xpo,11845
392
408
  sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
393
409
  sglang/srt/sampling/sampling_batch_info.py,sha256=Ry1N79T9QQY_HJ8GjM50_W4tzKFxMtTfV4GccT7NQ0w,15129
@@ -401,8 +417,8 @@ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=l1DyU8kC8
401
417
  sglang/srt/speculative/build_eagle_tree.py,sha256=zWthboIgzPzSOXcGxDpDv0rBOQP55HYGrBKGqm2gWF0,20732
402
418
  sglang/srt/speculative/eagle_draft_cuda_graph_runner.py,sha256=FY4hcwd0Blx7AXbeX6quaXPNgWA8WGIqVcQiEgHyERk,8002
403
419
  sglang/srt/speculative/eagle_utils.py,sha256=ypjVmVTVzCGclOVHRMJxdLUSPkf1-7bNXQS0oP6dn5U,25644
404
- sglang/srt/speculative/eagle_worker.py,sha256=33zC6txEsNp9hD48iy-_67ov83Pf4iASulLg8GHLy5U,12898
405
- sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
420
+ sglang/srt/speculative/eagle_worker.py,sha256=w7sLcW-EeE_iWyMJQhBuSo5Zvq6iPe-3m73-OIP1b-E,13153
421
+ sglang/srt/speculative/spec_info.py,sha256=RWG4ik4Dah_V74mgP0gza6UaYFtN-BRV6aJZsHHGGtE,827
406
422
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
407
423
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
408
424
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
@@ -419,8 +435,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
419
435
  sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
420
436
  sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
421
437
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
422
- sglang-0.4.3.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
423
- sglang-0.4.3.dist-info/METADATA,sha256=cpA0ecZd4jfaThOrStEpOGbXDTorUxqYdU4catzo2t4,23815
424
- sglang-0.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
425
- sglang-0.4.3.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
426
- sglang-0.4.3.dist-info/RECORD,,
438
+ sglang-0.4.3.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
439
+ sglang-0.4.3.post2.dist-info/METADATA,sha256=kChnxBYuvq-HiSlLlTj6l3bOcIj_mLFuOsSQpUMGkZE,23821
440
+ sglang-0.4.3.post2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
441
+ sglang-0.4.3.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
442
+ sglang-0.4.3.post2.dist-info/RECORD,,