sglang 0.4.1.post5__py3-none-any.whl → 0.4.1.post6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. sglang/srt/configs/model_config.py +15 -6
  2. sglang/srt/layers/attention/flashinfer_backend.py +17 -3
  3. sglang/srt/layers/linear.py +36 -98
  4. sglang/srt/layers/moe/fused_moe_triton/layer.py +37 -9
  5. sglang/srt/layers/moe/topk.py +4 -2
  6. sglang/srt/layers/parameter.py +24 -16
  7. sglang/srt/layers/quantization/__init__.py +2 -0
  8. sglang/srt/layers/quantization/fp8.py +106 -52
  9. sglang/srt/layers/quantization/fp8_utils.py +1 -1
  10. sglang/srt/layers/quantization/int8_kernel.py +54 -0
  11. sglang/srt/layers/quantization/modelopt_quant.py +1 -1
  12. sglang/srt/layers/quantization/w8a8_int8.py +117 -0
  13. sglang/srt/layers/radix_attention.py +2 -0
  14. sglang/srt/layers/vocab_parallel_embedding.py +15 -2
  15. sglang/srt/managers/configure_logging.py +43 -0
  16. sglang/srt/managers/detokenizer_manager.py +0 -2
  17. sglang/srt/managers/io_struct.py +29 -13
  18. sglang/srt/managers/scheduler.py +48 -9
  19. sglang/srt/managers/tokenizer_manager.py +109 -49
  20. sglang/srt/mem_cache/memory_pool.py +107 -52
  21. sglang/srt/metrics/collector.py +10 -5
  22. sglang/srt/model_executor/model_runner.py +43 -6
  23. sglang/srt/models/llama.py +37 -2
  24. sglang/srt/models/qwen2.py +11 -0
  25. sglang/srt/models/qwen2_eagle.py +131 -0
  26. sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +15 -5
  27. sglang/srt/sampling/sampling_batch_info.py +14 -5
  28. sglang/srt/sampling/sampling_params.py +1 -1
  29. sglang/srt/server.py +114 -61
  30. sglang/srt/server_args.py +27 -18
  31. sglang/srt/speculative/eagle_worker.py +1 -0
  32. sglang/srt/torch_memory_saver_adapter.py +59 -0
  33. sglang/srt/utils.py +29 -0
  34. sglang/version.py +1 -1
  35. {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post6.dist-info}/METADATA +12 -10
  36. {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post6.dist-info}/RECORD +39 -34
  37. {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post6.dist-info}/LICENSE +0 -0
  38. {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post6.dist-info}/WHEEL +0 -0
  39. {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post6.dist-info}/top_level.txt +0 -0
sglang/srt/server_args.py CHANGED
@@ -23,7 +23,6 @@ from typing import List, Optional
23
23
  import torch
24
24
 
25
25
  from sglang.srt.hf_transformers_utils import check_gguf_file
26
- from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
27
26
  from sglang.srt.utils import (
28
27
  get_amdgpu_memory_capacity,
29
28
  get_hpu_memory_capacity,
@@ -32,6 +31,7 @@ from sglang.srt.utils import (
32
31
  is_hip,
33
32
  is_ipv6,
34
33
  is_port_available,
34
+ nullable_str,
35
35
  )
36
36
 
37
37
  logger = logging.getLogger(__name__)
@@ -47,6 +47,7 @@ class ServerArgs:
47
47
  trust_remote_code: bool = True
48
48
  dtype: str = "auto"
49
49
  kv_cache_dtype: str = "auto"
50
+ quantization_param_path: nullable_str = None
50
51
  quantization: Optional[str] = None
51
52
  context_length: Optional[int] = None
52
53
  device: str = "cuda"
@@ -55,7 +56,6 @@ class ServerArgs:
55
56
  is_embedding: bool = False
56
57
  revision: Optional[str] = None
57
58
  skip_tokenizer_init: bool = False
58
- return_token_ids: bool = False
59
59
 
60
60
  # Port for the HTTP server
61
61
  host: str = "127.0.0.1"
@@ -91,7 +91,7 @@ class ServerArgs:
91
91
 
92
92
  # API related
93
93
  api_key: Optional[str] = None
94
- file_storage_pth: str = "SGLang_storage"
94
+ file_storage_pth: str = "sglang_storage"
95
95
  enable_cache_report: bool = False
96
96
 
97
97
  # Data parallelism
@@ -156,6 +156,7 @@ class ServerArgs:
156
156
  triton_attention_num_kv_splits: int = 8
157
157
  num_continuous_decode_steps: int = 1
158
158
  delete_ckpt_after_loading: bool = False
159
+ enable_memory_saver: bool = False
159
160
 
160
161
  def __post_init__(self):
161
162
  # Set missing default values
@@ -296,6 +297,11 @@ class ServerArgs:
296
297
  "tokenizer if available, and 'slow' will "
297
298
  "always use the slow tokenizer.",
298
299
  )
300
+ parser.add_argument(
301
+ "--skip-tokenizer-init",
302
+ action="store_true",
303
+ help="If set, skip init tokenizer and pass input_ids in generate request",
304
+ )
299
305
  parser.add_argument(
300
306
  "--load-format",
301
307
  type=str,
@@ -346,8 +352,17 @@ class ServerArgs:
346
352
  "--kv-cache-dtype",
347
353
  type=str,
348
354
  default=ServerArgs.kv_cache_dtype,
349
- choices=["auto", "fp8_e5m2"],
350
- help='Data type for kv cache storage. "auto" will use model data type. "fp8_e5m2" is supported for CUDA 11.8+.',
355
+ choices=["auto", "fp8_e5m2", "fp8_e4m3"],
356
+ help='Data type for kv cache storage. "auto" will use model data type. "fp8_e5m2" and "fp8_e4m3" is supported for CUDA 11.8+.',
357
+ )
358
+ parser.add_argument(
359
+ "--quantization-param-path",
360
+ type=nullable_str,
361
+ default=None,
362
+ help="Path to the JSON file containing the KV cache "
363
+ "scaling factors. This should generally be supplied, when "
364
+ "KV cache dtype is FP8. Otherwise, KV cache scaling factors "
365
+ "default to 1.0, which may cause accuracy issues. ",
351
366
  )
352
367
  parser.add_argument(
353
368
  "--quantization",
@@ -363,6 +378,7 @@ class ServerArgs:
363
378
  "bitsandbytes",
364
379
  "gguf",
365
380
  "modelopt",
381
+ "w8a8_int8",
366
382
  ],
367
383
  help="The quantization method.",
368
384
  )
@@ -404,18 +420,6 @@ class ServerArgs:
404
420
  "name, a tag name, or a commit id. If unspecified, will use "
405
421
  "the default version.",
406
422
  )
407
- parser.add_argument(
408
- "--skip-tokenizer-init",
409
- action="store_true",
410
- help="If set, skip init tokenizer and pass input_ids in generate request",
411
- )
412
- parser.add_argument(
413
- "--return-token-ids",
414
- action="store_true",
415
- default=ServerArgs.return_token_ids,
416
- help="Whether to return token IDs in the output, this may introduce additional overhead.",
417
- )
418
-
419
423
  # Memory and scheduling
420
424
  parser.add_argument(
421
425
  "--mem-fraction-static",
@@ -551,7 +555,7 @@ class ServerArgs:
551
555
  "--decode-log-interval",
552
556
  type=int,
553
557
  default=ServerArgs.decode_log_interval,
554
- help="The log interval of decode batch",
558
+ help="The log interval of decode batch.",
555
559
  )
556
560
 
557
561
  # API related
@@ -851,6 +855,11 @@ class ServerArgs:
851
855
  action="store_true",
852
856
  help="Delete the model checkpoint after loading the model.",
853
857
  )
858
+ parser.add_argument(
859
+ "--enable-memory-saver",
860
+ action="store_true",
861
+ help="Allow saving memory using release_memory_occupation and resume_memory_occupation",
862
+ )
854
863
 
855
864
  @classmethod
856
865
  def from_cli_args(cls, args: argparse.Namespace):
@@ -40,6 +40,7 @@ class EAGLEWorker(TpModelWorker):
40
40
  )
41
41
  self.target_worker = target_worker
42
42
  self.server_args = server_args
43
+ self.finish_extend_len = []
43
44
 
44
45
  # Share the embedding and lm_head
45
46
  embed, head = self.target_worker.model_runner.model.get_embed_and_head()
@@ -0,0 +1,59 @@
1
+ from abc import ABC
2
+ from contextlib import contextmanager
3
+
4
+ try:
5
+ import torch_memory_saver
6
+
7
+ _primary_memory_saver = torch_memory_saver.TorchMemorySaver()
8
+ except ImportError:
9
+ pass
10
+
11
+
12
+ class TorchMemorySaverAdapter(ABC):
13
+ @staticmethod
14
+ def create(enable: bool):
15
+ return (
16
+ _TorchMemorySaverAdapterReal() if enable else _TorchMemorySaverAdapterNoop()
17
+ )
18
+
19
+ def configure_subprocess(self):
20
+ raise NotImplementedError
21
+
22
+ def region(self):
23
+ raise NotImplementedError
24
+
25
+ def pause(self):
26
+ raise NotImplementedError
27
+
28
+ def resume(self):
29
+ raise NotImplementedError
30
+
31
+
32
+ class _TorchMemorySaverAdapterReal(TorchMemorySaverAdapter):
33
+ def configure_subprocess(self):
34
+ return torch_memory_saver.configure_subprocess()
35
+
36
+ def region(self):
37
+ return _primary_memory_saver.region()
38
+
39
+ def pause(self):
40
+ return _primary_memory_saver.pause()
41
+
42
+ def resume(self):
43
+ return _primary_memory_saver.resume()
44
+
45
+
46
+ class _TorchMemorySaverAdapterNoop(TorchMemorySaverAdapter):
47
+ @contextmanager
48
+ def configure_subprocess(self):
49
+ yield
50
+
51
+ @contextmanager
52
+ def region(self):
53
+ yield
54
+
55
+ def pause(self):
56
+ pass
57
+
58
+ def resume(self):
59
+ pass
sglang/srt/utils.py CHANGED
@@ -97,6 +97,10 @@ def is_flashinfer_available():
97
97
  return torch.cuda.is_available() and torch.version.cuda
98
98
 
99
99
 
100
+ def is_cuda_available():
101
+ return torch.cuda.is_available() and torch.version.cuda
102
+
103
+
100
104
  def is_ipv6(address):
101
105
  try:
102
106
  ipaddress.IPv6Address(address)
@@ -1340,6 +1344,25 @@ def parse_tool_response(text, tools, **kwargs):
1340
1344
  return text, call_info_list
1341
1345
 
1342
1346
 
1347
+ def permute_weight(x: torch.Tensor) -> torch.Tensor:
1348
+ b_ = x.shape[0]
1349
+ n_ = x.shape[1]
1350
+ k_ = x.shape[2]
1351
+
1352
+ x_ = x
1353
+ if x.dtype == torch.bfloat16 or x.dtype == torch.float16:
1354
+ x_ = x_.view(int(b_), int(n_ / 16), 16, int(k_ / 32), 4, 8)
1355
+ elif x.dtype == torch.float8_e4m3fnuz or x.dtype == torch.int8:
1356
+ x_ = x_.view(int(b_), int(n_ / 16), 16, int(k_ / 64), 4, 16)
1357
+ else:
1358
+ return x_
1359
+
1360
+ x_ = x_.permute(0, 1, 3, 4, 2, 5)
1361
+ x_ = x_.contiguous()
1362
+ x_ = x_.view(*x.shape)
1363
+ return x_
1364
+
1365
+
1343
1366
  class MultiprocessingSerializer:
1344
1367
  @staticmethod
1345
1368
  def serialize(obj):
@@ -1375,3 +1398,9 @@ def debug_timing(func):
1375
1398
  return func(*args, **kwargs)
1376
1399
 
1377
1400
  return wrapper
1401
+
1402
+
1403
+ def nullable_str(val: str):
1404
+ if not val or val == "None":
1405
+ return None
1406
+ return val
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.1.post5"
1
+ __version__ = "0.4.1.post6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: sglang
3
- Version: 0.4.1.post5
3
+ Version: 0.4.1.post6
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -240,7 +240,7 @@ Requires-Dist: xgrammar>=0.1.6; extra == "runtime-common"
240
240
  Provides-Extra: srt
241
241
  Requires-Dist: sglang[runtime_common]; extra == "srt"
242
242
  Requires-Dist: cuda-python; extra == "srt"
243
- Requires-Dist: sgl-kernel>=0.0.2.post11; extra == "srt"
243
+ Requires-Dist: sgl-kernel>=0.0.2.post12; extra == "srt"
244
244
  Requires-Dist: torch; extra == "srt"
245
245
  Requires-Dist: vllm<=0.6.4.post1,>=0.6.3.post1; extra == "srt"
246
246
  Requires-Dist: flashinfer==0.1.6; extra == "srt"
@@ -259,6 +259,8 @@ Provides-Extra: anthropic
259
259
  Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
260
260
  Provides-Extra: litellm
261
261
  Requires-Dist: litellm>=1.0.0; extra == "litellm"
262
+ Provides-Extra: torch-memory-saver
263
+ Requires-Dist: torch_memory_saver; extra == "torch-memory-saver"
262
264
  Provides-Extra: test
263
265
  Requires-Dist: jsonlines; extra == "test"
264
266
  Requires-Dist: matplotlib; extra == "test"
@@ -314,9 +316,9 @@ Requires-Dist: sglang[test]; extra == "dev-hpu"
314
316
  --------------------------------------------------------------------------------
315
317
 
316
318
  | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
317
- | [**Documentation**](https://sgl-project.github.io/)
318
- | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2um0ad92q-LkU19KQTxCGzlCgRiOiQEw)
319
- | [**Join Bi-Weekly Development Meeting**](https://docs.google.com/document/d/1xEow4eIM152xNcRxqZz9VEcOiTQo8-CEuuQ5qTmkt-E/edit?usp=sharing)
319
+ | [**Documentation**](https://docs.sglang.ai/)
320
+ | [**Join Slack**](https://slack.sglang.ai/)
321
+ | [**Join Bi-Weekly Development Meeting**](https://meeting.sglang.ai/)
320
322
  | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
321
323
 
322
324
  ## News
@@ -346,11 +348,11 @@ The core features include:
346
348
  - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
347
349
 
348
350
  ## Getting Started
349
- - [Install SGLang](https://sgl-project.github.io/start/install.html)
350
- - [Quick Start](https://sgl-project.github.io/start/send_request.html)
351
- - [Backend Tutorial](https://sgl-project.github.io/backend/openai_api_completions.html)
352
- - [Frontend Tutorial](https://sgl-project.github.io/frontend/frontend.html)
353
- - [Contribution Guide](https://sgl-project.github.io/references/contribution_guide.html)
351
+ - [Install SGLang](https://docs.sglang.ai/start/install.html)
352
+ - [Quick Start](https://docs.sglang.ai/start/send_request.html)
353
+ - [Backend Tutorial](https://docs.sglang.ai/backend/openai_api_completions.html)
354
+ - [Frontend Tutorial](https://docs.sglang.ai/frontend/frontend.html)
355
+ - [Contribution Guide](https://docs.sglang.ai/references/contribution_guide.html)
354
356
 
355
357
  ## Benchmark and Performance
356
358
  Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/), [v0.4 blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)
@@ -11,7 +11,7 @@ sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
11
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
12
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
13
13
  sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
14
- sglang/version.py,sha256=hn1mDUw1bYeP3zAc9Kr-wHIjuSeJC4zGGsfaHDKujkg,28
14
+ sglang/version.py,sha256=67TlBPUpVb158CbDn3v32POQ-USKtg7P1fg71jmrBWc,28
15
15
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
17
17
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -32,16 +32,17 @@ sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21
32
32
  sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
33
33
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
34
34
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
35
- sglang/srt/server.py,sha256=zqTk-il1cdQPZxz2sVE4w9OQpvlRBkijG1QYttkJJh4,35145
36
- sglang/srt/server_args.py,sha256=sRh76rD0P8M22PamOscDiszV5Jl2LILckTa7JlgVNY0,36539
37
- sglang/srt/utils.py,sha256=acB-l8FPp5e35eavVznBov8r1-fw4ppXGVYsJ3EDPVk,45468
35
+ sglang/srt/server.py,sha256=g2Wf1S3tOev0T2Wn98UkaOuDYPMixsy2xUzW2jUrQ3o,37148
36
+ sglang/srt/server_args.py,sha256=N8ByNO3vlQ-nl_-rgiCsRkiksefKtyKY9W7-24rhQKw,36965
37
+ sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
38
+ sglang/srt/utils.py,sha256=8TobQ4TwR22aa4j3W-XMkhJVBsuZ85t0zI8Mupx7L3M,46180
38
39
  sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
39
40
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
40
41
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
41
42
  sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
42
43
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
43
44
  sglang/srt/configs/load_config.py,sha256=TcPi_HY6xu5SiVZsxPOoB5pGeDUNebOk7muoUH9VBDg,3083
44
- sglang/srt/configs/model_config.py,sha256=Q2Mx3ww6ER4knXUMtedUbtpv9bTnpVPU77UDmfZeF5U,16427
45
+ sglang/srt/configs/model_config.py,sha256=qDTL1oxSlCxptPX8AI-VlEuxMB7m0UCAUDsbwXpUjow,16831
45
46
  sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
46
47
  sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
47
48
  sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
@@ -64,18 +65,18 @@ sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=P3WKgddcf
64
65
  sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwxBQ,5471
65
66
  sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
66
67
  sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
67
- sglang/srt/layers/linear.py,sha256=NSiZhylgI8mtH05c3Ixu-F3yLk0x4Wk135UbB4XXOZQ,50790
68
+ sglang/srt/layers/linear.py,sha256=s5hGfdBgYkFMHolTTsSLXQdOay9HZxYyrS6AYFZaeYA,48860
68
69
  sglang/srt/layers/logits_processor.py,sha256=r2yGmNqQTpi1l7qvN2Bvjb7lVKfBsxIBrJ6CpBh-_wg,12993
69
- sglang/srt/layers/parameter.py,sha256=wTne5O8_RfTL4Yvd7GrUNH94_FlE2VlQzSRCRUf9oeY,14502
70
+ sglang/srt/layers/parameter.py,sha256=pC6hz2Vu9bFKH4Mt5lh-BwNWUNrJO_GsaFY9aNVDsrY,14684
70
71
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
71
- sglang/srt/layers/radix_attention.py,sha256=E4cmvkcCdCtb6VyLNrCKy1D6VwHQ063oH3JQXPaRy6w,2178
72
+ sglang/srt/layers/radix_attention.py,sha256=nVHKPFyr-CWNm6AnMGPhuuTFTtgYwPL8sAVBZ5u3d94,2232
72
73
  sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
73
74
  sglang/srt/layers/sampler.py,sha256=HQWi1zb1gmD9pHMQyEP3WPjnL8vy-ncZDVMENbjQW7c,6944
74
75
  sglang/srt/layers/torchao_utils.py,sha256=8c2vzt106iP_QKbJtfN1GuABW8nCuP5dElQLUeci6qg,3934
75
- sglang/srt/layers/vocab_parallel_embedding.py,sha256=hGACDb1Ion8L9NfrHv6j6GnpfV9zOhJ--0sHiEt4m0o,21622
76
+ sglang/srt/layers/vocab_parallel_embedding.py,sha256=8TvdxJZipUy6Ewm8Ovsbho7GzZ_yvDZ-eXjK_8vc_8k,22149
76
77
  sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
77
78
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
78
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=d7XwoHYdmJHUwexghPUHLtKPg6WwTghBJ1PK5zOtrec,33261
79
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=1He2KvcPQmLbr-8wkgy20NYjsu_hicW6NlumoVP9-kM,33842
79
80
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
80
81
  sglang/srt/layers/attention/triton_backend.py,sha256=44ScKsVs-rFvqsaAZG_mREEpczhGaUBvaflvWqrukVE,6743
81
82
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
@@ -83,13 +84,13 @@ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXf
83
84
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
84
85
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
85
86
  sglang/srt/layers/moe/fused_moe_native.py,sha256=8q-LFZMSCGLc2_Gltp2lH0gSb4A1WOuKQW3wo3rpj5g,1601
86
- sglang/srt/layers/moe/topk.py,sha256=JpeIl_-CNk0yyG3k5fmmNbbmR2_9bkKC23UoLOlMkjw,6954
87
+ sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
87
88
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
89
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
89
90
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=6iQU5ZjQ8IXGoQ8ZlBuJqyQxYTEem9vXI6rbVIWKlZw,22303
90
91
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
91
92
  sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=3at2h0NDC8JF144jH6h5ze_YkBasvjo227bdFLiK0vs,36759
92
- sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=LwEoCt1lUc0uvCvRhBAy6Gkx1uCmOiFpnJPo-deXSQQ,20797
93
+ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=KCYdT1kftwY8V_wRahoW6GbXkrm7lAZ86xvmu1qZK8w,21802
93
94
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
94
95
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
95
96
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
@@ -181,12 +182,14 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=LwEoCt1lUc0uvCvRhBAy6Gkx1
181
182
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
182
183
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
183
184
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
184
- sglang/srt/layers/quantization/__init__.py,sha256=iprNsQDppt1BH3JX_GZlhvg0fEvypWCq8tAdN2v5HnE,4684
185
+ sglang/srt/layers/quantization/__init__.py,sha256=vM6Vhlu-Jv4t9DDwywitXGz58psTQ5k7guVuK0o4jTk,4785
185
186
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
186
- sglang/srt/layers/quantization/fp8.py,sha256=FZB2bzi-fw52WzSdpWcLNvAZEuuiLEhR1yeNPUEFCO8,32668
187
+ sglang/srt/layers/quantization/fp8.py,sha256=2k6vk2sTVB6JCtEJLsFFn5bJKR8lWwMRke4tu9nnTP0,34806
187
188
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
188
- sglang/srt/layers/quantization/fp8_utils.py,sha256=qBVJXxbxqmf8-Juq0t-IXWjlaZoePJqFNYcs9-oT5Yo,4150
189
- sglang/srt/layers/quantization/modelopt_quant.py,sha256=07WU6ej0nvAvmZdySwo8l4TH9cu8_rp3th8a86CMu2o,6247
189
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
190
+ sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
191
+ sglang/srt/layers/quantization/modelopt_quant.py,sha256=64Qec1kzduAcxyDLd_Y47wDHZ4ShS9Vb-Rf57jc1Zmg,6245
192
+ sglang/srt/layers/quantization/w8a8_int8.py,sha256=RO_s0KPH5wSx2HaI5PbAkdEXVqPS05AS6yo3oyZnIbw,3353
190
193
  "sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
191
194
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
192
195
  "sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
@@ -229,27 +232,28 @@ sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
229
232
  sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
230
233
  sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
231
234
  sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
235
+ sglang/srt/managers/configure_logging.py,sha256=wa1NLWaxC2NGSTJflZvCvUrONH4i6wreNvVHb90bd14,1374
232
236
  sglang/srt/managers/data_parallel_controller.py,sha256=VZSXGsNJ029BJlu56lCugaapMPvzjzE2yFATd8KWLNY,8468
233
- sglang/srt/managers/detokenizer_manager.py,sha256=XvyxUhY_SNXlAcVsx9zczrGllpEMzj7p2Vbh6M_yHy8,8555
237
+ sglang/srt/managers/detokenizer_manager.py,sha256=nZkbwt4yty_oy8rvg4T7PbgyVLoBLohvHl25xlQpBoo,8439
234
238
  sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
235
- sglang/srt/managers/io_struct.py,sha256=02NMBHRCjs9TUSdhKJmvMp3HculMC-50SkCGOEaYEHg,16197
239
+ sglang/srt/managers/io_struct.py,sha256=H1rNLCl2iqDijUGLBafjodTrohaUi1ztJn69XjkhjTk,16207
236
240
  sglang/srt/managers/schedule_batch.py,sha256=jmPTc-XyI-AXktz9Rofs-Fb3OlOgb-bThI142kOy--g,47134
237
241
  sglang/srt/managers/schedule_policy.py,sha256=aHkIL9pZtc4Kdmy8XU9tsjaDzdChVN2dnGKvJkSyqFg,17965
238
- sglang/srt/managers/scheduler.py,sha256=uapaewsUvKNuzOqaamfZcdyDARlETjobYrVaQuQGAB4,65405
242
+ sglang/srt/managers/scheduler.py,sha256=Kn7NyoLwHIeuGKQercV4jKsC5-KVLK4JhRiflNNLu9A,66790
239
243
  sglang/srt/managers/session_controller.py,sha256=0L9_3lhFGU4kLm8b2G1QAeslxvTT_y_Iw8spwrpgr30,5508
240
- sglang/srt/managers/tokenizer_manager.py,sha256=YfNDv_kswSsnhwhdsE0PXCsfUx8D6oVJE4RPkTXnMWo,33865
244
+ sglang/srt/managers/tokenizer_manager.py,sha256=p9k7fvFWyKkHO-Am-2JdbR6-VRsuGEiwQO7t1F7_rfs,35956
241
245
  sglang/srt/managers/tp_worker.py,sha256=-bvUFCo544QQSEHqPPjeOvCWMEFn01Bva6AeO39Qe3o,8043
242
246
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=rdHz2thdGSmceDedrolHOqjNPhrralyDTuNREL56oNI,9067
243
247
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
244
248
  sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
245
249
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
246
- sglang/srt/mem_cache/memory_pool.py,sha256=PzkTrQV8r0Ih58v46JibITOKdzuF32frBn78OdT1Ggw,18548
250
+ sglang/srt/mem_cache/memory_pool.py,sha256=McBKAcV444ewM-idOuCbfeKHoF-lhCL9m5R27M8H9ew,20401
247
251
  sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
248
- sglang/srt/metrics/collector.py,sha256=sIi_22L_vaaEXzTmjWXOUVwxzumIS-lxpLSPyCL0USA,6651
252
+ sglang/srt/metrics/collector.py,sha256=sbgruNDzxBmTd-lnRi8mBZGCt2J7qgRVvDk2LQ5HvQU,6936
249
253
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
250
254
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=rGG0ZS673YC_RVaXMlmNTBJln-L7ugsgDz0Q6XmO0Cc,18544
251
255
  sglang/srt/model_executor/forward_batch_info.py,sha256=Vu6qlbfm6dMUfvGaSmmLIroi8hBqfDpNVLxl7oECzIs,15001
252
- sglang/srt/model_executor/model_runner.py,sha256=aAu4ZsaYOpgdKq_ODocvV1YuK7URdDkOM4wfLS-TFYs,30126
256
+ sglang/srt/model_executor/model_runner.py,sha256=AQPN4q-Wuw3yCeFjXwWvN5m07geS07l21SXFKr-FeCk,31955
253
257
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
254
258
  sglang/srt/model_loader/loader.py,sha256=7OG_8-66vFDFZ9kVKGNK1BFBjZ6ql449dlyvdCbMqvE,43876
255
259
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
@@ -270,7 +274,7 @@ sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,
270
274
  sglang/srt/models/grok.py,sha256=gIr6uFNLv42v-yjAko4w8uugAA7vE0396S23V98Aiu4,18002
271
275
  sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
272
276
  sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
273
- sglang/srt/models/llama.py,sha256=-RYH3tiPP7UM6DYeMK_vIf_EjhIaOPpen4thmS4UNc0,20613
277
+ sglang/srt/models/llama.py,sha256=r9MwIsKv5SrwpLewdB_gqai1YDfjyG-2dlT_pYPNIac,22087
274
278
  sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
275
279
  sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
276
280
  sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
@@ -288,7 +292,8 @@ sglang/srt/models/olmo2.py,sha256=aC7svioN7XT5owRxPrvhvWBNMON9QXGQBWJ1KHMyXeA,13
288
292
  sglang/srt/models/olmoe.py,sha256=LiHVGfRaC5c_BU_vVgtV9uLuDH_SC0dw1kEc61posmI,15351
289
293
  sglang/srt/models/phi3_small.py,sha256=44_my3QmgJ2N7SOkGZzEb62DXBeCVHojfmCWgkk2uCI,14802
290
294
  sglang/srt/models/qwen.py,sha256=_FKDbwaS5C07uJyyivZpBrXJVej4Ph9ivzJdzWJPxJ4,9904
291
- sglang/srt/models/qwen2.py,sha256=be4xgcuqNa9kBdaL7x3PjsnUky6fh5K33c_khAWSi04,12959
295
+ sglang/srt/models/qwen2.py,sha256=aRumlGWYYUntMHR3LoOpeduelnzo9Ls0FXVwVKiL7tY,13332
296
+ sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
292
297
  sglang/srt/models/qwen2_moe.py,sha256=6xRRJxWWh1M5UFPfvhsCpY477zv-30AeSRJXsvOkgFc,16542
293
298
  sglang/srt/models/qwen2_vl.py,sha256=3EaUlTbyWOTRXA7eViK1WqmVbCFhXLIpnos49zzf-yM,26561
294
299
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
@@ -299,17 +304,17 @@ sglang/srt/models/xverse_moe.py,sha256=7E60YIST4ELYwLRgjtHiLRI5Uyc7XqQTM7jQXiWaQ
299
304
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
300
305
  sglang/srt/openai_api/adapter.py,sha256=Yv-rEA0Jd54iFlnkVy-OZM4EnPqkW_NLtDPGCiPWVWo,56386
301
306
  sglang/srt/openai_api/protocol.py,sha256=v_YUwH1PF4vIVqSE5rj1ODdSglprTe_vGiXoS99cOV4,11613
302
- sglang/srt/sampling/sampling_batch_info.py,sha256=TFceDjC6Xkbn1TThKu9uGoCvutRQbJEFppJPn1-WXUg,9343
303
- sglang/srt/sampling/sampling_params.py,sha256=KjUhZzRJvNTQZgJul2zSq3U8r352WzMKLbXfhP3V-nU,5685
307
+ sglang/srt/sampling/sampling_batch_info.py,sha256=BEcDjMlTQ6wRuvwwCjB-2cy6GMgS3dpmjG4xetBuI4Q,9637
308
+ sglang/srt/sampling/sampling_params.py,sha256=YdfObBzfkgK9rU2XY6_7kxl7H1wjtDGrinpyIszTGUw,5678
304
309
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
305
310
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
306
311
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
307
312
  sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
308
313
  sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
309
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
314
+ sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=vmE5muVz_ztRA6glgYOiQnKas_zTvQZ3nxcUEQao-L8,3070
310
315
  sglang/srt/speculative/build_eagle_tree.py,sha256=SIKuOFUOIzMLyanL5vViPmFBEiUHm_ezwiGuIyLmauE,9886
311
316
  sglang/srt/speculative/eagle_utils.py,sha256=Z51xGuvn-ZIMp0OXENZUhpDOz8kTDkujhHZA-Z2MKbA,23422
312
- sglang/srt/speculative/eagle_worker.py,sha256=Yu2Uibg9Fvo3M0NeYnjCxRgInPkqPyJoXhi378UqIQs,7807
317
+ sglang/srt/speculative/eagle_worker.py,sha256=P__BMJ0eKLaPzCS8jEWylk2POstue5u3RIVZeFtj84I,7843
313
318
  sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
314
319
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
315
320
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
@@ -327,8 +332,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
327
332
  sglang/test/test_programs.py,sha256=AABFLu0W9FlK-VN2wb2rLkwFCK6YCkLYrgQClymzpcw,18835
328
333
  sglang/test/test_utils.py,sha256=3xUJpb-HNSwzoRZ_eVO_Q52m5pWlQMU84PXnsSzoD9g,24585
329
334
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
330
- sglang-0.4.1.post5.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
331
- sglang-0.4.1.post5.dist-info/METADATA,sha256=DbUY9Mcojw2gnDGk7H1o4vOk2YqNciroomu8vKGnMDg,22601
332
- sglang-0.4.1.post5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
333
- sglang-0.4.1.post5.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
334
- sglang-0.4.1.post5.dist-info/RECORD,,
335
+ sglang-0.4.1.post6.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
336
+ sglang-0.4.1.post6.dist-info/METADATA,sha256=hls-gahHEVIiMlj9JHUiKHzKkiUiS_J5_JACvVh6riM,22527
337
+ sglang-0.4.1.post6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
338
+ sglang-0.4.1.post6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
339
+ sglang-0.4.1.post6.dist-info/RECORD,,