sglang-0.4.2-py3-none-any.whl → sglang-0.4.2.post1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -30,12 +30,10 @@ import numpy as np
  import torch
  import torch.nn as nn
  import torch.nn.functional as F
- from einops import rearrange, repeat
+ from einops import rearrange
  from vllm.model_executor.layers.activation import QuickGELU

  from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
- from sglang.srt.distributed import parallel_state
- from sglang.srt.distributed import utils as dist_utils
  from sglang.srt.hf_transformers_utils import get_processor
  from sglang.srt.layers.attention.vision import VisionAttention
  from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
@@ -118,6 +116,7 @@ class Qwen2VisionBlock(nn.Module):
  mlp_ratio: float,
  act_layer: Type[nn.Module] = QuickGELU,
  norm_layer: Type[nn.Module] = None,
+ attn_implementation: Optional[str] = "sdpa",
  quant_config: Optional[QuantizationConfig] = None,
  ) -> None:
  super().__init__()
@@ -126,12 +125,24 @@ class Qwen2VisionBlock(nn.Module):
  self.norm1 = norm_layer(dim)
  self.norm2 = norm_layer(dim)
  mlp_hidden_dim = int(dim * mlp_ratio)
+ if attn_implementation == "sdpa":
+ use_context_forward = False
+ use_full_precision_softmax = False
+ elif attn_implementation == "flash_attention_2":
+ use_full_precision_softmax = False
+ use_context_forward = True
+ elif attn_implementation == "eager":
+ use_full_precision_softmax = True
+ use_context_forward = False

  self.attn = VisionAttention(
  embed_dim=dim,
  num_heads=num_heads,
  projection_size=dim,
  use_qkv_parallel=False,
+ use_context_forward=use_context_forward,
+ use_full_precision_softmax=use_full_precision_softmax,
+ flatten_batch=True,
  quant_config=quant_config,
  )
  self.mlp = Qwen2VisionMLP(
@@ -286,7 +297,6 @@ class Qwen2VisionTransformer(nn.Module):
  norm_layer = partial(nn.LayerNorm, eps=norm_eps)
  head_dim = embed_dim // num_heads
  self.rotary_pos_emb = Qwen2VisionRotaryEmbedding(head_dim // 2)
-
  self.blocks = nn.ModuleList(
  [
  Qwen2VisionBlock(
@@ -294,6 +304,7 @@ class Qwen2VisionTransformer(nn.Module):
  num_heads=num_heads,
  mlp_ratio=mlp_ratio,
  norm_layer=norm_layer,
+ attn_implementation="sdpa",
  quant_config=quant_config,
  )
  for _ in range(depth)
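
Note on the hunks above: the new attn_implementation argument selects the VisionAttention code path by setting two booleans, use_context_forward and use_full_precision_softmax, and the vision transformer in this release passes "sdpa" for every block. A minimal standalone sketch of that mapping (the helper name resolve_vision_attn_flags is illustrative, not part of sglang):

from typing import Tuple

# Illustrative mapping: attn_implementation -> (use_context_forward, use_full_precision_softmax),
# mirroring the if/elif chain added to Qwen2VisionBlock above.
_VISION_ATTN_FLAGS = {
    "sdpa": (False, False),              # torch scaled_dot_product_attention path
    "flash_attention_2": (True, False),  # context-forward (flash-attention style) path
    "eager": (False, True),              # eager attention with full-precision softmax
}


def resolve_vision_attn_flags(attn_implementation: str = "sdpa") -> Tuple[bool, bool]:
    """Return (use_context_forward, use_full_precision_softmax) for a backend name."""
    if attn_implementation not in _VISION_ATTN_FLAGS:
        raise ValueError(f"Unknown attn_implementation: {attn_implementation!r}")
    return _VISION_ATTN_FLAGS[attn_implementation]


# With the hardcoded "sdpa", every block resolves to (False, False).
use_context_forward, use_full_precision_softmax = resolve_vision_attn_flags("sdpa")
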
@@ -482,10 +493,6 @@ class Qwen2VLForConditionalGeneration(nn.Module):
  opensource models), the shape will be `(3, seq_len)`,
  otherwise it will be `(seq_len,).
  (Use input_metadata.mrope_positions to replace it)
- pixel_values: Pixel values to be fed to a model.
- `None` if no images are passed.
- image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in LLM.
- `None` if no images are passed.
  """
  if getattr(self.config, "rope_scaling", {}).get("type", None) == "mrope":
  positions = forward_batch.mrope_positions
@@ -540,15 +547,18 @@ class Qwen2VLForConditionalGeneration(nn.Module):
  num_image_tokens = self.calculate_num_image_tokens(
  image_grid_thws[idx]
  )
+
  left_idx = start_idx + (image_offset - prefix_len)
  right_idx = (
  start_idx + (image_offset - prefix_len) + num_image_tokens
  )
+
  inputs_embeds[left_idx:right_idx] = image_embeds[
  image_embeds_offset : image_embeds_offset + num_image_tokens
  ]
  image_embeds_offset += num_image_tokens

+ input_ids = None
  hidden_states = self.model(
  input_ids=input_ids,
  positions=positions,
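
Note on the forward() hunk above: once the image embeddings have been scattered into inputs_embeds, the release sets input_ids = None so the language model consumes the precomputed embeddings instead of re-embedding the token ids. A rough sketch of that either/or contract (toy module, not the sglang API):

import torch
import torch.nn as nn


class TinyLM(nn.Module):
    """Toy model illustrating the input_ids / inputs_embeds contract."""

    def __init__(self, vocab_size: int = 100, hidden: int = 16):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden)
        self.head = nn.Linear(hidden, vocab_size)

    def forward(self, input_ids=None, inputs_embeds=None):
        # Exactly one of input_ids / inputs_embeds should be provided.
        if inputs_embeds is None:
            inputs_embeds = self.embed(input_ids)
        return self.head(inputs_embeds)


lm = TinyLM()
ids = torch.randint(0, 100, (6,))
embeds = lm.embed(ids).detach().clone()
embeds[2:4] = torch.randn(2, 16)  # pretend these positions were overwritten with image embeddings
logits = lm(input_ids=None, inputs_embeds=embeds)  # ids are dropped; embeddings are used as-is
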
sglang/srt/server_args.py CHANGED
@@ -163,6 +163,7 @@ class ServerArgs:
  # Custom logit processor
  enable_custom_logit_processor: bool = False
  tool_call_parser: str = None
+ enable_hierarchical_cache: bool = False

  def __post_init__(self):
  # Set missing default values
@@ -892,6 +893,11 @@ class ServerArgs:
  default=ServerArgs.tool_call_parser,
  help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
  )
+ parser.add_argument(
+ "--enable-hierarchical-cache",
+ action="store_true",
+ help="Enable hierarchical cache",
+ )

  @classmethod
  def from_cli_args(cls, args: argparse.Namespace):
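
The new --enable-hierarchical-cache flag is a plain store_true switch that defaults to off. A hedged sketch of how it parses, plus the likely command-line form (placeholder model path, assuming the usual launch_server entry point):

import argparse

# Reproduce the argument exactly as added to ServerArgs.add_cli_args above.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-hierarchical-cache",
    action="store_true",
    help="Enable hierarchical cache",
)

args = parser.parse_args(["--enable-hierarchical-cache"])
assert args.enable_hierarchical_cache is True  # argparse maps dashes to underscores

# On the command line this would look something like:
#   python -m sglang.launch_server --model-path <model> --enable-hierarchical-cache
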
sglang/srt/utils.py CHANGED
@@ -444,8 +444,6 @@ def load_image(image_file: Union[str, bytes]):
  else:
  raise ValueError(f"Invalid image: {image}")

- # if image_size is None:
- # image_size = image.size
  return image, image_size

sglang/utils.py CHANGED
@@ -373,3 +373,45 @@ class TypeBasedDispatcher:
  if isinstance(obj, ty):
  return fn(obj)
  raise ValueError(f"Invalid object: {obj}")
+
+
+ def trim_overlap(existing_text, new_chunk):
+ """
+ Finds the largest suffix of 'existing_text' that is a prefix of 'new_chunk'
+ and removes that overlap from the start of 'new_chunk'.
+ """
+ max_overlap = 0
+ max_possible = min(len(existing_text), len(new_chunk))
+ for i in range(max_possible, 0, -1):
+ if existing_text.endswith(new_chunk[:i]):
+ max_overlap = i
+ break
+ return new_chunk[max_overlap:]
+
+
+ def stream_and_merge(llm, prompt, sampling_params):
+ """
+ 1) Streams the text,
+ 2) Removes chunk overlaps,
+ 3) Returns the merged text.
+ """
+ final_text = ""
+ for chunk in llm.generate(prompt, sampling_params, stream=True):
+ chunk_text = chunk["text"]
+ cleaned_chunk = trim_overlap(final_text, chunk_text)
+ final_text += cleaned_chunk
+ return final_text
+
+
+ async def async_stream_and_merge(llm, prompt, sampling_params):
+ """
+ Streams tokens asynchronously, removes chunk overlaps,
+ and yields the cleaned chunk in real time for printing.
+ """
+ final_text = ""
+ generator = await llm.async_generate(prompt, sampling_params, stream=True)
+ async for chunk in generator:
+ chunk_text = chunk["text"]
+ cleaned_chunk = trim_overlap(final_text, chunk_text)
+ final_text += cleaned_chunk
+ yield cleaned_chunk  # yield the non-overlapping portion
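
The trim_overlap helper added above is pure string manipulation, so its behavior can be checked in isolation; stream_and_merge and async_stream_and_merge simply apply it to each streamed chunk. A quick sanity check (the function is inlined here so the snippet runs standalone; the engine usage in the trailing comment is illustrative):

def trim_overlap(existing_text, new_chunk):
    # Same logic as the helper above: drop the prefix of new_chunk that
    # duplicates a suffix of existing_text.
    max_overlap = 0
    for i in range(min(len(existing_text), len(new_chunk)), 0, -1):
        if existing_text.endswith(new_chunk[:i]):
            max_overlap = i
            break
    return new_chunk[max_overlap:]


assert trim_overlap("The quick brown", "brown fox jumps") == " fox jumps"
assert trim_overlap("Hello", "Hello, world") == ", world"
assert trim_overlap("abc", "xyz") == "xyz"  # no overlap: the chunk is kept whole

# With an offline engine (e.g. llm = sglang.Engine(model_path=...)), stream_and_merge(llm, prompt,
# sampling_params) concatenates such de-overlapped chunks into the final text.
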
sglang/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.4.2"
+ __version__ = "0.4.2.post1"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: sglang
- Version: 0.4.2
+ Version: 0.4.2.post1
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
  License: Apache License
  Version 2.0, January 2004
@@ -333,7 +333,7 @@ Requires-Dist: sglang[test]; extra == "dev-cpu"
  | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |

  ## News
- - [2025/01] 🔥 SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with DeekSeek-specific optimizations. ([instructions](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3), [AMD blog](https://www.amd.com/en/developer/resources/technical-articles/amd-instinct-gpus-power-deepseek-v3-revolutionizing-ai-development-with-sglang.html))
+ - [2025/01] 🔥 SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with DeepSeek-specific optimizations. ([instructions](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3), [AMD blog](https://www.amd.com/en/developer/resources/technical-articles/amd-instinct-gpus-power-deepseek-v3-revolutionizing-ai-development-with-sglang.html))
  - [2024/12] 🔥 v0.4 Release: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
  - [2024/09] v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
  - [2024/07] v0.2 Release: Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
@@ -372,7 +372,7 @@ Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
  [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)

  ## Adoption and Sponsorship
- The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
+ The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, Novita AI, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.

  ## Acknowledgment and Citation
  We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql). Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
@@ -9,8 +9,8 @@ sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
  sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
- sglang/utils.py,sha256=wvLVVC8U2qIhCSCrtzvV3wXapvJweir1XDNdpfoPFRM,11934
- sglang/version.py,sha256=6hfVa12Q-nXyUEXr6SyKpqPEDJW6vlRHyPxlA27PfTs,22
+ sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
+ sglang/version.py,sha256=BObAQyMJTgNEQbPpM5x4R8aeAPCZ_eHVSXPwL90NUlk,28
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -33,9 +33,9 @@ sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mv
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
  sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
- sglang/srt/server_args.py,sha256=GN9NAKDkSWpMQWoTBzHyvp-UimfKKpwAmzgiwUJTe4A,39792
+ sglang/srt/server_args.py,sha256=opURYsAG9anR5EINNq45f8GJv3NLDllhP9AlwpJ3lK8,39997
  sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
- sglang/srt/utils.py,sha256=jzHsVQDYF25Z7rPjiaO4w5iz7ZskRRZxTvEiUeFcSJw,46380
+ sglang/srt/utils.py,sha256=yIQ5XtfJa_jPDKTzxqXnCdbhA2kKMihzcP4fSAWU4bs,46317
  sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
  sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
  sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
@@ -71,8 +71,8 @@ sglang/srt/layers/logits_processor.py,sha256=_3TZNUbvjmw63ywBv6V6WU87G1TErMaXGa7
  sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
  sglang/srt/layers/radix_attention.py,sha256=tPjJA3P9kuFBk2QWFTgOI8UbVUFLVDZgFaQWuokx894,2234
- sglang/srt/layers/rotary_embedding.py,sha256=CdnkPxUtef-o29i1G2p1an3H7sEgScWLvI_XVoMaPbo,43444
- sglang/srt/layers/sampler.py,sha256=T_Lvjc7PhmOUhNAeSoI14DG9EO4XFve0z3wgWYy7YLU,9769
+ sglang/srt/layers/rotary_embedding.py,sha256=tEvy-IAi7GaI5PYFwV30Rek3m6oUHKYuGSa23FXWXSE,44100
+ sglang/srt/layers/sampler.py,sha256=4GFSuaNrvnM9S-GPtqTeiWu3I0XZvxM7B48NdSLwpW4,9934
  sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
  sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
@@ -80,11 +80,11 @@ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jH
  sglang/srt/layers/attention/flashinfer_backend.py,sha256=XUyR97-WSyE6esq4r4XOcvXRtEJm8JOZ6MrXE-YfsYM,33949
  sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
  sglang/srt/layers/attention/triton_backend.py,sha256=P329qd6i7XfgB2UH7KXNid67v-kziV1sgcAuh3RWna8,6654
- sglang/srt/layers/attention/vision.py,sha256=mn8fruFob-Cif0_6V5P6W-2lCqJOiIsvsmYtH-bEvcU,6643
+ sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
  sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
- sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
+ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
  sglang/srt/layers/moe/fused_moe_native.py,sha256=OEWpM93X5tJG4-rwz5qmdpTzEUR73zun29YRV3bZglY,4269
  sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -187,7 +187,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
  "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
  sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
- sglang/srt/layers/quantization/fp8.py,sha256=XjLGHFPgX0NBuXa8eOglZ8TPMvXMNges0l4gDdcumRE,34866
+ sglang/srt/layers/quantization/fp8.py,sha256=ibttPVCUsCQ0LXy7FUb8wnzqGcGZQXQLqwCB4a2fai4,35160
  sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
  sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
  sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
@@ -247,21 +247,21 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
  sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
  sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
  sglang/srt/managers/detokenizer_manager.py,sha256=A-tZi9VPkrIAVteQItYUY-07V1rWmySFHNcVf8qAdPI,9578
- sglang/srt/managers/image_processor.py,sha256=dEjEWzrmJyEXhr5sKBw4BEUEjla8CNdkzFGfogPGmFY,19103
+ sglang/srt/managers/image_processor.py,sha256=s1QH9cSzT_nnitc6idzFjuGDp-pDnMTpbVZoQfzdSXU,20671
  sglang/srt/managers/io_struct.py,sha256=1Z6MCVI1LN2lS_7e8WHkpVNT_LW62mE-jpZ2Jn_FAtE,18267
  sglang/srt/managers/schedule_batch.py,sha256=oP6ygJUOmo6PuXcA_wecRvOOa_WdpwmIyCPSgJy4qAc,48743
  sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
- sglang/srt/managers/scheduler.py,sha256=QGGSau-ydbRzIFdCvE63Na-tpYKHJj_QL1d9raogvXc,70019
+ sglang/srt/managers/scheduler.py,sha256=akwBfBcNgpCXY1vp3FlD5-bOUMKfUBR5AC3XzSBRDYQ,70757
  sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
  sglang/srt/managers/tokenizer_manager.py,sha256=TjhX0IeFCmk31PDmtVV7Ilc8rqI361XUf_p2KO3ai7s,38669
  sglang/srt/managers/tp_worker.py,sha256=OiHpFR9Hy1GpgLEkTDsykBiFuv1VKmkjQS58gQVPQIs,8126
  sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7p6zREndc4a9fmYfqW4iY9IYANxdoAioaf0hU92-8Ow,8893
  sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
- sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
- sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
+ sglang/srt/mem_cache/base_prefix_cache.py,sha256=qxgpSHm3qtMdab4U35Mr2BE9TQNjElrnrNMTwL_Osdo,1049
+ sglang/srt/mem_cache/chunk_cache.py,sha256=hc_reKKvoI4r8xkgf4I4eIkwXWTJC2ZXaQWuODQZnx0,2572
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
  sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBgHqhinc,19682
- sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
+ sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll53MSLeU,11754
  sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=3_s7zmLn9d0pVCxZd43bYtPmgkbe1kcRaNZbryMMjPU,18520
@@ -296,20 +296,20 @@ sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26
  sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
  sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
  sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
- sglang/srt/models/minicpmv.py,sha256=lgWqj1bWMDvPHPE5POVEjhnY-_qMSidkbsBLMYBtDgM,43181
+ sglang/srt/models/minicpmv.py,sha256=AHLQkg2Klimkr7-M3vOT0y5OeFBR6ftPkvDqzGs5hWM,45051
  sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
  sglang/srt/models/mixtral.py,sha256=ybArp6vx7VTrjQ3kqH1FHJ1gQzsFPI5vv1C-Pnix6ws,14520
  sglang/srt/models/mixtral_quant.py,sha256=_gy4gKwFX6BNlU6xE-n0N3vVNhftxgZjWEDKTCKV_2M,14019
- sglang/srt/models/mllama.py,sha256=vK80nRlSH7hFjBeqVRGVJ4XepfxkpY4HJPpvZ15CzMA,37751
+ sglang/srt/models/mllama.py,sha256=q2JNkfqEZghmHWhcgX_YaaKVpXwTHnrz7z1TuzfHEjA,36340
  sglang/srt/models/olmo.py,sha256=-t5s3DI-CxiMqRAvKS73NTMNrRpQRD8eh2VabCNYDnE,11699
  sglang/srt/models/olmo2.py,sha256=Wg4mo53c3OIAWmAMZ-TR9VRzSfKqhBZixqvrF8AbIJg,13430
  sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15147
  sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
  sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
- sglang/srt/models/qwen2.py,sha256=caVvsTu0QteR9Q65p81JEgESSV9_nP3yPaShRMu6uDY,14936
+ sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
  sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
  sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
- sglang/srt/models/qwen2_vl.py,sha256=r0OmFH8OcsIZ96fKqXaAWGLUe6oTVW_w6Gt5PChYUXE,23139
+ sglang/srt/models/qwen2_vl.py,sha256=M-8abTK2Id36Ba6TfvAQHwbdbDNcbgFAxNw6gXkbBCU,23475
  sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
  sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
  sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
@@ -347,8 +347,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
  sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
  sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
- sglang-0.4.2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
- sglang-0.4.2.dist-info/METADATA,sha256=tSq4P2Rhddw25uHR313Islv7DCbaPFSUrZzD8C8_pas,23224
- sglang-0.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- sglang-0.4.2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
- sglang-0.4.2.dist-info/RECORD,,
+ sglang-0.4.2.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
+ sglang-0.4.2.post1.dist-info/METADATA,sha256=DAQlsgAw08lMBLrkY88_9VfhV50Mt3Vi0H3uhM_RJOU,23241
+ sglang-0.4.2.post1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ sglang-0.4.2.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+ sglang-0.4.2.post1.dist-info/RECORD,,