sglang 0.4.2__py3-none-any.whl → 0.4.2.post1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- sglang/srt/layers/attention/triton_ops/prefill_attention.py +6 -0
- sglang/srt/layers/attention/vision.py +243 -40
- sglang/srt/layers/quantization/fp8.py +7 -0
- sglang/srt/layers/rotary_embedding.py +28 -12
- sglang/srt/layers/sampler.py +5 -2
- sglang/srt/managers/image_processor.py +77 -38
- sglang/srt/managers/scheduler.py +17 -3
- sglang/srt/mem_cache/base_prefix_cache.py +4 -0
- sglang/srt/mem_cache/chunk_cache.py +3 -0
- sglang/srt/mem_cache/radix_cache.py +30 -1
- sglang/srt/models/minicpmv.py +129 -76
- sglang/srt/models/mllama.py +16 -56
- sglang/srt/models/qwen2.py +4 -1
- sglang/srt/models/qwen2_vl.py +18 -8
- sglang/srt/server_args.py +6 -0
- sglang/srt/utils.py +0 -2
- sglang/utils.py +42 -0
- sglang/version.py +1 -1
- {sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/METADATA +3 -3
- {sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/RECORD +23 -23
- {sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/LICENSE +0 -0
- {sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/WHEEL +0 -0
- {sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/top_level.txt +0 -0
sglang/srt/models/qwen2_vl.py
CHANGED
@@ -30,12 +30,10 @@ import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from einops import rearrange
+from einops import rearrange
 from vllm.model_executor.layers.activation import QuickGELU
 
 from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
-from sglang.srt.distributed import parallel_state
-from sglang.srt.distributed import utils as dist_utils
 from sglang.srt.hf_transformers_utils import get_processor
 from sglang.srt.layers.attention.vision import VisionAttention
 from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
@@ -118,6 +116,7 @@ class Qwen2VisionBlock(nn.Module):
         mlp_ratio: float,
         act_layer: Type[nn.Module] = QuickGELU,
         norm_layer: Type[nn.Module] = None,
+        attn_implementation: Optional[str] = "sdpa",
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -126,12 +125,24 @@
         self.norm1 = norm_layer(dim)
         self.norm2 = norm_layer(dim)
         mlp_hidden_dim = int(dim * mlp_ratio)
+        if attn_implementation == "sdpa":
+            use_context_forward = False
+            use_full_precision_softmax = False
+        elif attn_implementation == "flash_attention_2":
+            use_full_precision_softmax = False
+            use_context_forward = True
+        elif attn_implementation == "eager":
+            use_full_precision_softmax = True
+            use_context_forward = False
 
         self.attn = VisionAttention(
             embed_dim=dim,
             num_heads=num_heads,
             projection_size=dim,
             use_qkv_parallel=False,
+            use_context_forward=use_context_forward,
+            use_full_precision_softmax=use_full_precision_softmax,
+            flatten_batch=True,
             quant_config=quant_config,
         )
         self.mlp = Qwen2VisionMLP(
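For quick reference, the dispatch added above maps each attn_implementation value onto two VisionAttention flags. The snippet below is an illustrative restatement of that mapping, not code from the package:

# Illustrative restatement of the attn_implementation dispatch added above
# (the package itself uses the if/elif chain shown in the diff).
_ATTN_FLAGS = {
    # attn_implementation: (use_context_forward, use_full_precision_softmax)
    "sdpa": (False, False),
    "flash_attention_2": (True, False),
    "eager": (False, True),
}
use_context_forward, use_full_precision_softmax = _ATTN_FLAGS["sdpa"]
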
@@ -286,7 +297,6 @@ class Qwen2VisionTransformer(nn.Module):
         norm_layer = partial(nn.LayerNorm, eps=norm_eps)
         head_dim = embed_dim // num_heads
         self.rotary_pos_emb = Qwen2VisionRotaryEmbedding(head_dim // 2)
-
         self.blocks = nn.ModuleList(
             [
                 Qwen2VisionBlock(
@@ -294,6 +304,7 @@ class Qwen2VisionTransformer(nn.Module):
                     num_heads=num_heads,
                     mlp_ratio=mlp_ratio,
                     norm_layer=norm_layer,
+                    attn_implementation="sdpa",
                     quant_config=quant_config,
                 )
                 for _ in range(depth)
@@ -482,10 +493,6 @@ class Qwen2VLForConditionalGeneration(nn.Module):
                 opensource models), the shape will be `(3, seq_len)`,
                 otherwise it will be `(seq_len,).
                 (Use input_metadata.mrope_positions to replace it)
-            pixel_values: Pixel values to be fed to a model.
-                `None` if no images are passed.
-            image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in LLM.
-                `None` if no images are passed.
         """
         if getattr(self.config, "rope_scaling", {}).get("type", None) == "mrope":
             positions = forward_batch.mrope_positions
@@ -540,15 +547,18 @@ class Qwen2VLForConditionalGeneration(nn.Module):
                     num_image_tokens = self.calculate_num_image_tokens(
                         image_grid_thws[idx]
                     )
+
                     left_idx = start_idx + (image_offset - prefix_len)
                     right_idx = (
                         start_idx + (image_offset - prefix_len) + num_image_tokens
                     )
+
                     inputs_embeds[left_idx:right_idx] = image_embeds[
                         image_embeds_offset : image_embeds_offset + num_image_tokens
                     ]
                     image_embeds_offset += num_image_tokens
 
+        input_ids = None
         hidden_states = self.model(
             input_ids=input_ids,
             positions=positions,
sglang/srt/server_args.py
CHANGED
@@ -163,6 +163,7 @@ class ServerArgs:
     # Custom logit processor
     enable_custom_logit_processor: bool = False
     tool_call_parser: str = None
+    enable_hierarchical_cache: bool = False
 
     def __post_init__(self):
         # Set missing default values
@@ -892,6 +893,11 @@ class ServerArgs:
             default=ServerArgs.tool_call_parser,
             help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
         )
+        parser.add_argument(
+            "--enable-hierarchical-cache",
+            action="store_true",
+            help="Enable hierarchical cache",
+        )
 
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
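The new option reaches the server as the --enable-hierarchical-cache flag registered above and as the enable_hierarchical_cache field on ServerArgs. The following is a minimal, standalone sketch of the argparse behaviour; it mirrors the add_argument call from the diff rather than importing sglang's full parser:

# Illustrative sketch only; mirrors the add_argument call added above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-hierarchical-cache",
    action="store_true",
    help="Enable hierarchical cache",
)
args = parser.parse_args(["--enable-hierarchical-cache"])
print(args.enable_hierarchical_cache)  # True; defaults to False when the flag is omitted
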
sglang/srt/utils.py
CHANGED
sglang/utils.py
CHANGED
@@ -373,3 +373,45 @@ class TypeBasedDispatcher:
             if isinstance(obj, ty):
                 return fn(obj)
         raise ValueError(f"Invalid object: {obj}")
+
+
+def trim_overlap(existing_text, new_chunk):
+    """
+    Finds the largest suffix of 'existing_text' that is a prefix of 'new_chunk'
+    and removes that overlap from the start of 'new_chunk'.
+    """
+    max_overlap = 0
+    max_possible = min(len(existing_text), len(new_chunk))
+    for i in range(max_possible, 0, -1):
+        if existing_text.endswith(new_chunk[:i]):
+            max_overlap = i
+            break
+    return new_chunk[max_overlap:]
+
+
+def stream_and_merge(llm, prompt, sampling_params):
+    """
+    1) Streams the text,
+    2) Removes chunk overlaps,
+    3) Returns the merged text.
+    """
+    final_text = ""
+    for chunk in llm.generate(prompt, sampling_params, stream=True):
+        chunk_text = chunk["text"]
+        cleaned_chunk = trim_overlap(final_text, chunk_text)
+        final_text += cleaned_chunk
+    return final_text
+
+
+async def async_stream_and_merge(llm, prompt, sampling_params):
+    """
+    Streams tokens asynchronously, removes chunk overlaps,
+    and yields the cleaned chunk in real time for printing.
+    """
+    final_text = ""
+    generator = await llm.async_generate(prompt, sampling_params, stream=True)
+    async for chunk in generator:
+        chunk_text = chunk["text"]
+        cleaned_chunk = trim_overlap(final_text, chunk_text)
+        final_text += cleaned_chunk
+        yield cleaned_chunk  # yield the non-overlapping portion
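A short usage sketch for the helpers added above (illustrative, not part of the diff). The llm object is assumed to be an sglang offline engine whose generate(..., stream=True) yields dicts with a "text" key, which is exactly what the helpers index into:

from sglang.utils import stream_and_merge, trim_overlap

# trim_overlap drops the part of the new chunk that already ends the accumulated text.
print(trim_overlap("Hello wor", "world!"))  # -> "ld!"

# With a streaming engine, stream_and_merge stitches overlapping chunks into one string:
# merged = stream_and_merge(llm, "Write a haiku about caching.", {"temperature": 0, "max_new_tokens": 48})
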
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.4.2"
+__version__ = "0.4.2.post1"
{sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: sglang
-Version: 0.4.2
+Version: 0.4.2.post1
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
         Version 2.0, January 2004
@@ -333,7 +333,7 @@ Requires-Dist: sglang[test]; extra == "dev-cpu"
 | [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
 
 ## News
-- [2025/01] 🔥 SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with
+- [2025/01] 🔥 SGLang provides day one support for DeepSeek V3/R1 models on NVIDIA and AMD GPUs with DeepSeek-specific optimizations. ([instructions](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3), [AMD blog](https://www.amd.com/en/developer/resources/technical-articles/amd-instinct-gpus-power-deepseek-v3-revolutionizing-ai-development-with-sglang.html))
 - [2024/12] 🔥 v0.4 Release: Zero-Overhead Batch Scheduler, Cache-Aware Load Balancer, Faster Structured Outputs ([blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)).
 - [2024/09] v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
 - [2024/07] v0.2 Release: Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
@@ -372,7 +372,7 @@ Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
 [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
 
 ## Adoption and Sponsorship
-The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
+The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, Novita AI, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
 
 ## Acknowledgment and Citation
 We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql). Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
{sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/RECORD
CHANGED
@@ -9,8 +9,8 @@ sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
 sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
 sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
 sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
-sglang/utils.py,sha256=
-sglang/version.py,sha256=
+sglang/utils.py,sha256=7HpOrPBhMivWH719m7Dy1rjrAXOAsnqelpwNBBbvjqs,13319
+sglang/version.py,sha256=BObAQyMJTgNEQbPpM5x4R8aeAPCZ_eHVSXPwL90NUlk,28
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=v4SyYViPHX3i3XT46F7vlARn4UaSiP3PBpTGtzO6uRY,17006
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -33,9 +33,9 @@ sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mv
 sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
 sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
 sglang/srt/server.py,sha256=PrQb9r6L9syWHKlggbbiQYsKtpwSmECqozRbf8qnoV8,874
-sglang/srt/server_args.py,sha256=
+sglang/srt/server_args.py,sha256=opURYsAG9anR5EINNq45f8GJv3NLDllhP9AlwpJ3lK8,39997
 sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
-sglang/srt/utils.py,sha256=
+sglang/srt/utils.py,sha256=yIQ5XtfJa_jPDKTzxqXnCdbhA2kKMihzcP4fSAWU4bs,46317
 sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
 sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
 sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
@@ -71,8 +71,8 @@ sglang/srt/layers/logits_processor.py,sha256=_3TZNUbvjmw63ywBv6V6WU87G1TErMaXGa7
 sglang/srt/layers/parameter.py,sha256=sX6aB69qbD6jRqQeOfXqK_ueyyZlXCeC0AlglbsRPcM,14901
 sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
 sglang/srt/layers/radix_attention.py,sha256=tPjJA3P9kuFBk2QWFTgOI8UbVUFLVDZgFaQWuokx894,2234
-sglang/srt/layers/rotary_embedding.py,sha256=
-sglang/srt/layers/sampler.py,sha256=
+sglang/srt/layers/rotary_embedding.py,sha256=tEvy-IAi7GaI5PYFwV30Rek3m6oUHKYuGSa23FXWXSE,44100
+sglang/srt/layers/sampler.py,sha256=4GFSuaNrvnM9S-GPtqTeiWu3I0XZvxM7B48NdSLwpW4,9934
 sglang/srt/layers/torchao_utils.py,sha256=Ws24FdRBSkTpyeyA6bQrdDm-W5wfDxKvSIPUSahyMfA,4063
 sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
 sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
@@ -80,11 +80,11 @@ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jH
 sglang/srt/layers/attention/flashinfer_backend.py,sha256=XUyR97-WSyE6esq4r4XOcvXRtEJm8JOZ6MrXE-YfsYM,33949
 sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
 sglang/srt/layers/attention/triton_backend.py,sha256=P329qd6i7XfgB2UH7KXNid67v-kziV1sgcAuh3RWna8,6654
-sglang/srt/layers/attention/vision.py,sha256=
+sglang/srt/layers/attention/vision.py,sha256=zLjKmzUlkgq1RFcP3b4EPArOAKovoaDLgYfM5SyB2wM,13181
 sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
 sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
 sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
-sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=
+sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=Y66gZ37u0GKMPtI8n5MbO6uOxRuGEmKIG0IPbJTOqAM,6213
 sglang/srt/layers/moe/fused_moe_native.py,sha256=OEWpM93X5tJG4-rwz5qmdpTzEUR73zun29YRV3bZglY,4269
 sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
 sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -187,7 +187,7 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=bofJhiDnRNqD2D20QV7CPNf2S
 "sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
 sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
 sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
-sglang/srt/layers/quantization/fp8.py,sha256=
+sglang/srt/layers/quantization/fp8.py,sha256=ibttPVCUsCQ0LXy7FUb8wnzqGcGZQXQLqwCB4a2fai4,35160
 sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
 sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
 sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
@@ -247,21 +247,21 @@ sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4
 sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
 sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
 sglang/srt/managers/detokenizer_manager.py,sha256=A-tZi9VPkrIAVteQItYUY-07V1rWmySFHNcVf8qAdPI,9578
-sglang/srt/managers/image_processor.py,sha256=
+sglang/srt/managers/image_processor.py,sha256=s1QH9cSzT_nnitc6idzFjuGDp-pDnMTpbVZoQfzdSXU,20671
 sglang/srt/managers/io_struct.py,sha256=1Z6MCVI1LN2lS_7e8WHkpVNT_LW62mE-jpZ2Jn_FAtE,18267
 sglang/srt/managers/schedule_batch.py,sha256=oP6ygJUOmo6PuXcA_wecRvOOa_WdpwmIyCPSgJy4qAc,48743
 sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
-sglang/srt/managers/scheduler.py,sha256=
+sglang/srt/managers/scheduler.py,sha256=akwBfBcNgpCXY1vp3FlD5-bOUMKfUBR5AC3XzSBRDYQ,70757
 sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
 sglang/srt/managers/tokenizer_manager.py,sha256=TjhX0IeFCmk31PDmtVV7Ilc8rqI361XUf_p2KO3ai7s,38669
 sglang/srt/managers/tp_worker.py,sha256=OiHpFR9Hy1GpgLEkTDsykBiFuv1VKmkjQS58gQVPQIs,8126
 sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7p6zREndc4a9fmYfqW4iY9IYANxdoAioaf0hU92-8Ow,8893
 sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
-sglang/srt/mem_cache/base_prefix_cache.py,sha256=
-sglang/srt/mem_cache/chunk_cache.py,sha256=
+sglang/srt/mem_cache/base_prefix_cache.py,sha256=qxgpSHm3qtMdab4U35Mr2BE9TQNjElrnrNMTwL_Osdo,1049
+sglang/srt/mem_cache/chunk_cache.py,sha256=hc_reKKvoI4r8xkgf4I4eIkwXWTJC2ZXaQWuODQZnx0,2572
 sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
 sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBgHqhinc,19682
-sglang/srt/mem_cache/radix_cache.py,sha256=
+sglang/srt/mem_cache/radix_cache.py,sha256=hVILXvc5PauHuLTeyZbm3NCf3AOimaAuXjll53MSLeU,11754
 sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
 sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
 sglang/srt/model_executor/cuda_graph_runner.py,sha256=3_s7zmLn9d0pVCxZd43bYtPmgkbe1kcRaNZbryMMjPU,18520
@@ -296,20 +296,20 @@ sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26
 sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
 sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
 sglang/srt/models/minicpm3.py,sha256=DZ7LltHsyDq8iE7nMi5C9gLzYcQrAIZYkRmx6lCuAgo,24683
-sglang/srt/models/minicpmv.py,sha256=
+sglang/srt/models/minicpmv.py,sha256=AHLQkg2Klimkr7-M3vOT0y5OeFBR6ftPkvDqzGs5hWM,45051
 sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
 sglang/srt/models/mixtral.py,sha256=ybArp6vx7VTrjQ3kqH1FHJ1gQzsFPI5vv1C-Pnix6ws,14520
 sglang/srt/models/mixtral_quant.py,sha256=_gy4gKwFX6BNlU6xE-n0N3vVNhftxgZjWEDKTCKV_2M,14019
-sglang/srt/models/mllama.py,sha256=
+sglang/srt/models/mllama.py,sha256=q2JNkfqEZghmHWhcgX_YaaKVpXwTHnrz7z1TuzfHEjA,36340
 sglang/srt/models/olmo.py,sha256=-t5s3DI-CxiMqRAvKS73NTMNrRpQRD8eh2VabCNYDnE,11699
 sglang/srt/models/olmo2.py,sha256=Wg4mo53c3OIAWmAMZ-TR9VRzSfKqhBZixqvrF8AbIJg,13430
 sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15147
 sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
 sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
-sglang/srt/models/qwen2.py,sha256=
+sglang/srt/models/qwen2.py,sha256=igq-a61CQgH26xnim6c3yeWUCHiN_Nboxg4iu7oy7bo,15072
 sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
 sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
-sglang/srt/models/qwen2_vl.py,sha256=
+sglang/srt/models/qwen2_vl.py,sha256=M-8abTK2Id36Ba6TfvAQHwbdbDNcbgFAxNw6gXkbBCU,23475
 sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
 sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
 sglang/srt/models/torch_native_llama.py,sha256=X0AvlREIysazwFezqndRza7ZCWQ-R1hePoLW0brH4As,19131
@@ -347,8 +347,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
 sglang/test/test_programs.py,sha256=aUV9Ex_B714ph7ytv6W3J7sdGDKC6lGIhUy95Yg6AHQ,18878
 sglang/test/test_utils.py,sha256=BU6lAX3bu3TNQZqVC9UPnyq3I7iV5kigHQKJx7UNlOQ,26192
 sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
-sglang-0.4.2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
-sglang-0.4.2.dist-info/METADATA,sha256=
-sglang-0.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-sglang-0.4.2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.4.2.dist-info/RECORD,,
+sglang-0.4.2.post1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
+sglang-0.4.2.post1.dist-info/METADATA,sha256=DAQlsgAw08lMBLrkY88_9VfhV50Mt3Vi0H3uhM_RJOU,23241
+sglang-0.4.2.post1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+sglang-0.4.2.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.4.2.post1.dist-info/RECORD,,
{sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/LICENSE
File without changes
{sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/WHEEL
File without changes
{sglang-0.4.2.dist-info → sglang-0.4.2.post1.dist-info}/top_level.txt
File without changes