sglang 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +3 -1
- sglang/api.py +5 -0
- sglang/global_config.py +4 -1
- sglang/lang/chat_template.py +9 -2
- sglang/lang/interpreter.py +52 -19
- sglang/lang/ir.py +12 -9
- sglang/lang/tracer.py +1 -1
- sglang/launch_server.py +1 -2
- sglang/launch_server_llavavid.py +31 -0
- sglang/srt/flush_cache.py +16 -0
- sglang/srt/hf_transformers_utils.py +8 -1
- sglang/srt/managers/io_struct.py +15 -3
- sglang/srt/managers/router/infer_batch.py +31 -19
- sglang/srt/managers/router/manager.py +6 -8
- sglang/srt/managers/router/model_rpc.py +59 -23
- sglang/srt/managers/router/model_runner.py +6 -6
- sglang/srt/managers/router/radix_cache.py +47 -17
- sglang/srt/managers/router/scheduler.py +17 -28
- sglang/srt/managers/tokenizer_manager.py +54 -22
- sglang/srt/model_config.py +4 -0
- sglang/srt/models/commandr.py +6 -10
- sglang/srt/models/dbrx.py +14 -15
- sglang/srt/models/gemma.py +7 -10
- sglang/srt/models/llama2.py +7 -10
- sglang/srt/models/llava.py +2 -6
- sglang/srt/models/llavavid.py +307 -0
- sglang/srt/models/mixtral.py +7 -13
- sglang/srt/models/qwen.py +20 -13
- sglang/srt/models/qwen2.py +7 -10
- sglang/srt/models/stablelm.py +13 -12
- sglang/srt/models/yivl.py +1 -4
- sglang/srt/server.py +32 -18
- sglang/srt/server_args.py +9 -6
- sglang/srt/utils.py +126 -17
- sglang/srt/weight_utils.py +66 -51
- sglang/utils.py +77 -26
- {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/METADATA +9 -5
- sglang-0.1.16.dist-info/RECORD +72 -0
- sglang-0.1.15.dist-info/RECORD +0 -69
- {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/LICENSE +0 -0
- {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/WHEEL +0 -0
- {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/top_level.txt +0 -0
sglang/srt/weight_utils.py
CHANGED
@@ -19,11 +19,12 @@ import torch
|
|
19
19
|
from huggingface_hub import HfFileSystem, snapshot_download
|
20
20
|
from safetensors.torch import load_file, safe_open, save_file
|
21
21
|
from tqdm.auto import tqdm
|
22
|
-
|
23
22
|
from vllm.config import ModelConfig
|
24
23
|
from vllm.logger import init_logger
|
25
|
-
from vllm.model_executor.layers.quantization import (
|
26
|
-
|
24
|
+
from vllm.model_executor.layers.quantization import (
|
25
|
+
QuantizationConfig,
|
26
|
+
get_quantization_config,
|
27
|
+
)
|
27
28
|
from vllm.model_executor.layers.quantization.schema import QuantParamSchema
|
28
29
|
|
29
30
|
logger = init_logger(__name__)
|
@@ -32,17 +33,21 @@ logger = init_logger(__name__)
|
|
32
33
|
# can share the same lock without error.
|
33
34
|
# lock files in the temp directory will be automatically deleted when the
|
34
35
|
# system reboots, so users will not complain about annoying lock files
|
35
|
-
temp_dir =
|
36
|
-
|
36
|
+
temp_dir = (
|
37
|
+
os.environ.get("TMPDIR")
|
38
|
+
or os.environ.get("TEMP")
|
39
|
+
or os.environ.get("TMP")
|
40
|
+
or "/tmp/"
|
41
|
+
)
|
37
42
|
|
38
43
|
|
39
44
|
def enable_hf_transfer():
|
40
|
-
"""automatically activates hf_transfer
|
41
|
-
"""
|
45
|
+
"""automatically activates hf_transfer"""
|
42
46
|
if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
|
43
47
|
try:
|
44
48
|
# enable hf hub transfer if available
|
45
49
|
import hf_transfer # type: ignore # noqa
|
50
|
+
|
46
51
|
huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
|
47
52
|
except ImportError:
|
48
53
|
pass
|
@@ -65,8 +70,7 @@ def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
|
|
65
70
|
# add hash to avoid conflict with old users' lock files
|
66
71
|
lock_file_name = hash_name + model_name + ".lock"
|
67
72
|
# mode 0o666 is required for the filelock to be shared across users
|
68
|
-
lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name),
|
69
|
-
mode=0o666)
|
73
|
+
lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
|
70
74
|
return lock
|
71
75
|
|
72
76
|
|
@@ -104,10 +108,12 @@ def convert_bin_to_safetensor_file(
|
|
104
108
|
sf_size = os.stat(sf_filename).st_size
|
105
109
|
pt_size = os.stat(pt_filename).st_size
|
106
110
|
if (sf_size - pt_size) / pt_size > 0.01:
|
107
|
-
raise RuntimeError(
|
111
|
+
raise RuntimeError(
|
112
|
+
f"""The file size different is more than 1%:
|
108
113
|
- {sf_filename}: {sf_size}
|
109
114
|
- {pt_filename}: {pt_size}
|
110
|
-
"""
|
115
|
+
"""
|
116
|
+
)
|
111
117
|
|
112
118
|
# check if the tensors are the same
|
113
119
|
reloaded = load_file(sf_filename)
|
@@ -122,8 +128,7 @@ def convert_bin_to_safetensor_file(
|
|
122
128
|
def get_quant_config(model_config: ModelConfig) -> QuantizationConfig:
|
123
129
|
quant_cls = get_quantization_config(model_config.quantization)
|
124
130
|
# Read the quantization config from the HF model config, if available.
|
125
|
-
hf_quant_config = getattr(model_config.hf_config, "quantization_config",
|
126
|
-
None)
|
131
|
+
hf_quant_config = getattr(model_config.hf_config, "quantization_config", None)
|
127
132
|
if hf_quant_config is not None:
|
128
133
|
return quant_cls.from_config(hf_quant_config)
|
129
134
|
model_name_or_path = model_config.model
|
@@ -131,26 +136,29 @@ def get_quant_config(model_config: ModelConfig) -> QuantizationConfig:
|
|
131
136
|
if not is_local:
|
132
137
|
# Download the config files.
|
133
138
|
with get_lock(model_name_or_path, model_config.download_dir):
|
134
|
-
hf_folder = snapshot_download(
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
+
hf_folder = snapshot_download(
|
140
|
+
model_name_or_path,
|
141
|
+
revision=model_config.revision,
|
142
|
+
allow_patterns="*.json",
|
143
|
+
cache_dir=model_config.download_dir,
|
144
|
+
tqdm_class=Disabledtqdm,
|
145
|
+
)
|
139
146
|
else:
|
140
147
|
hf_folder = model_name_or_path
|
141
148
|
config_files = glob.glob(os.path.join(hf_folder, "*.json"))
|
142
149
|
|
143
150
|
quant_config_files = [
|
144
|
-
f
|
145
|
-
|
151
|
+
f
|
152
|
+
for f in config_files
|
153
|
+
if any(f.endswith(x) for x in quant_cls.get_config_filenames())
|
146
154
|
]
|
147
155
|
if len(quant_config_files) == 0:
|
148
|
-
raise ValueError(
|
149
|
-
f"Cannot find the config file for {model_config.quantization}")
|
156
|
+
raise ValueError(f"Cannot find the config file for {model_config.quantization}")
|
150
157
|
if len(quant_config_files) > 1:
|
151
158
|
raise ValueError(
|
152
159
|
f"Found multiple config files for {model_config.quantization}: "
|
153
|
-
f"{quant_config_files}"
|
160
|
+
f"{quant_config_files}"
|
161
|
+
)
|
154
162
|
|
155
163
|
quant_config_file = quant_config_files[0]
|
156
164
|
with open(quant_config_file, "r") as f:
|
@@ -166,8 +174,7 @@ def prepare_hf_model_weights(
|
|
166
174
|
revision: Optional[str] = None,
|
167
175
|
) -> Tuple[str, List[str], bool]:
|
168
176
|
# Download model weights from huggingface.
|
169
|
-
is_local = os.path.isdir(model_name_or_path)
|
170
|
-
and load_format != "tensorizer"
|
177
|
+
is_local = os.path.isdir(model_name_or_path) and load_format != "tensorizer"
|
171
178
|
use_safetensors = False
|
172
179
|
# Some quantized models use .pt files for storing the weights.
|
173
180
|
if load_format == "auto":
|
@@ -203,11 +210,13 @@ def prepare_hf_model_weights(
|
|
203
210
|
# Use file lock to prevent multiple processes from
|
204
211
|
# downloading the same model weights at the same time.
|
205
212
|
with get_lock(model_name_or_path, cache_dir):
|
206
|
-
hf_folder = snapshot_download(
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
213
|
+
hf_folder = snapshot_download(
|
214
|
+
model_name_or_path,
|
215
|
+
allow_patterns=allow_patterns,
|
216
|
+
cache_dir=cache_dir,
|
217
|
+
tqdm_class=Disabledtqdm,
|
218
|
+
revision=revision,
|
219
|
+
)
|
211
220
|
else:
|
212
221
|
hf_folder = model_name_or_path
|
213
222
|
hf_weights_files: List[str] = []
|
@@ -228,16 +237,14 @@ def prepare_hf_model_weights(
|
|
228
237
|
"scaler.pt",
|
229
238
|
]
|
230
239
|
hf_weights_files = [
|
231
|
-
f for f in hf_weights_files
|
232
|
-
if not any(f.endswith(x) for x in blacklist)
|
240
|
+
f for f in hf_weights_files if not any(f.endswith(x) for x in blacklist)
|
233
241
|
]
|
234
242
|
|
235
243
|
if load_format == "tensorizer":
|
236
244
|
return hf_folder, hf_weights_files, use_safetensors
|
237
245
|
|
238
246
|
if len(hf_weights_files) == 0:
|
239
|
-
raise RuntimeError(
|
240
|
-
f"Cannot find any model weights with `{model_name_or_path}`")
|
247
|
+
raise RuntimeError(f"Cannot find any model weights with `{model_name_or_path}`")
|
241
248
|
|
242
249
|
return hf_folder, hf_weights_files, use_safetensors
|
243
250
|
|
@@ -254,7 +261,8 @@ def hf_model_weights_iterator(
|
|
254
261
|
cache_dir=cache_dir,
|
255
262
|
load_format=load_format,
|
256
263
|
fall_back_to_pt=fall_back_to_pt,
|
257
|
-
revision=revision
|
264
|
+
revision=revision,
|
265
|
+
)
|
258
266
|
|
259
267
|
if load_format == "npcache":
|
260
268
|
# Currently np_cache only support *.bin checkpoints
|
@@ -289,22 +297,25 @@ def hf_model_weights_iterator(
|
|
289
297
|
param = np.load(f)
|
290
298
|
yield name, torch.from_numpy(param)
|
291
299
|
elif load_format == "tensorizer":
|
292
|
-
from vllm.model_executor.tensorizer_loader import (
|
293
|
-
|
294
|
-
|
300
|
+
from vllm.model_executor.tensorizer_loader import (
|
301
|
+
TensorDeserializer,
|
302
|
+
open_stream,
|
303
|
+
tensorizer_warning,
|
304
|
+
)
|
305
|
+
|
295
306
|
tensorizer_args = load_format.params
|
296
307
|
tensorizer_warning(
|
297
308
|
"Deserializing HuggingFace models is not optimized for "
|
298
309
|
"loading on vLLM, as tensorizer is forced to load to CPU. "
|
299
310
|
"Consider deserializing a vLLM model instead for faster "
|
300
311
|
"load times. See the examples/tensorize_vllm_model.py example "
|
301
|
-
"script for serializing vLLM models."
|
312
|
+
"script for serializing vLLM models."
|
313
|
+
)
|
302
314
|
|
303
315
|
deserializer_args = tensorizer_args.deserializer_params
|
304
316
|
stream_params = tensorizer_args.stream_params
|
305
317
|
stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
|
306
|
-
with TensorDeserializer(stream, **deserializer_args,
|
307
|
-
device="cpu") as state:
|
318
|
+
with TensorDeserializer(stream, **deserializer_args, device="cpu") as state:
|
308
319
|
for name, param in state.items():
|
309
320
|
yield name, param
|
310
321
|
del state
|
@@ -324,8 +335,12 @@ def hf_model_weights_iterator(
|
|
324
335
|
|
325
336
|
|
326
337
|
def kv_cache_scales_loader(
|
327
|
-
|
328
|
-
|
338
|
+
filename: str,
|
339
|
+
tp_rank: int,
|
340
|
+
tp_size: int,
|
341
|
+
num_hidden_layers: int,
|
342
|
+
model_type: Optional[str],
|
343
|
+
) -> Iterable[Tuple[int, float]]:
|
329
344
|
"""
|
330
345
|
A simple utility to read in KV cache scaling factors that have been
|
331
346
|
previously serialized to disk. Used by the model to populate the appropriate
|
@@ -343,8 +358,7 @@ def kv_cache_scales_loader(
|
|
343
358
|
"tp_size": tp_size,
|
344
359
|
}
|
345
360
|
schema_dct = json.load(f)
|
346
|
-
schema = QuantParamSchema.model_validate(schema_dct,
|
347
|
-
context=context)
|
361
|
+
schema = QuantParamSchema.model_validate(schema_dct, context=context)
|
348
362
|
layer_scales_map = schema.kv_cache.scaling_factor[tp_rank]
|
349
363
|
return layer_scales_map.items()
|
350
364
|
|
@@ -357,9 +371,11 @@ def kv_cache_scales_loader(
|
|
357
371
|
# This section is reached if and only if any of the excepts are hit
|
358
372
|
# Return an empty iterable (list) => no KV cache scales are loaded
|
359
373
|
# which ultimately defaults to 1.0 scales
|
360
|
-
logger.warning(
|
361
|
-
|
362
|
-
|
374
|
+
logger.warning(
|
375
|
+
"Defaulting to KV cache scaling factors = 1.0 "
|
376
|
+
f"for all layers in TP rank {tp_rank} "
|
377
|
+
"as an error occurred during loading."
|
378
|
+
)
|
363
379
|
return []
|
364
380
|
|
365
381
|
|
@@ -378,8 +394,7 @@ def convert_pyslice_to_tensor(x: Any) -> torch.Tensor:
|
|
378
394
|
return x
|
379
395
|
|
380
396
|
|
381
|
-
def default_weight_loader(param: torch.Tensor,
|
382
|
-
loaded_weight: torch.Tensor) -> None:
|
397
|
+
def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
|
383
398
|
"""Default weight loader."""
|
384
399
|
assert param.size() == loaded_weight.size()
|
385
400
|
param.data.copy_(loaded_weight)
|
@@ -399,4 +414,4 @@ def initialize_dummy_weights(
|
|
399
414
|
"""
|
400
415
|
for param in model.state_dict().values():
|
401
416
|
if torch.is_floating_point(param):
|
402
|
-
param.data.uniform_(low, high)
|
417
|
+
param.data.uniform_(low, high)
|
sglang/utils.py
CHANGED
@@ -2,40 +2,23 @@
|
|
2
2
|
|
3
3
|
import base64
|
4
4
|
import json
|
5
|
+
import os
|
6
|
+
import sys
|
5
7
|
import threading
|
8
|
+
import traceback
|
6
9
|
import urllib.request
|
10
|
+
from concurrent.futures import ThreadPoolExecutor
|
7
11
|
from io import BytesIO
|
8
12
|
from json import dumps
|
9
13
|
|
14
|
+
import numpy as np
|
10
15
|
import requests
|
11
16
|
|
12
17
|
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
"""
|
18
|
-
import torch
|
19
|
-
|
20
|
-
num_gpus = torch.cuda.device_count()
|
21
|
-
assert gpu_id < num_gpus
|
22
|
-
|
23
|
-
if torch.cuda.current_device() != gpu_id:
|
24
|
-
print(
|
25
|
-
f"WARNING: current device is not {gpu_id}, but {torch.cuda.current_device()}, ",
|
26
|
-
"which may cause useless memory allocation for torch CUDA context.",
|
27
|
-
)
|
28
|
-
|
29
|
-
free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
|
30
|
-
|
31
|
-
if distributed:
|
32
|
-
tensor = torch.tensor(free_gpu_memory, dtype=torch.float32).to(
|
33
|
-
torch.device("cuda", gpu_id)
|
34
|
-
)
|
35
|
-
torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.MIN)
|
36
|
-
free_gpu_memory = tensor.item()
|
37
|
-
|
38
|
-
return free_gpu_memory / (1 << 30)
|
18
|
+
def get_exception_traceback():
|
19
|
+
etype, value, tb = sys.exc_info()
|
20
|
+
err_str = "".join(traceback.format_exception(etype, value, tb))
|
21
|
+
return err_str
|
39
22
|
|
40
23
|
|
41
24
|
def is_same_type(values):
|
@@ -130,6 +113,74 @@ def encode_image_base64(image_path):
|
|
130
113
|
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
131
114
|
|
132
115
|
|
116
|
+
def encode_frame(frame):
|
117
|
+
import cv2 # pip install opencv-python-headless
|
118
|
+
from PIL import Image
|
119
|
+
|
120
|
+
# Convert the frame to RGB (OpenCV uses BGR by default)
|
121
|
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
122
|
+
|
123
|
+
# Convert the frame to PIL Image to easily convert to bytes
|
124
|
+
im_pil = Image.fromarray(frame)
|
125
|
+
|
126
|
+
# Convert to bytes
|
127
|
+
buffered = BytesIO()
|
128
|
+
|
129
|
+
# frame_format = str(os.getenv('FRAME_FORMAT', "JPEG"))
|
130
|
+
|
131
|
+
im_pil.save(buffered, format="PNG")
|
132
|
+
|
133
|
+
frame_bytes = buffered.getvalue()
|
134
|
+
|
135
|
+
# Return the bytes of the frame
|
136
|
+
return frame_bytes
|
137
|
+
|
138
|
+
|
139
|
+
def encode_video_base64(video_path, num_frames=16):
|
140
|
+
import cv2
|
141
|
+
cap = cv2.VideoCapture(video_path)
|
142
|
+
if not cap.isOpened():
|
143
|
+
raise IOError(f"Could not open video file:{video_path}")
|
144
|
+
|
145
|
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
146
|
+
print(f"target_frames: {num_frames}")
|
147
|
+
|
148
|
+
frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
|
149
|
+
|
150
|
+
frames = []
|
151
|
+
for i in range(total_frames):
|
152
|
+
ret, frame = cap.read()
|
153
|
+
if ret:
|
154
|
+
frames.append(frame)
|
155
|
+
else:
|
156
|
+
# Handle the case where the frame could not be read
|
157
|
+
# print(f"Warning: Could not read frame at index {i}.")
|
158
|
+
pass
|
159
|
+
|
160
|
+
cap.release()
|
161
|
+
|
162
|
+
# Safely select frames based on frame_indices, avoiding IndexError
|
163
|
+
frames = [frames[i] for i in frame_indices if i < len(frames)]
|
164
|
+
|
165
|
+
# If there are not enough frames, duplicate the last frame until we reach the target
|
166
|
+
while len(frames) < num_frames:
|
167
|
+
frames.append(frames[-1])
|
168
|
+
|
169
|
+
# Use ThreadPoolExecutor to process and encode frames in parallel
|
170
|
+
with ThreadPoolExecutor() as executor:
|
171
|
+
encoded_frames = list(executor.map(encode_frame, frames))
|
172
|
+
|
173
|
+
# encoded_frames = list(map(encode_frame, frames))
|
174
|
+
|
175
|
+
# Concatenate all frames bytes
|
176
|
+
video_bytes = b"".join(encoded_frames)
|
177
|
+
|
178
|
+
# Encode the concatenated bytes to base64
|
179
|
+
video_base64 = "video:" + base64.b64encode(video_bytes).decode("utf-8")
|
180
|
+
|
181
|
+
return video_base64
|
182
|
+
|
183
|
+
|
133
184
|
def _is_chinese_char(cp):
|
134
185
|
"""Checks whether CP is the codepoint of a CJK character."""
|
135
186
|
# This defines a "chinese character" as anything in the CJK Unicode block:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.16
|
4
4
|
Summary: A structured generation langauge for LLMs.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -237,8 +237,10 @@ Requires-Dist: vllm >=0.4.2 ; extra == 'srt'
|
|
237
237
|
Requires-Dist: interegular ; extra == 'srt'
|
238
238
|
Requires-Dist: pydantic ; extra == 'srt'
|
239
239
|
Requires-Dist: pillow ; extra == 'srt'
|
240
|
-
Requires-Dist: outlines >=0.0.27 ; extra == 'srt'
|
241
240
|
Requires-Dist: packaging ; extra == 'srt'
|
241
|
+
Requires-Dist: huggingface-hub ; extra == 'srt'
|
242
|
+
Requires-Dist: hf-transfer ; extra == 'srt'
|
243
|
+
Requires-Dist: outlines >=0.0.34 ; extra == 'srt'
|
242
244
|
|
243
245
|
<div align="center">
|
244
246
|
<img src="assets/logo.png" alt="logo" width="400"></img>
|
@@ -568,15 +570,17 @@ response = client.chat.completions.create(
|
|
568
570
|
print(response)
|
569
571
|
```
|
570
572
|
|
571
|
-
|
572
|
-
|
573
|
+
|
574
|
+
By default, the server uses the chat template specified in the model tokenizer from Hugging Face. It should just work for most official models such as Llama-2/Llama-3.
|
575
|
+
|
576
|
+
If needed, you can also override the chat template when launching the server:
|
573
577
|
|
574
578
|
```
|
575
579
|
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2
|
576
580
|
```
|
577
581
|
|
578
582
|
If the chat template you are looking for is missing, you are welcome to contribute it.
|
579
|
-
Meanwhile, you can also
|
583
|
+
Meanwhile, you can also temporarily register your chat template as follows:
|
580
584
|
|
581
585
|
```json
|
582
586
|
{
|
@@ -0,0 +1,72 @@
|
|
1
|
+
sglang/__init__.py,sha256=lKabCNZM2OhtymVLUuW4bpt-Jdxwk81wP1TkhVqIJEg,1058
|
2
|
+
sglang/api.py,sha256=hnVPt_p2ALLrraAKpVbkGocVtgb0MqgOH5NUQKOA6sY,4548
|
3
|
+
sglang/global_config.py,sha256=LxoF7VGCYszeEafC8zBbzUQ5PPFdv2rPzw2zEGPLgfg,961
|
4
|
+
sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
|
5
|
+
sglang/launch_server_llavavid.py,sha256=UWo_qUCJ9yknp1TVPzrz4B_aZtEuQpLQq0l96FMgynI,1058
|
6
|
+
sglang/utils.py,sha256=Xp5mmhLoXNLB5U0NmCg-WMkfV0Ov4KVqzOvGZa3XKmc,7610
|
7
|
+
sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
|
9
|
+
sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
10
|
+
sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
|
11
|
+
sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
|
12
|
+
sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
|
13
|
+
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
sglang/lang/chat_template.py,sha256=ogIT8iMlDcSEgcNBTh5pRLoCkdQI_ec5Hc27wFUFDIg,11532
|
15
|
+
sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
|
16
|
+
sglang/lang/interpreter.py,sha256=GSIbO9N6ThfchdURb7XzQMZ9U6p1xirKHgXGmqLxKtg,28434
|
17
|
+
sglang/lang/ir.py,sha256=NxvIWlUidvtpQpPG4GAXZEN64Y2vLOBjN2Z2JkZVG1U,13350
|
18
|
+
sglang/lang/tracer.py,sha256=QcslAObEjepk8XmiqCobwzWaDpihofEQXjeRs_3B8NQ,8282
|
19
|
+
sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
|
20
|
+
sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
|
21
|
+
sglang/srt/flush_cache.py,sha256=JOXLH4pmweVbuEWDPu3SEDrLYFG82nR2SpzbslW4b-A,381
|
22
|
+
sglang/srt/hf_transformers_utils.py,sha256=UneOMsw3w7taH9EKIi6uHZ-GNUZG0vbZIWN-ZoQZ5gM,5417
|
23
|
+
sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
|
24
|
+
sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
|
25
|
+
sglang/srt/model_config.py,sha256=843L1KxEPZcEk1uwQH10BwSX9L5DYJ3OGUUBo8wMdZg,1695
|
26
|
+
sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
|
27
|
+
sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
|
28
|
+
sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
|
29
|
+
sglang/srt/server.py,sha256=YAUiniJs9ebNrJ0Lweg2TnUL_yZ0P3PtWoT0Z_3d8vk,10371
|
30
|
+
sglang/srt/server_args.py,sha256=TQxIEdF0crqtY6WfZ6q7SKOQcCSomBEVjJ5K4HyTSvQ,9539
|
31
|
+
sglang/srt/utils.py,sha256=cr2uZmEB-Exq-wi3Y8B3yQu7kFUiyV4PAvzouvKYkWg,13090
|
32
|
+
sglang/srt/weight_utils.py,sha256=bFNh9-T8gseB0zKeu1qsMww8FpyrGFxbPcOFSeJtL5Q,15505
|
33
|
+
sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
|
34
|
+
sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
|
35
|
+
sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
|
36
|
+
sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
|
37
|
+
sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
|
38
|
+
sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
|
39
|
+
sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
|
40
|
+
sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
|
41
|
+
sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
|
42
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
|
43
|
+
sglang/srt/managers/io_struct.py,sha256=fFfUQtC-D31xGYdCAfuNVuX3QyaNDgGpfzC8qnKt0YA,4294
|
44
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=TlGyFhWz1b24vkeUVvCwKFBERffi-esxGRhoukBnET8,13116
|
45
|
+
sglang/srt/managers/router/infer_batch.py,sha256=a1F3EjSBdER5pbgZFifuTdrE2Xom8Mt4aT9rmB8n35M,20204
|
46
|
+
sglang/srt/managers/router/manager.py,sha256=tdvYmwGHMeG2MMYZ4ZThdAJ_b4fp94UpemISFWOddno,2697
|
47
|
+
sglang/srt/managers/router/model_rpc.py,sha256=FJFgf1KAJ0Z8Yq4EPyczxZkCmZBjwNwCwXcjwyhU0k4,29775
|
48
|
+
sglang/srt/managers/router/model_runner.py,sha256=fp9wPh4sQY6Q-5PVtv_e9p5GgkkixSDUIqfFt7lVlV8,16527
|
49
|
+
sglang/srt/managers/router/radix_cache.py,sha256=GE6oY8bppRJCIxZWiDKO4P6al58zcqLQe605Y1d2bdo,7924
|
50
|
+
sglang/srt/managers/router/scheduler.py,sha256=pvlKSyCyIXmu14eyy1mvP9-QdG78eLUqMlr4cnfes2Y,2259
|
51
|
+
sglang/srt/models/commandr.py,sha256=DVdUF5C5etm82RoXJTNjYqlS2W2_9awzxzXNMubRoVg,13579
|
52
|
+
sglang/srt/models/dbrx.py,sha256=NIhlJp2B_y_L1ltK_Y7SEenAiHTUUp3p1rf8LIydC0o,14173
|
53
|
+
sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
|
54
|
+
sglang/srt/models/gemma.py,sha256=Wk25zFkqkdG62xVVJEzeIjDES1LnoO0EY2W2p9XMvbA,11637
|
55
|
+
sglang/srt/models/llama2.py,sha256=Y2XwS5XXG77OfPAvbju7zp53CP5izzee_4-laVqu5ZM,11655
|
56
|
+
sglang/srt/models/llava.py,sha256=HtR7lUnAYW39vWw6xmDZkbG7AueswZDJxXeu6rQfpSU,14921
|
57
|
+
sglang/srt/models/llavavid.py,sha256=ueImEwOR4ZlNFUoBvXbwZPNRcrYWg54sPNK7pmGnrp0,13219
|
58
|
+
sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
|
59
|
+
sglang/srt/models/mixtral.py,sha256=1aggGw0P0MVQu5C5D3pMaZpRpY_PmrK_nwBOygOlPEM,13839
|
60
|
+
sglang/srt/models/qwen.py,sha256=cakvxjghKdGg5iGq9TJ_nGlVQaJ4-9V91EyyZnV4rmc,9390
|
61
|
+
sglang/srt/models/qwen2.py,sha256=PyOA8-RA_frRVLXfh8d1Ui1hUd1YmM3GfsPw2q5rCDI,11351
|
62
|
+
sglang/srt/models/stablelm.py,sha256=TCfQumj0acu2lCGujJj_PuzHFp3kFIwENQEfT-hnHUA,10867
|
63
|
+
sglang/srt/models/yivl.py,sha256=q8MUvIFIWpKCQ4pSZBoFpw-pnbdjkfr-M8jBJfGFu7E,4393
|
64
|
+
sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
|
65
|
+
sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
|
66
|
+
sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
|
67
|
+
sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
|
68
|
+
sglang-0.1.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
+
sglang-0.1.16.dist-info/METADATA,sha256=yiziPDpVr6NPPhX58sA0GaLYKCut4FnBKD7TE50HH6k,28911
|
70
|
+
sglang-0.1.16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
71
|
+
sglang-0.1.16.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
72
|
+
sglang-0.1.16.dist-info/RECORD,,
|
sglang-0.1.15.dist-info/RECORD
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
sglang/__init__.py,sha256=Ef_3iE98hM5y45k97dcEXLqlRcSovIvGxEbTebnOre4,1034
|
2
|
-
sglang/api.py,sha256=c2MIXPgtkmsgDY7BvXPOYkRaaJJRkCSBjGjvUz2zkkM,4455
|
3
|
-
sglang/global_config.py,sha256=TLmmeWsk4mrjNr-ryj0w7irSr8HRekXYrYZON2sABdk,854
|
4
|
-
sglang/launch_server.py,sha256=FteIWF2C73RN1qSPkh7cfIURV5rFvfHyKLHGDRUYJIA,294
|
5
|
-
sglang/utils.py,sha256=2dUXLMPz9VhhzbIRQABmfZnVW5yz61F3UVtb6yKyevM,6237
|
6
|
-
sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
|
8
|
-
sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
9
|
-
sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
|
10
|
-
sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
|
11
|
-
sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
|
12
|
-
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
sglang/lang/chat_template.py,sha256=-pmALN5tV2upD5kb4RKP7DOvQY1s4nuvRdOcXKclXnw,11260
|
14
|
-
sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
|
15
|
-
sglang/lang/interpreter.py,sha256=W1uwgTJqeHXrkG3K7mZfH8JX9Oc9poYIwtCWRIH7lhI,27251
|
16
|
-
sglang/lang/ir.py,sha256=8Ap-uEUz6K9eNQTOKtMixePuLwRFHFKcN0Z5Yn44nKk,13320
|
17
|
-
sglang/lang/tracer.py,sha256=vArGy7RNUP0qzE26kohsIHWRIfB0d88Ph2aiLq_P_fU,8284
|
18
|
-
sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
|
19
|
-
sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
|
20
|
-
sglang/srt/hf_transformers_utils.py,sha256=mwDuBMZcp66U6hZWpiO1KeOmjXXXG9fbX_ZwEqjzzn0,5286
|
21
|
-
sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
|
22
|
-
sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
|
23
|
-
sglang/srt/model_config.py,sha256=Tw13FKY7qA4hJOskl3gmdb_W5gTEoB2m0PEArUiINQU,1546
|
24
|
-
sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
|
25
|
-
sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
|
26
|
-
sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
|
27
|
-
sglang/srt/server.py,sha256=SQXIo9XLG0fuW123-UF4VA0Os75I73upQoAzZ_U2su8,9923
|
28
|
-
sglang/srt/server_args.py,sha256=ySWe8RA4ukJQTnN4rs4_42XoYcVz1XPfeT8Ps551MlY,9510
|
29
|
-
sglang/srt/utils.py,sha256=n8OLrrbdNbA6ow1s2wbJU7a35fHGQmnFfewcgzTBecE,9201
|
30
|
-
sglang/srt/weight_utils.py,sha256=TBNP9jWb32gohPLj4-qWRn_Yn64gqWk1ZGLWrv967uU,15930
|
31
|
-
sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
|
32
|
-
sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
|
33
|
-
sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
|
34
|
-
sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
|
35
|
-
sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
|
36
|
-
sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
|
37
|
-
sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
|
38
|
-
sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
|
39
|
-
sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
|
40
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
|
41
|
-
sglang/srt/managers/io_struct.py,sha256=hdCHrBMoZ_4vc2l6mgbGGOW5b8STd4GSlQm-J_BCmw0,3716
|
42
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=hgLGkZYWs5enyeJzDjht6hOjSjTEBJSvUrFHNmjszbQ,11900
|
43
|
-
sglang/srt/managers/router/infer_batch.py,sha256=CsNErbPt2XxoUxA3MkQeP4Tr3ipNK7eF0_K7IxdEpeY,19920
|
44
|
-
sglang/srt/managers/router/manager.py,sha256=iNmLd-0V0aTU-B3FH6YutmcKJVtuhRcTU28EqbU8PII,2683
|
45
|
-
sglang/srt/managers/router/model_rpc.py,sha256=8fDGBsqyo8lAFhr4_N6rB3D3we7zTfyjeV36IR1M7Ds,28325
|
46
|
-
sglang/srt/managers/router/model_runner.py,sha256=k7YMEvqU3GSIGpaBde2rCoGlWDpVjTOJgO-3xrsz0uI,16545
|
47
|
-
sglang/srt/managers/router/radix_cache.py,sha256=ZXSYyUb2e_xHwXDi_c9U6g2-0zmX3c_wX9UWs33F6u4,6685
|
48
|
-
sglang/srt/managers/router/scheduler.py,sha256=V-LAnVSzgD2ddy2eXW3jWURCeq9Lv7YxCGk4kHyytfM,2818
|
49
|
-
sglang/srt/models/commandr.py,sha256=GHcgyksXAnp4Nlnij1qULpFk0D1iA_lV3SzhLBD6Yus,13599
|
50
|
-
sglang/srt/models/dbrx.py,sha256=OK9xmb9f1m-nrO3yFB7bvy7u6ofyobaKU2fsa0oIteQ,14158
|
51
|
-
sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
|
52
|
-
sglang/srt/models/gemma.py,sha256=Y4iLdmH4U_oySEk2-UrxqXsW3tsT_vnY0bJFywxdRyU,11630
|
53
|
-
sglang/srt/models/llama2.py,sha256=lAYVI5bE1oy_jY0tvSvRSI9wxfalidNtIZc8VXEsaNQ,11648
|
54
|
-
sglang/srt/models/llava.py,sha256=ocaWPocml74UoUHaAKE0oWF7Je5Dw_3fXw1c7b53zKk,14941
|
55
|
-
sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
|
56
|
-
sglang/srt/models/mixtral.py,sha256=jC7LR9NWjeQE9I28TfNeNGy65GdzcH3kxdWfIocpvho,13892
|
57
|
-
sglang/srt/models/qwen.py,sha256=eGkWsgYAhXVNkcS9iR8T3pk65UnIdTRjzSnRveYdigQ,9320
|
58
|
-
sglang/srt/models/qwen2.py,sha256=nXF5UJlgVFuY5TjDL2nqOy4_R1xn73EYpzHj2mL5odU,11344
|
59
|
-
sglang/srt/models/stablelm.py,sha256=d1pP5e-6CtOppWRzUtQar_0ULhGIHDZlXTh9lKMWbv4,10828
|
60
|
-
sglang/srt/models/yivl.py,sha256=Aoo_AlGu9PYMDvj6bQj9PX7Ui7-oIe9MArLe5N6FAno,4406
|
61
|
-
sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
|
62
|
-
sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
|
63
|
-
sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
|
64
|
-
sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
|
65
|
-
sglang-0.1.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
66
|
-
sglang-0.1.15.dist-info/METADATA,sha256=9pKA1HIo0OFpZz-peDJCVekVjaZvqj55sK3n5Dchd4A,28727
|
67
|
-
sglang-0.1.15.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
68
|
-
sglang-0.1.15.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
69
|
-
sglang-0.1.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|