sglang 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. sglang/__init__.py +3 -1
  2. sglang/api.py +5 -0
  3. sglang/global_config.py +4 -1
  4. sglang/lang/chat_template.py +9 -2
  5. sglang/lang/interpreter.py +52 -19
  6. sglang/lang/ir.py +12 -9
  7. sglang/lang/tracer.py +1 -1
  8. sglang/launch_server.py +1 -2
  9. sglang/launch_server_llavavid.py +31 -0
  10. sglang/srt/flush_cache.py +16 -0
  11. sglang/srt/hf_transformers_utils.py +8 -1
  12. sglang/srt/managers/io_struct.py +15 -3
  13. sglang/srt/managers/router/infer_batch.py +31 -19
  14. sglang/srt/managers/router/manager.py +6 -8
  15. sglang/srt/managers/router/model_rpc.py +59 -23
  16. sglang/srt/managers/router/model_runner.py +6 -6
  17. sglang/srt/managers/router/radix_cache.py +47 -17
  18. sglang/srt/managers/router/scheduler.py +17 -28
  19. sglang/srt/managers/tokenizer_manager.py +54 -22
  20. sglang/srt/model_config.py +4 -0
  21. sglang/srt/models/commandr.py +6 -10
  22. sglang/srt/models/dbrx.py +14 -15
  23. sglang/srt/models/gemma.py +7 -10
  24. sglang/srt/models/llama2.py +7 -10
  25. sglang/srt/models/llava.py +2 -6
  26. sglang/srt/models/llavavid.py +307 -0
  27. sglang/srt/models/mixtral.py +7 -13
  28. sglang/srt/models/qwen.py +20 -13
  29. sglang/srt/models/qwen2.py +7 -10
  30. sglang/srt/models/stablelm.py +13 -12
  31. sglang/srt/models/yivl.py +1 -4
  32. sglang/srt/server.py +32 -18
  33. sglang/srt/server_args.py +9 -6
  34. sglang/srt/utils.py +126 -17
  35. sglang/srt/weight_utils.py +66 -51
  36. sglang/utils.py +77 -26
  37. {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/METADATA +9 -5
  38. sglang-0.1.16.dist-info/RECORD +72 -0
  39. sglang-0.1.15.dist-info/RECORD +0 -69
  40. {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/LICENSE +0 -0
  41. {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/WHEEL +0 -0
  42. {sglang-0.1.15.dist-info → sglang-0.1.16.dist-info}/top_level.txt +0 -0
@@ -19,11 +19,12 @@ import torch
19
19
  from huggingface_hub import HfFileSystem, snapshot_download
20
20
  from safetensors.torch import load_file, safe_open, save_file
21
21
  from tqdm.auto import tqdm
22
-
23
22
  from vllm.config import ModelConfig
24
23
  from vllm.logger import init_logger
25
- from vllm.model_executor.layers.quantization import (QuantizationConfig,
26
- get_quantization_config)
24
+ from vllm.model_executor.layers.quantization import (
25
+ QuantizationConfig,
26
+ get_quantization_config,
27
+ )
27
28
  from vllm.model_executor.layers.quantization.schema import QuantParamSchema
28
29
 
29
30
  logger = init_logger(__name__)
@@ -32,17 +33,21 @@ logger = init_logger(__name__)
32
33
  # can share the same lock without error.
33
34
  # lock files in the temp directory will be automatically deleted when the
34
35
  # system reboots, so users will not complain about annoying lock files
35
- temp_dir = os.environ.get('TMPDIR') or os.environ.get(
36
- 'TEMP') or os.environ.get('TMP') or "/tmp/"
36
+ temp_dir = (
37
+ os.environ.get("TMPDIR")
38
+ or os.environ.get("TEMP")
39
+ or os.environ.get("TMP")
40
+ or "/tmp/"
41
+ )
37
42
 
38
43
 
39
44
  def enable_hf_transfer():
40
- """automatically activates hf_transfer
41
- """
45
+ """automatically activates hf_transfer"""
42
46
  if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
43
47
  try:
44
48
  # enable hf hub transfer if available
45
49
  import hf_transfer # type: ignore # noqa
50
+
46
51
  huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
47
52
  except ImportError:
48
53
  pass
@@ -65,8 +70,7 @@ def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
65
70
  # add hash to avoid conflict with old users' lock files
66
71
  lock_file_name = hash_name + model_name + ".lock"
67
72
  # mode 0o666 is required for the filelock to be shared across users
68
- lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name),
69
- mode=0o666)
73
+ lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
70
74
  return lock
71
75
 
72
76
 
@@ -104,10 +108,12 @@ def convert_bin_to_safetensor_file(
104
108
  sf_size = os.stat(sf_filename).st_size
105
109
  pt_size = os.stat(pt_filename).st_size
106
110
  if (sf_size - pt_size) / pt_size > 0.01:
107
- raise RuntimeError(f"""The file size different is more than 1%:
111
+ raise RuntimeError(
112
+ f"""The file size different is more than 1%:
108
113
  - {sf_filename}: {sf_size}
109
114
  - {pt_filename}: {pt_size}
110
- """)
115
+ """
116
+ )
111
117
 
112
118
  # check if the tensors are the same
113
119
  reloaded = load_file(sf_filename)
@@ -122,8 +128,7 @@ def convert_bin_to_safetensor_file(
122
128
  def get_quant_config(model_config: ModelConfig) -> QuantizationConfig:
123
129
  quant_cls = get_quantization_config(model_config.quantization)
124
130
  # Read the quantization config from the HF model config, if available.
125
- hf_quant_config = getattr(model_config.hf_config, "quantization_config",
126
- None)
131
+ hf_quant_config = getattr(model_config.hf_config, "quantization_config", None)
127
132
  if hf_quant_config is not None:
128
133
  return quant_cls.from_config(hf_quant_config)
129
134
  model_name_or_path = model_config.model
@@ -131,26 +136,29 @@ def get_quant_config(model_config: ModelConfig) -> QuantizationConfig:
131
136
  if not is_local:
132
137
  # Download the config files.
133
138
  with get_lock(model_name_or_path, model_config.download_dir):
134
- hf_folder = snapshot_download(model_name_or_path,
135
- revision=model_config.revision,
136
- allow_patterns="*.json",
137
- cache_dir=model_config.download_dir,
138
- tqdm_class=Disabledtqdm)
139
+ hf_folder = snapshot_download(
140
+ model_name_or_path,
141
+ revision=model_config.revision,
142
+ allow_patterns="*.json",
143
+ cache_dir=model_config.download_dir,
144
+ tqdm_class=Disabledtqdm,
145
+ )
139
146
  else:
140
147
  hf_folder = model_name_or_path
141
148
  config_files = glob.glob(os.path.join(hf_folder, "*.json"))
142
149
 
143
150
  quant_config_files = [
144
- f for f in config_files if any(
145
- f.endswith(x) for x in quant_cls.get_config_filenames())
151
+ f
152
+ for f in config_files
153
+ if any(f.endswith(x) for x in quant_cls.get_config_filenames())
146
154
  ]
147
155
  if len(quant_config_files) == 0:
148
- raise ValueError(
149
- f"Cannot find the config file for {model_config.quantization}")
156
+ raise ValueError(f"Cannot find the config file for {model_config.quantization}")
150
157
  if len(quant_config_files) > 1:
151
158
  raise ValueError(
152
159
  f"Found multiple config files for {model_config.quantization}: "
153
- f"{quant_config_files}")
160
+ f"{quant_config_files}"
161
+ )
154
162
 
155
163
  quant_config_file = quant_config_files[0]
156
164
  with open(quant_config_file, "r") as f:
@@ -166,8 +174,7 @@ def prepare_hf_model_weights(
166
174
  revision: Optional[str] = None,
167
175
  ) -> Tuple[str, List[str], bool]:
168
176
  # Download model weights from huggingface.
169
- is_local = os.path.isdir(model_name_or_path) \
170
- and load_format != "tensorizer"
177
+ is_local = os.path.isdir(model_name_or_path) and load_format != "tensorizer"
171
178
  use_safetensors = False
172
179
  # Some quantized models use .pt files for storing the weights.
173
180
  if load_format == "auto":
@@ -203,11 +210,13 @@ def prepare_hf_model_weights(
203
210
  # Use file lock to prevent multiple processes from
204
211
  # downloading the same model weights at the same time.
205
212
  with get_lock(model_name_or_path, cache_dir):
206
- hf_folder = snapshot_download(model_name_or_path,
207
- allow_patterns=allow_patterns,
208
- cache_dir=cache_dir,
209
- tqdm_class=Disabledtqdm,
210
- revision=revision)
213
+ hf_folder = snapshot_download(
214
+ model_name_or_path,
215
+ allow_patterns=allow_patterns,
216
+ cache_dir=cache_dir,
217
+ tqdm_class=Disabledtqdm,
218
+ revision=revision,
219
+ )
211
220
  else:
212
221
  hf_folder = model_name_or_path
213
222
  hf_weights_files: List[str] = []
@@ -228,16 +237,14 @@ def prepare_hf_model_weights(
228
237
  "scaler.pt",
229
238
  ]
230
239
  hf_weights_files = [
231
- f for f in hf_weights_files
232
- if not any(f.endswith(x) for x in blacklist)
240
+ f for f in hf_weights_files if not any(f.endswith(x) for x in blacklist)
233
241
  ]
234
242
 
235
243
  if load_format == "tensorizer":
236
244
  return hf_folder, hf_weights_files, use_safetensors
237
245
 
238
246
  if len(hf_weights_files) == 0:
239
- raise RuntimeError(
240
- f"Cannot find any model weights with `{model_name_or_path}`")
247
+ raise RuntimeError(f"Cannot find any model weights with `{model_name_or_path}`")
241
248
 
242
249
  return hf_folder, hf_weights_files, use_safetensors
243
250
 
@@ -254,7 +261,8 @@ def hf_model_weights_iterator(
254
261
  cache_dir=cache_dir,
255
262
  load_format=load_format,
256
263
  fall_back_to_pt=fall_back_to_pt,
257
- revision=revision)
264
+ revision=revision,
265
+ )
258
266
 
259
267
  if load_format == "npcache":
260
268
  # Currently np_cache only support *.bin checkpoints
@@ -289,22 +297,25 @@ def hf_model_weights_iterator(
289
297
  param = np.load(f)
290
298
  yield name, torch.from_numpy(param)
291
299
  elif load_format == "tensorizer":
292
- from vllm.model_executor.tensorizer_loader import (TensorDeserializer,
293
- open_stream,
294
- tensorizer_warning)
300
+ from vllm.model_executor.tensorizer_loader import (
301
+ TensorDeserializer,
302
+ open_stream,
303
+ tensorizer_warning,
304
+ )
305
+
295
306
  tensorizer_args = load_format.params
296
307
  tensorizer_warning(
297
308
  "Deserializing HuggingFace models is not optimized for "
298
309
  "loading on vLLM, as tensorizer is forced to load to CPU. "
299
310
  "Consider deserializing a vLLM model instead for faster "
300
311
  "load times. See the examples/tensorize_vllm_model.py example "
301
- "script for serializing vLLM models.")
312
+ "script for serializing vLLM models."
313
+ )
302
314
 
303
315
  deserializer_args = tensorizer_args.deserializer_params
304
316
  stream_params = tensorizer_args.stream_params
305
317
  stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
306
- with TensorDeserializer(stream, **deserializer_args,
307
- device="cpu") as state:
318
+ with TensorDeserializer(stream, **deserializer_args, device="cpu") as state:
308
319
  for name, param in state.items():
309
320
  yield name, param
310
321
  del state
@@ -324,8 +335,12 @@ def hf_model_weights_iterator(
324
335
 
325
336
 
326
337
  def kv_cache_scales_loader(
327
- filename: str, tp_rank: int, tp_size: int, num_hidden_layers: int,
328
- model_type: Optional[str]) -> Iterable[Tuple[int, float]]:
338
+ filename: str,
339
+ tp_rank: int,
340
+ tp_size: int,
341
+ num_hidden_layers: int,
342
+ model_type: Optional[str],
343
+ ) -> Iterable[Tuple[int, float]]:
329
344
  """
330
345
  A simple utility to read in KV cache scaling factors that have been
331
346
  previously serialized to disk. Used by the model to populate the appropriate
@@ -343,8 +358,7 @@ def kv_cache_scales_loader(
343
358
  "tp_size": tp_size,
344
359
  }
345
360
  schema_dct = json.load(f)
346
- schema = QuantParamSchema.model_validate(schema_dct,
347
- context=context)
361
+ schema = QuantParamSchema.model_validate(schema_dct, context=context)
348
362
  layer_scales_map = schema.kv_cache.scaling_factor[tp_rank]
349
363
  return layer_scales_map.items()
350
364
 
@@ -357,9 +371,11 @@ def kv_cache_scales_loader(
357
371
  # This section is reached if and only if any of the excepts are hit
358
372
  # Return an empty iterable (list) => no KV cache scales are loaded
359
373
  # which ultimately defaults to 1.0 scales
360
- logger.warning("Defaulting to KV cache scaling factors = 1.0 "
361
- f"for all layers in TP rank {tp_rank} "
362
- "as an error occurred during loading.")
374
+ logger.warning(
375
+ "Defaulting to KV cache scaling factors = 1.0 "
376
+ f"for all layers in TP rank {tp_rank} "
377
+ "as an error occurred during loading."
378
+ )
363
379
  return []
364
380
 
365
381
 
@@ -378,8 +394,7 @@ def convert_pyslice_to_tensor(x: Any) -> torch.Tensor:
378
394
  return x
379
395
 
380
396
 
381
- def default_weight_loader(param: torch.Tensor,
382
- loaded_weight: torch.Tensor) -> None:
397
+ def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
383
398
  """Default weight loader."""
384
399
  assert param.size() == loaded_weight.size()
385
400
  param.data.copy_(loaded_weight)
@@ -399,4 +414,4 @@ def initialize_dummy_weights(
399
414
  """
400
415
  for param in model.state_dict().values():
401
416
  if torch.is_floating_point(param):
402
- param.data.uniform_(low, high)
417
+ param.data.uniform_(low, high)
sglang/utils.py CHANGED
@@ -2,40 +2,23 @@
2
2
 
3
3
  import base64
4
4
  import json
5
+ import os
6
+ import sys
5
7
  import threading
8
+ import traceback
6
9
  import urllib.request
10
+ from concurrent.futures import ThreadPoolExecutor
7
11
  from io import BytesIO
8
12
  from json import dumps
9
13
 
14
+ import numpy as np
10
15
  import requests
11
16
 
12
17
 
13
- def get_available_gpu_memory(gpu_id, distributed=True):
14
- """
15
- Get available memory for cuda:gpu_id device.
16
- When distributed is True, the available memory is the minimum available memory of all GPUs.
17
- """
18
- import torch
19
-
20
- num_gpus = torch.cuda.device_count()
21
- assert gpu_id < num_gpus
22
-
23
- if torch.cuda.current_device() != gpu_id:
24
- print(
25
- f"WARNING: current device is not {gpu_id}, but {torch.cuda.current_device()}, ",
26
- "which may cause useless memory allocation for torch CUDA context.",
27
- )
28
-
29
- free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
30
-
31
- if distributed:
32
- tensor = torch.tensor(free_gpu_memory, dtype=torch.float32).to(
33
- torch.device("cuda", gpu_id)
34
- )
35
- torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.MIN)
36
- free_gpu_memory = tensor.item()
37
-
38
- return free_gpu_memory / (1 << 30)
18
def get_exception_traceback():
    """Return the formatted traceback of the exception currently being handled.

    Intended to be called from inside an ``except`` block. The result is the
    same multi-line text the interpreter would print for the active
    exception, as a single string.
    """
    # format_exc() formats the sys.exc_info() triple and joins the lines.
    return traceback.format_exc()
39
22
 
40
23
 
41
24
  def is_same_type(values):
@@ -130,6 +113,74 @@ def encode_image_base64(image_path):
130
113
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
131
114
 
132
115
 
116
def encode_frame(frame):
    """Serialize a single OpenCV frame to PNG bytes.

    The frame is converted from BGR (OpenCV's default channel order) to RGB,
    wrapped in a PIL image and written to an in-memory buffer as PNG.

    Returns:
        The PNG-encoded frame as ``bytes``.
    """
    import cv2  # pip install opencv-python-headless
    from PIL import Image

    # OpenCV uses BGR by default; PIL expects RGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    png_buffer = BytesIO()
    Image.fromarray(rgb_frame).save(png_buffer, format="PNG")
    return png_buffer.getvalue()
137
+
138
+
139
def encode_video_base64(video_path, num_frames=16):
    """Sample frames from a video and pack them into a base64 string.

    ``num_frames`` indices are spread evenly (``np.linspace``) over the frame
    count reported by OpenCV. Each selected frame is PNG-encoded by
    ``encode_frame``; the encoded frames are concatenated, base64-encoded and
    prefixed with ``"video:"``.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        num_frames: Number of frames to emit. If fewer frames can be decoded
            than requested, the last selected frame is repeated.

    Returns:
        ``"video:"`` followed by the base64 text of the concatenated frames.

    Raises:
        IOError: If the video cannot be opened, or if frames were requested
            but none could be decoded.
    """
    import cv2

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video file:{video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"target_frames: {num_frames}")

        frame_indices = np.linspace(
            0, total_frames - 1, num_frames, dtype=int
        ).tolist()
        wanted = set(frame_indices)
        last_wanted = max(wanted, default=-1)

        # Keep only the sampled frames instead of the whole decoded video.
        # Indices count successfully decoded frames, so a failed read does
        # not consume an index.
        kept = {}
        decoded = 0
        for _ in range(total_frames):
            if decoded > last_wanted:
                # Every requested index has been seen; stop decoding early.
                break
            ret, frame = cap.read()
            if not ret:
                continue
            if decoded in wanted:
                kept[decoded] = frame
            decoded += 1
    finally:
        # Release the capture even if decoding raised.
        cap.release()

    # The reported frame count may exceed what could actually be decoded,
    # so drop indices that were never reached.
    frames = [kept[i] for i in frame_indices if 0 <= i < decoded]

    # If there are not enough frames, repeat the last one up to the target.
    if len(frames) < num_frames:
        if not frames:
            raise IOError(f"Could not read any frames from video file:{video_path}")
        frames.extend([frames[-1]] * (num_frames - len(frames)))

    # Encode the frames in parallel.
    with ThreadPoolExecutor() as executor:
        encoded_frames = list(executor.map(encode_frame, frames))

    video_bytes = b"".join(encoded_frames)
    return "video:" + base64.b64encode(video_bytes).decode("utf-8")
182
+
183
+
133
184
  def _is_chinese_char(cp):
134
185
  """Checks whether CP is the codepoint of a CJK character."""
135
186
  # This defines a "chinese character" as anything in the CJK Unicode block:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.1.15
3
+ Version: 0.1.16
4
4
  Summary: A structured generation langauge for LLMs.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -237,8 +237,10 @@ Requires-Dist: vllm >=0.4.2 ; extra == 'srt'
237
237
  Requires-Dist: interegular ; extra == 'srt'
238
238
  Requires-Dist: pydantic ; extra == 'srt'
239
239
  Requires-Dist: pillow ; extra == 'srt'
240
- Requires-Dist: outlines >=0.0.27 ; extra == 'srt'
241
240
  Requires-Dist: packaging ; extra == 'srt'
241
+ Requires-Dist: huggingface-hub ; extra == 'srt'
242
+ Requires-Dist: hf-transfer ; extra == 'srt'
243
+ Requires-Dist: outlines >=0.0.34 ; extra == 'srt'
242
244
 
243
245
  <div align="center">
244
246
  <img src="assets/logo.png" alt="logo" width="400"></img>
@@ -568,15 +570,17 @@ response = client.chat.completions.create(
568
570
  print(response)
569
571
  ```
570
572
 
571
- In above example, the server uses the chat template specified in the model tokenizer.
572
- You can override the chat template if needed when launching the server:
573
+
574
+ By default, the server uses the chat template specified in the model tokenizer from Hugging Face. It should just work for most official models such as Llama-2/Llama-3.
575
+
576
+ If needed, you can also override the chat template when launching the server:
573
577
 
574
578
  ```
575
579
  python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2
576
580
  ```
577
581
 
578
582
  If the chat template you are looking for is missing, you are welcome to contribute it.
579
- Meanwhile, you can also temporary register your chat template as follows:
583
+ Meanwhile, you can also temporarily register your chat template as follows:
580
584
 
581
585
  ```json
582
586
  {
@@ -0,0 +1,72 @@
1
+ sglang/__init__.py,sha256=lKabCNZM2OhtymVLUuW4bpt-Jdxwk81wP1TkhVqIJEg,1058
2
+ sglang/api.py,sha256=hnVPt_p2ALLrraAKpVbkGocVtgb0MqgOH5NUQKOA6sY,4548
3
+ sglang/global_config.py,sha256=LxoF7VGCYszeEafC8zBbzUQ5PPFdv2rPzw2zEGPLgfg,961
4
+ sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
5
+ sglang/launch_server_llavavid.py,sha256=UWo_qUCJ9yknp1TVPzrz4B_aZtEuQpLQq0l96FMgynI,1058
6
+ sglang/utils.py,sha256=Xp5mmhLoXNLB5U0NmCg-WMkfV0Ov4KVqzOvGZa3XKmc,7610
7
+ sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
9
+ sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
10
+ sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
11
+ sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
12
+ sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
13
+ sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ sglang/lang/chat_template.py,sha256=ogIT8iMlDcSEgcNBTh5pRLoCkdQI_ec5Hc27wFUFDIg,11532
15
+ sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
16
+ sglang/lang/interpreter.py,sha256=GSIbO9N6ThfchdURb7XzQMZ9U6p1xirKHgXGmqLxKtg,28434
17
+ sglang/lang/ir.py,sha256=NxvIWlUidvtpQpPG4GAXZEN64Y2vLOBjN2Z2JkZVG1U,13350
18
+ sglang/lang/tracer.py,sha256=QcslAObEjepk8XmiqCobwzWaDpihofEQXjeRs_3B8NQ,8282
19
+ sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
20
+ sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
21
+ sglang/srt/flush_cache.py,sha256=JOXLH4pmweVbuEWDPu3SEDrLYFG82nR2SpzbslW4b-A,381
22
+ sglang/srt/hf_transformers_utils.py,sha256=UneOMsw3w7taH9EKIi6uHZ-GNUZG0vbZIWN-ZoQZ5gM,5417
23
+ sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
24
+ sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
25
+ sglang/srt/model_config.py,sha256=843L1KxEPZcEk1uwQH10BwSX9L5DYJ3OGUUBo8wMdZg,1695
26
+ sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
27
+ sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
28
+ sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
29
+ sglang/srt/server.py,sha256=YAUiniJs9ebNrJ0Lweg2TnUL_yZ0P3PtWoT0Z_3d8vk,10371
30
+ sglang/srt/server_args.py,sha256=TQxIEdF0crqtY6WfZ6q7SKOQcCSomBEVjJ5K4HyTSvQ,9539
31
+ sglang/srt/utils.py,sha256=cr2uZmEB-Exq-wi3Y8B3yQu7kFUiyV4PAvzouvKYkWg,13090
32
+ sglang/srt/weight_utils.py,sha256=bFNh9-T8gseB0zKeu1qsMww8FpyrGFxbPcOFSeJtL5Q,15505
33
+ sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
34
+ sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
35
+ sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
36
+ sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
37
+ sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
38
+ sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
39
+ sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
40
+ sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
41
+ sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
42
+ sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
43
+ sglang/srt/managers/io_struct.py,sha256=fFfUQtC-D31xGYdCAfuNVuX3QyaNDgGpfzC8qnKt0YA,4294
44
+ sglang/srt/managers/tokenizer_manager.py,sha256=TlGyFhWz1b24vkeUVvCwKFBERffi-esxGRhoukBnET8,13116
45
+ sglang/srt/managers/router/infer_batch.py,sha256=a1F3EjSBdER5pbgZFifuTdrE2Xom8Mt4aT9rmB8n35M,20204
46
+ sglang/srt/managers/router/manager.py,sha256=tdvYmwGHMeG2MMYZ4ZThdAJ_b4fp94UpemISFWOddno,2697
47
+ sglang/srt/managers/router/model_rpc.py,sha256=FJFgf1KAJ0Z8Yq4EPyczxZkCmZBjwNwCwXcjwyhU0k4,29775
48
+ sglang/srt/managers/router/model_runner.py,sha256=fp9wPh4sQY6Q-5PVtv_e9p5GgkkixSDUIqfFt7lVlV8,16527
49
+ sglang/srt/managers/router/radix_cache.py,sha256=GE6oY8bppRJCIxZWiDKO4P6al58zcqLQe605Y1d2bdo,7924
50
+ sglang/srt/managers/router/scheduler.py,sha256=pvlKSyCyIXmu14eyy1mvP9-QdG78eLUqMlr4cnfes2Y,2259
51
+ sglang/srt/models/commandr.py,sha256=DVdUF5C5etm82RoXJTNjYqlS2W2_9awzxzXNMubRoVg,13579
52
+ sglang/srt/models/dbrx.py,sha256=NIhlJp2B_y_L1ltK_Y7SEenAiHTUUp3p1rf8LIydC0o,14173
53
+ sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
54
+ sglang/srt/models/gemma.py,sha256=Wk25zFkqkdG62xVVJEzeIjDES1LnoO0EY2W2p9XMvbA,11637
55
+ sglang/srt/models/llama2.py,sha256=Y2XwS5XXG77OfPAvbju7zp53CP5izzee_4-laVqu5ZM,11655
56
+ sglang/srt/models/llava.py,sha256=HtR7lUnAYW39vWw6xmDZkbG7AueswZDJxXeu6rQfpSU,14921
57
+ sglang/srt/models/llavavid.py,sha256=ueImEwOR4ZlNFUoBvXbwZPNRcrYWg54sPNK7pmGnrp0,13219
58
+ sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
59
+ sglang/srt/models/mixtral.py,sha256=1aggGw0P0MVQu5C5D3pMaZpRpY_PmrK_nwBOygOlPEM,13839
60
+ sglang/srt/models/qwen.py,sha256=cakvxjghKdGg5iGq9TJ_nGlVQaJ4-9V91EyyZnV4rmc,9390
61
+ sglang/srt/models/qwen2.py,sha256=PyOA8-RA_frRVLXfh8d1Ui1hUd1YmM3GfsPw2q5rCDI,11351
62
+ sglang/srt/models/stablelm.py,sha256=TCfQumj0acu2lCGujJj_PuzHFp3kFIwENQEfT-hnHUA,10867
63
+ sglang/srt/models/yivl.py,sha256=q8MUvIFIWpKCQ4pSZBoFpw-pnbdjkfr-M8jBJfGFu7E,4393
64
+ sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
65
+ sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
66
+ sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
67
+ sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
68
+ sglang-0.1.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ sglang-0.1.16.dist-info/METADATA,sha256=yiziPDpVr6NPPhX58sA0GaLYKCut4FnBKD7TE50HH6k,28911
70
+ sglang-0.1.16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
+ sglang-0.1.16.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
72
+ sglang-0.1.16.dist-info/RECORD,,
@@ -1,69 +0,0 @@
1
- sglang/__init__.py,sha256=Ef_3iE98hM5y45k97dcEXLqlRcSovIvGxEbTebnOre4,1034
2
- sglang/api.py,sha256=c2MIXPgtkmsgDY7BvXPOYkRaaJJRkCSBjGjvUz2zkkM,4455
3
- sglang/global_config.py,sha256=TLmmeWsk4mrjNr-ryj0w7irSr8HRekXYrYZON2sABdk,854
4
- sglang/launch_server.py,sha256=FteIWF2C73RN1qSPkh7cfIURV5rFvfHyKLHGDRUYJIA,294
5
- sglang/utils.py,sha256=2dUXLMPz9VhhzbIRQABmfZnVW5yz61F3UVtb6yKyevM,6237
6
- sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
8
- sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
9
- sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
10
- sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
11
- sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
12
- sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- sglang/lang/chat_template.py,sha256=-pmALN5tV2upD5kb4RKP7DOvQY1s4nuvRdOcXKclXnw,11260
14
- sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
15
- sglang/lang/interpreter.py,sha256=W1uwgTJqeHXrkG3K7mZfH8JX9Oc9poYIwtCWRIH7lhI,27251
16
- sglang/lang/ir.py,sha256=8Ap-uEUz6K9eNQTOKtMixePuLwRFHFKcN0Z5Yn44nKk,13320
17
- sglang/lang/tracer.py,sha256=vArGy7RNUP0qzE26kohsIHWRIfB0d88Ph2aiLq_P_fU,8284
18
- sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
19
- sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
20
- sglang/srt/hf_transformers_utils.py,sha256=mwDuBMZcp66U6hZWpiO1KeOmjXXXG9fbX_ZwEqjzzn0,5286
21
- sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
22
- sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
23
- sglang/srt/model_config.py,sha256=Tw13FKY7qA4hJOskl3gmdb_W5gTEoB2m0PEArUiINQU,1546
24
- sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
25
- sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
26
- sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
27
- sglang/srt/server.py,sha256=SQXIo9XLG0fuW123-UF4VA0Os75I73upQoAzZ_U2su8,9923
28
- sglang/srt/server_args.py,sha256=ySWe8RA4ukJQTnN4rs4_42XoYcVz1XPfeT8Ps551MlY,9510
29
- sglang/srt/utils.py,sha256=n8OLrrbdNbA6ow1s2wbJU7a35fHGQmnFfewcgzTBecE,9201
30
- sglang/srt/weight_utils.py,sha256=TBNP9jWb32gohPLj4-qWRn_Yn64gqWk1ZGLWrv967uU,15930
31
- sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
32
- sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
33
- sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
34
- sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
35
- sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
36
- sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
37
- sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
38
- sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
39
- sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
40
- sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
41
- sglang/srt/managers/io_struct.py,sha256=hdCHrBMoZ_4vc2l6mgbGGOW5b8STd4GSlQm-J_BCmw0,3716
42
- sglang/srt/managers/tokenizer_manager.py,sha256=hgLGkZYWs5enyeJzDjht6hOjSjTEBJSvUrFHNmjszbQ,11900
43
- sglang/srt/managers/router/infer_batch.py,sha256=CsNErbPt2XxoUxA3MkQeP4Tr3ipNK7eF0_K7IxdEpeY,19920
44
- sglang/srt/managers/router/manager.py,sha256=iNmLd-0V0aTU-B3FH6YutmcKJVtuhRcTU28EqbU8PII,2683
45
- sglang/srt/managers/router/model_rpc.py,sha256=8fDGBsqyo8lAFhr4_N6rB3D3we7zTfyjeV36IR1M7Ds,28325
46
- sglang/srt/managers/router/model_runner.py,sha256=k7YMEvqU3GSIGpaBde2rCoGlWDpVjTOJgO-3xrsz0uI,16545
47
- sglang/srt/managers/router/radix_cache.py,sha256=ZXSYyUb2e_xHwXDi_c9U6g2-0zmX3c_wX9UWs33F6u4,6685
48
- sglang/srt/managers/router/scheduler.py,sha256=V-LAnVSzgD2ddy2eXW3jWURCeq9Lv7YxCGk4kHyytfM,2818
49
- sglang/srt/models/commandr.py,sha256=GHcgyksXAnp4Nlnij1qULpFk0D1iA_lV3SzhLBD6Yus,13599
50
- sglang/srt/models/dbrx.py,sha256=OK9xmb9f1m-nrO3yFB7bvy7u6ofyobaKU2fsa0oIteQ,14158
51
- sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
52
- sglang/srt/models/gemma.py,sha256=Y4iLdmH4U_oySEk2-UrxqXsW3tsT_vnY0bJFywxdRyU,11630
53
- sglang/srt/models/llama2.py,sha256=lAYVI5bE1oy_jY0tvSvRSI9wxfalidNtIZc8VXEsaNQ,11648
54
- sglang/srt/models/llava.py,sha256=ocaWPocml74UoUHaAKE0oWF7Je5Dw_3fXw1c7b53zKk,14941
55
- sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
56
- sglang/srt/models/mixtral.py,sha256=jC7LR9NWjeQE9I28TfNeNGy65GdzcH3kxdWfIocpvho,13892
57
- sglang/srt/models/qwen.py,sha256=eGkWsgYAhXVNkcS9iR8T3pk65UnIdTRjzSnRveYdigQ,9320
58
- sglang/srt/models/qwen2.py,sha256=nXF5UJlgVFuY5TjDL2nqOy4_R1xn73EYpzHj2mL5odU,11344
59
- sglang/srt/models/stablelm.py,sha256=d1pP5e-6CtOppWRzUtQar_0ULhGIHDZlXTh9lKMWbv4,10828
60
- sglang/srt/models/yivl.py,sha256=Aoo_AlGu9PYMDvj6bQj9PX7Ui7-oIe9MArLe5N6FAno,4406
61
- sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
62
- sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
63
- sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
64
- sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
65
- sglang-0.1.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
66
- sglang-0.1.15.dist-info/METADATA,sha256=9pKA1HIo0OFpZz-peDJCVekVjaZvqj55sK3n5Dchd4A,28727
67
- sglang-0.1.15.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
68
- sglang-0.1.15.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
69
- sglang-0.1.15.dist-info/RECORD,,