sglang 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. sglang/__init__.py +3 -1
  2. sglang/api.py +7 -7
  3. sglang/backend/anthropic.py +1 -1
  4. sglang/backend/litellm.py +90 -0
  5. sglang/backend/openai.py +158 -11
  6. sglang/backend/runtime_endpoint.py +18 -10
  7. sglang/bench_latency.py +299 -0
  8. sglang/global_config.py +12 -2
  9. sglang/lang/compiler.py +2 -2
  10. sglang/lang/interpreter.py +114 -67
  11. sglang/lang/ir.py +28 -3
  12. sglang/launch_server.py +4 -1
  13. sglang/launch_server_llavavid.py +2 -1
  14. sglang/srt/constrained/__init__.py +13 -6
  15. sglang/srt/constrained/fsm_cache.py +8 -2
  16. sglang/srt/constrained/jump_forward.py +113 -25
  17. sglang/srt/conversation.py +2 -0
  18. sglang/srt/flush_cache.py +3 -1
  19. sglang/srt/hf_transformers_utils.py +130 -1
  20. sglang/srt/layers/extend_attention.py +17 -0
  21. sglang/srt/layers/fused_moe.py +582 -0
  22. sglang/srt/layers/logits_processor.py +65 -32
  23. sglang/srt/layers/radix_attention.py +41 -7
  24. sglang/srt/layers/token_attention.py +16 -1
  25. sglang/srt/managers/controller/dp_worker.py +113 -0
  26. sglang/srt/managers/{router → controller}/infer_batch.py +242 -100
  27. sglang/srt/managers/controller/manager_multi.py +191 -0
  28. sglang/srt/managers/{router/manager.py → controller/manager_single.py} +34 -14
  29. sglang/srt/managers/{router → controller}/model_runner.py +262 -158
  30. sglang/srt/managers/{router → controller}/radix_cache.py +11 -1
  31. sglang/srt/managers/{router/scheduler.py → controller/schedule_heuristic.py} +9 -7
  32. sglang/srt/managers/{router/model_rpc.py → controller/tp_worker.py} +298 -267
  33. sglang/srt/managers/detokenizer_manager.py +42 -46
  34. sglang/srt/managers/io_struct.py +22 -12
  35. sglang/srt/managers/tokenizer_manager.py +151 -87
  36. sglang/srt/model_config.py +83 -5
  37. sglang/srt/models/chatglm.py +399 -0
  38. sglang/srt/models/commandr.py +10 -13
  39. sglang/srt/models/dbrx.py +9 -15
  40. sglang/srt/models/gemma.py +12 -15
  41. sglang/srt/models/grok.py +738 -0
  42. sglang/srt/models/llama2.py +26 -15
  43. sglang/srt/models/llama_classification.py +104 -0
  44. sglang/srt/models/llava.py +86 -19
  45. sglang/srt/models/llavavid.py +11 -20
  46. sglang/srt/models/mixtral.py +282 -103
  47. sglang/srt/models/mixtral_quant.py +372 -0
  48. sglang/srt/models/qwen.py +9 -13
  49. sglang/srt/models/qwen2.py +11 -13
  50. sglang/srt/models/stablelm.py +9 -15
  51. sglang/srt/models/yivl.py +17 -22
  52. sglang/srt/openai_api_adapter.py +150 -95
  53. sglang/srt/openai_protocol.py +11 -2
  54. sglang/srt/server.py +124 -48
  55. sglang/srt/server_args.py +128 -48
  56. sglang/srt/utils.py +234 -67
  57. sglang/test/test_programs.py +65 -3
  58. sglang/test/test_utils.py +32 -1
  59. sglang/utils.py +23 -4
  60. {sglang-0.1.16.dist-info → sglang-0.1.18.dist-info}/METADATA +40 -27
  61. sglang-0.1.18.dist-info/RECORD +78 -0
  62. {sglang-0.1.16.dist-info → sglang-0.1.18.dist-info}/WHEEL +1 -1
  63. sglang/srt/backend_config.py +0 -13
  64. sglang/srt/models/dbrx_config.py +0 -281
  65. sglang/srt/weight_utils.py +0 -417
  66. sglang-0.1.16.dist-info/RECORD +0 -72
  67. {sglang-0.1.16.dist-info → sglang-0.1.18.dist-info}/LICENSE +0 -0
  68. {sglang-0.1.16.dist-info → sglang-0.1.18.dist-info}/top_level.txt +0 -0
@@ -1,417 +0,0 @@ sglang/srt/weight_utils.py (file removed)
1
- # The PR(https://github.com/vllm-project/vllm/pull/4097) of vllm borken the sglang code.
2
- # In order to adapt to the latest code without modifying too much code,
3
- # copied the previous vllm/model_executor/weight_utils.py
4
- # Copied in https://github.com/vllm-project/vllm/blob/05434764cd99990035779cf9a4ed86623b528825/vllm/model_executor/weight_utils.py
5
-
6
- """Utilities for downloading and initializing model weights."""
7
- import fnmatch
8
- import glob
9
- import hashlib
10
- import json
11
- import os
12
- from collections import defaultdict
13
- from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
14
-
15
- import filelock
16
- import huggingface_hub.constants
17
- import numpy as np
18
- import torch
19
- from huggingface_hub import HfFileSystem, snapshot_download
20
- from safetensors.torch import load_file, safe_open, save_file
21
- from tqdm.auto import tqdm
22
- from vllm.config import ModelConfig
23
- from vllm.logger import init_logger
24
- from vllm.model_executor.layers.quantization import (
25
- QuantizationConfig,
26
- get_quantization_config,
27
- )
28
- from vllm.model_executor.layers.quantization.schema import QuantParamSchema
29
-
30
- logger = init_logger(__name__)
31
-
32
- # use system-level temp directory for file locks, so that multiple users
33
- # can share the same lock without error.
34
- # lock files in the temp directory will be automatically deleted when the
35
- # system reboots, so users will not complain about annoying lock files
36
- temp_dir = (
37
- os.environ.get("TMPDIR")
38
- or os.environ.get("TEMP")
39
- or os.environ.get("TMP")
40
- or "/tmp/"
41
- )
42
-
43
-
44
- def enable_hf_transfer():
45
- """automatically activates hf_transfer"""
46
- if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
47
- try:
48
- # enable hf hub transfer if available
49
- import hf_transfer # type: ignore # noqa
50
-
51
- huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
52
- except ImportError:
53
- pass
54
-
55
-
56
- enable_hf_transfer()
57
-
58
-
59
- class Disabledtqdm(tqdm):
60
-
61
- def __init__(self, *args, **kwargs):
62
- super().__init__(*args, **kwargs, disable=True)
63
-
64
-
65
- def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
66
- lock_dir = cache_dir or temp_dir
67
- os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
68
- model_name = model_name_or_path.replace("/", "-")
69
- hash_name = hashlib.sha256(model_name.encode()).hexdigest()
70
- # add hash to avoid conflict with old users' lock files
71
- lock_file_name = hash_name + model_name + ".lock"
72
- # mode 0o666 is required for the filelock to be shared across users
73
- lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
74
- return lock
75
-
76
-
77
- def _shared_pointers(tensors):
78
- ptrs = defaultdict(list)
79
- for k, v in tensors.items():
80
- ptrs[v.data_ptr()].append(k)
81
- failing = []
82
- for _, names in ptrs.items():
83
- if len(names) > 1:
84
- failing.append(names)
85
- return failing
86
-
87
-
88
- def convert_bin_to_safetensor_file(
89
- pt_filename: str,
90
- sf_filename: str,
91
- ) -> None:
92
- loaded = torch.load(pt_filename, map_location="cpu")
93
- if "state_dict" in loaded:
94
- loaded = loaded["state_dict"]
95
- shared = _shared_pointers(loaded)
96
- for shared_weights in shared:
97
- for name in shared_weights[1:]:
98
- loaded.pop(name)
99
-
100
- # For tensors to be contiguous
101
- loaded = {k: v.contiguous() for k, v in loaded.items()}
102
-
103
- dirname = os.path.dirname(sf_filename)
104
- os.makedirs(dirname, exist_ok=True)
105
- save_file(loaded, sf_filename, metadata={"format": "pt"})
106
-
107
- # check file size
108
- sf_size = os.stat(sf_filename).st_size
109
- pt_size = os.stat(pt_filename).st_size
110
- if (sf_size - pt_size) / pt_size > 0.01:
111
- raise RuntimeError(
112
- f"""The file size different is more than 1%:
113
- - {sf_filename}: {sf_size}
114
- - {pt_filename}: {pt_size}
115
- """
116
- )
117
-
118
- # check if the tensors are the same
119
- reloaded = load_file(sf_filename)
120
- for k in loaded:
121
- pt_tensor = loaded[k]
122
- sf_tensor = reloaded[k]
123
- if not torch.equal(pt_tensor, sf_tensor):
124
- raise RuntimeError(f"The output tensors do not match for key {k}")
125
-
126
-
127
- # TODO(woosuk): Move this to other place.
128
- def get_quant_config(model_config: ModelConfig) -> QuantizationConfig:
129
- quant_cls = get_quantization_config(model_config.quantization)
130
- # Read the quantization config from the HF model config, if available.
131
- hf_quant_config = getattr(model_config.hf_config, "quantization_config", None)
132
- if hf_quant_config is not None:
133
- return quant_cls.from_config(hf_quant_config)
134
- model_name_or_path = model_config.model
135
- is_local = os.path.isdir(model_name_or_path)
136
- if not is_local:
137
- # Download the config files.
138
- with get_lock(model_name_or_path, model_config.download_dir):
139
- hf_folder = snapshot_download(
140
- model_name_or_path,
141
- revision=model_config.revision,
142
- allow_patterns="*.json",
143
- cache_dir=model_config.download_dir,
144
- tqdm_class=Disabledtqdm,
145
- )
146
- else:
147
- hf_folder = model_name_or_path
148
- config_files = glob.glob(os.path.join(hf_folder, "*.json"))
149
-
150
- quant_config_files = [
151
- f
152
- for f in config_files
153
- if any(f.endswith(x) for x in quant_cls.get_config_filenames())
154
- ]
155
- if len(quant_config_files) == 0:
156
- raise ValueError(f"Cannot find the config file for {model_config.quantization}")
157
- if len(quant_config_files) > 1:
158
- raise ValueError(
159
- f"Found multiple config files for {model_config.quantization}: "
160
- f"{quant_config_files}"
161
- )
162
-
163
- quant_config_file = quant_config_files[0]
164
- with open(quant_config_file, "r") as f:
165
- config = json.load(f)
166
- return quant_cls.from_config(config)
167
-
168
-
169
- def prepare_hf_model_weights(
170
- model_name_or_path: str,
171
- cache_dir: Optional[str] = None,
172
- load_format: str = "auto",
173
- fall_back_to_pt: bool = True,
174
- revision: Optional[str] = None,
175
- ) -> Tuple[str, List[str], bool]:
176
- # Download model weights from huggingface.
177
- is_local = os.path.isdir(model_name_or_path) and load_format != "tensorizer"
178
- use_safetensors = False
179
- # Some quantized models use .pt files for storing the weights.
180
- if load_format == "auto":
181
- allow_patterns = ["*.safetensors", "*.bin"]
182
- elif load_format == "safetensors":
183
- use_safetensors = True
184
- allow_patterns = ["*.safetensors"]
185
- elif load_format == "pt":
186
- allow_patterns = ["*.pt"]
187
- elif load_format == "npcache":
188
- allow_patterns = ["*.bin"]
189
- elif load_format == "tensorizer":
190
- allow_patterns = ["*.tensors"]
191
- else:
192
- raise ValueError(f"Unknown load_format: {load_format}")
193
-
194
- if fall_back_to_pt:
195
- allow_patterns += ["*.pt"]
196
-
197
- if not is_local and load_format != "tensorizer":
198
- # Before we download we look at that is available:
199
- fs = HfFileSystem()
200
- file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
201
-
202
- # depending on what is available we download different things
203
- for pattern in allow_patterns:
204
- matching = fnmatch.filter(file_list, pattern)
205
- if len(matching) > 0:
206
- allow_patterns = [pattern]
207
- break
208
-
209
- logger.info(f"Using model weights format {allow_patterns}")
210
- # Use file lock to prevent multiple processes from
211
- # downloading the same model weights at the same time.
212
- with get_lock(model_name_or_path, cache_dir):
213
- hf_folder = snapshot_download(
214
- model_name_or_path,
215
- allow_patterns=allow_patterns,
216
- cache_dir=cache_dir,
217
- tqdm_class=Disabledtqdm,
218
- revision=revision,
219
- )
220
- else:
221
- hf_folder = model_name_or_path
222
- hf_weights_files: List[str] = []
223
- for pattern in allow_patterns:
224
- hf_weights_files += glob.glob(os.path.join(hf_folder, pattern))
225
- if len(hf_weights_files) > 0:
226
- if pattern == "*.safetensors":
227
- use_safetensors = True
228
- break
229
- if not use_safetensors:
230
- # Exclude files that are not needed for inference.
231
- # https://github.com/huggingface/transformers/blob/v4.34.0/src/transformers/trainer.py#L227-L233
232
- blacklist = [
233
- "training_args.bin",
234
- "optimizer.bin",
235
- "optimizer.pt",
236
- "scheduler.pt",
237
- "scaler.pt",
238
- ]
239
- hf_weights_files = [
240
- f for f in hf_weights_files if not any(f.endswith(x) for x in blacklist)
241
- ]
242
-
243
- if load_format == "tensorizer":
244
- return hf_folder, hf_weights_files, use_safetensors
245
-
246
- if len(hf_weights_files) == 0:
247
- raise RuntimeError(f"Cannot find any model weights with `{model_name_or_path}`")
248
-
249
- return hf_folder, hf_weights_files, use_safetensors
250
-
251
-
252
- def hf_model_weights_iterator(
253
- model_name_or_path: str,
254
- cache_dir: Optional[str] = None,
255
- load_format: Union[Tuple, str] = "auto",
256
- revision: Optional[str] = None,
257
- fall_back_to_pt: Optional[bool] = True,
258
- ) -> Iterator[Tuple[str, torch.Tensor]]:
259
- hf_folder, hf_weights_files, use_safetensors = prepare_hf_model_weights(
260
- model_name_or_path,
261
- cache_dir=cache_dir,
262
- load_format=load_format,
263
- fall_back_to_pt=fall_back_to_pt,
264
- revision=revision,
265
- )
266
-
267
- if load_format == "npcache":
268
- # Currently np_cache only support *.bin checkpoints
269
- assert use_safetensors is False
270
-
271
- # Convert the model weights from torch tensors to numpy arrays for
272
- # faster loading.
273
- np_folder = os.path.join(hf_folder, "np")
274
- os.makedirs(np_folder, exist_ok=True)
275
- weight_names_file = os.path.join(np_folder, "weight_names.json")
276
- # Use file lock to prevent multiple processes from
277
- # dumping the same model weights to numpy at the same time.
278
- with get_lock(model_name_or_path, cache_dir):
279
- if not os.path.exists(weight_names_file):
280
- weight_names = []
281
- for bin_file in hf_weights_files:
282
- state = torch.load(bin_file, map_location="cpu")
283
- for name, param in state.items():
284
- param_path = os.path.join(np_folder, name)
285
- with open(param_path, "wb") as f:
286
- np.save(f, param.cpu().detach().numpy())
287
- weight_names.append(name)
288
- with open(weight_names_file, "w") as f:
289
- json.dump(weight_names, f)
290
-
291
- with open(weight_names_file, "r") as f:
292
- weight_names = json.load(f)
293
-
294
- for name in weight_names:
295
- param_path = os.path.join(np_folder, name)
296
- with open(param_path, "rb") as f:
297
- param = np.load(f)
298
- yield name, torch.from_numpy(param)
299
- elif load_format == "tensorizer":
300
- from vllm.model_executor.tensorizer_loader import (
301
- TensorDeserializer,
302
- open_stream,
303
- tensorizer_warning,
304
- )
305
-
306
- tensorizer_args = load_format.params
307
- tensorizer_warning(
308
- "Deserializing HuggingFace models is not optimized for "
309
- "loading on vLLM, as tensorizer is forced to load to CPU. "
310
- "Consider deserializing a vLLM model instead for faster "
311
- "load times. See the examples/tensorize_vllm_model.py example "
312
- "script for serializing vLLM models."
313
- )
314
-
315
- deserializer_args = tensorizer_args.deserializer_params
316
- stream_params = tensorizer_args.stream_params
317
- stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
318
- with TensorDeserializer(stream, **deserializer_args, device="cpu") as state:
319
- for name, param in state.items():
320
- yield name, param
321
- del state
322
- elif use_safetensors:
323
- for st_file in hf_weights_files:
324
- with safe_open(st_file, framework="pt") as f:
325
- for name in f.keys(): # noqa: SIM118
326
- param = f.get_tensor(name)
327
- yield name, param
328
- else:
329
- for bin_file in hf_weights_files:
330
- state = torch.load(bin_file, map_location="cpu")
331
- for name, param in state.items():
332
- yield name, param
333
- del state
334
- torch.cuda.empty_cache()
335
-
336
-
337
- def kv_cache_scales_loader(
338
- filename: str,
339
- tp_rank: int,
340
- tp_size: int,
341
- num_hidden_layers: int,
342
- model_type: Optional[str],
343
- ) -> Iterable[Tuple[int, float]]:
344
- """
345
- A simple utility to read in KV cache scaling factors that have been
346
- previously serialized to disk. Used by the model to populate the appropriate
347
- KV cache scaling factors. The serialization should represent a dictionary
348
- whose keys are the TP ranks and values are another dictionary mapping layers
349
- to their KV cache scaling factors.
350
- Keep this function in sync with the output of examples/fp8/extract_scales.py
351
- """
352
- try:
353
- with open(filename) as f:
354
- context = {
355
- "model_type": model_type,
356
- "num_hidden_layers": num_hidden_layers,
357
- "tp_rank": tp_rank,
358
- "tp_size": tp_size,
359
- }
360
- schema_dct = json.load(f)
361
- schema = QuantParamSchema.model_validate(schema_dct, context=context)
362
- layer_scales_map = schema.kv_cache.scaling_factor[tp_rank]
363
- return layer_scales_map.items()
364
-
365
- except FileNotFoundError:
366
- logger.error(f"File or directory '{filename}' not found.")
367
- except json.JSONDecodeError:
368
- logger.error(f"Error decoding JSON in file '{filename}'.")
369
- except Exception as e:
370
- logger.error(f"An error occurred while reading '{filename}': {e}")
371
- # This section is reached if and only if any of the excepts are hit
372
- # Return an empty iterable (list) => no KV cache scales are loaded
373
- # which ultimately defaults to 1.0 scales
374
- logger.warning(
375
- "Defaulting to KV cache scaling factors = 1.0 "
376
- f"for all layers in TP rank {tp_rank} "
377
- "as an error occurred during loading."
378
- )
379
- return []
380
-
381
-
382
- def convert_pyslice_to_tensor(x: Any) -> torch.Tensor:
383
- """convert PySafeSlice object from safetensors to torch.Tensor
384
-
385
- PySafeSlice object supports indexing, which is done before loading the
386
- actual tensor and can reduce the amount of memory being read into the
387
- memory. However, it does not support more advanced functionalities
388
- like `.view()` or `.t()`. Therefore, if we need to modify the loaded
389
- tensor with these more complicated operators, we need to convert to
390
- tensor first.
391
- """
392
- if not isinstance(x, torch.Tensor):
393
- x = x[:]
394
- return x
395
-
396
-
397
- def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
398
- """Default weight loader."""
399
- assert param.size() == loaded_weight.size()
400
- param.data.copy_(loaded_weight)
401
-
402
-
403
- def initialize_dummy_weights(
404
- model: torch.nn.Module,
405
- low: float = -1e-3,
406
- high: float = 1e-3,
407
- ) -> None:
408
- """Initialize model weights with random values.
409
-
410
- The model weights must be randomly initialized for accurate performance
411
- measurements. Additionally, the model weights should not cause NaNs in the
412
- forward pass. We empirically found that initializing the weights with
413
- values between -1e-3 and 1e-3 works well for most models.
414
- """
415
- for param in model.state_dict().values():
416
- if torch.is_floating_point(param):
417
- param.data.uniform_(low, high)
@@ -1,72 +0,0 @@ sglang-0.1.16.dist-info/RECORD (file removed)
1
- sglang/__init__.py,sha256=lKabCNZM2OhtymVLUuW4bpt-Jdxwk81wP1TkhVqIJEg,1058
2
- sglang/api.py,sha256=hnVPt_p2ALLrraAKpVbkGocVtgb0MqgOH5NUQKOA6sY,4548
3
- sglang/global_config.py,sha256=LxoF7VGCYszeEafC8zBbzUQ5PPFdv2rPzw2zEGPLgfg,961
4
- sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
5
- sglang/launch_server_llavavid.py,sha256=UWo_qUCJ9yknp1TVPzrz4B_aZtEuQpLQq0l96FMgynI,1058
6
- sglang/utils.py,sha256=Xp5mmhLoXNLB5U0NmCg-WMkfV0Ov4KVqzOvGZa3XKmc,7610
7
- sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
9
- sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
10
- sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
11
- sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
12
- sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
13
- sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- sglang/lang/chat_template.py,sha256=ogIT8iMlDcSEgcNBTh5pRLoCkdQI_ec5Hc27wFUFDIg,11532
15
- sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
16
- sglang/lang/interpreter.py,sha256=GSIbO9N6ThfchdURb7XzQMZ9U6p1xirKHgXGmqLxKtg,28434
17
- sglang/lang/ir.py,sha256=NxvIWlUidvtpQpPG4GAXZEN64Y2vLOBjN2Z2JkZVG1U,13350
18
- sglang/lang/tracer.py,sha256=QcslAObEjepk8XmiqCobwzWaDpihofEQXjeRs_3B8NQ,8282
19
- sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
20
- sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
21
- sglang/srt/flush_cache.py,sha256=JOXLH4pmweVbuEWDPu3SEDrLYFG82nR2SpzbslW4b-A,381
22
- sglang/srt/hf_transformers_utils.py,sha256=UneOMsw3w7taH9EKIi6uHZ-GNUZG0vbZIWN-ZoQZ5gM,5417
23
- sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
24
- sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
25
- sglang/srt/model_config.py,sha256=843L1KxEPZcEk1uwQH10BwSX9L5DYJ3OGUUBo8wMdZg,1695
26
- sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
27
- sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
28
- sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
29
- sglang/srt/server.py,sha256=YAUiniJs9ebNrJ0Lweg2TnUL_yZ0P3PtWoT0Z_3d8vk,10371
30
- sglang/srt/server_args.py,sha256=TQxIEdF0crqtY6WfZ6q7SKOQcCSomBEVjJ5K4HyTSvQ,9539
31
- sglang/srt/utils.py,sha256=cr2uZmEB-Exq-wi3Y8B3yQu7kFUiyV4PAvzouvKYkWg,13090
32
- sglang/srt/weight_utils.py,sha256=bFNh9-T8gseB0zKeu1qsMww8FpyrGFxbPcOFSeJtL5Q,15505
33
- sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
34
- sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
35
- sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
36
- sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
37
- sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
38
- sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
39
- sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
40
- sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
41
- sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
42
- sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
43
- sglang/srt/managers/io_struct.py,sha256=fFfUQtC-D31xGYdCAfuNVuX3QyaNDgGpfzC8qnKt0YA,4294
44
- sglang/srt/managers/tokenizer_manager.py,sha256=TlGyFhWz1b24vkeUVvCwKFBERffi-esxGRhoukBnET8,13116
45
- sglang/srt/managers/router/infer_batch.py,sha256=a1F3EjSBdER5pbgZFifuTdrE2Xom8Mt4aT9rmB8n35M,20204
46
- sglang/srt/managers/router/manager.py,sha256=tdvYmwGHMeG2MMYZ4ZThdAJ_b4fp94UpemISFWOddno,2697
47
- sglang/srt/managers/router/model_rpc.py,sha256=FJFgf1KAJ0Z8Yq4EPyczxZkCmZBjwNwCwXcjwyhU0k4,29775
48
- sglang/srt/managers/router/model_runner.py,sha256=fp9wPh4sQY6Q-5PVtv_e9p5GgkkixSDUIqfFt7lVlV8,16527
49
- sglang/srt/managers/router/radix_cache.py,sha256=GE6oY8bppRJCIxZWiDKO4P6al58zcqLQe605Y1d2bdo,7924
50
- sglang/srt/managers/router/scheduler.py,sha256=pvlKSyCyIXmu14eyy1mvP9-QdG78eLUqMlr4cnfes2Y,2259
51
- sglang/srt/models/commandr.py,sha256=DVdUF5C5etm82RoXJTNjYqlS2W2_9awzxzXNMubRoVg,13579
52
- sglang/srt/models/dbrx.py,sha256=NIhlJp2B_y_L1ltK_Y7SEenAiHTUUp3p1rf8LIydC0o,14173
53
- sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
54
- sglang/srt/models/gemma.py,sha256=Wk25zFkqkdG62xVVJEzeIjDES1LnoO0EY2W2p9XMvbA,11637
55
- sglang/srt/models/llama2.py,sha256=Y2XwS5XXG77OfPAvbju7zp53CP5izzee_4-laVqu5ZM,11655
56
- sglang/srt/models/llava.py,sha256=HtR7lUnAYW39vWw6xmDZkbG7AueswZDJxXeu6rQfpSU,14921
57
- sglang/srt/models/llavavid.py,sha256=ueImEwOR4ZlNFUoBvXbwZPNRcrYWg54sPNK7pmGnrp0,13219
58
- sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
59
- sglang/srt/models/mixtral.py,sha256=1aggGw0P0MVQu5C5D3pMaZpRpY_PmrK_nwBOygOlPEM,13839
60
- sglang/srt/models/qwen.py,sha256=cakvxjghKdGg5iGq9TJ_nGlVQaJ4-9V91EyyZnV4rmc,9390
61
- sglang/srt/models/qwen2.py,sha256=PyOA8-RA_frRVLXfh8d1Ui1hUd1YmM3GfsPw2q5rCDI,11351
62
- sglang/srt/models/stablelm.py,sha256=TCfQumj0acu2lCGujJj_PuzHFp3kFIwENQEfT-hnHUA,10867
63
- sglang/srt/models/yivl.py,sha256=q8MUvIFIWpKCQ4pSZBoFpw-pnbdjkfr-M8jBJfGFu7E,4393
64
- sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
65
- sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
66
- sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
67
- sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
68
- sglang-0.1.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- sglang-0.1.16.dist-info/METADATA,sha256=yiziPDpVr6NPPhX58sA0GaLYKCut4FnBKD7TE50HH6k,28911
70
- sglang-0.1.16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
- sglang-0.1.16.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
72
- sglang-0.1.16.dist-info/RECORD,,