sglang 0.2.9.post1__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +8 -0
- sglang/api.py +10 -2
- sglang/bench_latency.py +234 -74
- sglang/check_env.py +25 -2
- sglang/global_config.py +0 -1
- sglang/lang/backend/base_backend.py +3 -1
- sglang/lang/backend/openai.py +8 -3
- sglang/lang/backend/runtime_endpoint.py +46 -40
- sglang/lang/choices.py +164 -0
- sglang/lang/interpreter.py +6 -13
- sglang/lang/ir.py +11 -2
- sglang/srt/hf_transformers_utils.py +2 -2
- sglang/srt/layers/extend_attention.py +59 -7
- sglang/srt/layers/logits_processor.py +1 -1
- sglang/srt/layers/radix_attention.py +24 -14
- sglang/srt/layers/token_attention.py +28 -2
- sglang/srt/managers/io_struct.py +9 -4
- sglang/srt/managers/schedule_batch.py +98 -323
- sglang/srt/managers/tokenizer_manager.py +34 -16
- sglang/srt/managers/tp_worker.py +20 -22
- sglang/srt/mem_cache/memory_pool.py +74 -38
- sglang/srt/model_config.py +11 -0
- sglang/srt/model_executor/cuda_graph_runner.py +3 -3
- sglang/srt/model_executor/forward_batch_info.py +256 -0
- sglang/srt/model_executor/model_runner.py +51 -26
- sglang/srt/models/chatglm.py +1 -1
- sglang/srt/models/commandr.py +1 -1
- sglang/srt/models/dbrx.py +1 -1
- sglang/srt/models/deepseek.py +1 -1
- sglang/srt/models/deepseek_v2.py +199 -17
- sglang/srt/models/gemma.py +1 -1
- sglang/srt/models/gemma2.py +1 -1
- sglang/srt/models/gpt_bigcode.py +1 -1
- sglang/srt/models/grok.py +1 -1
- sglang/srt/models/internlm2.py +1 -1
- sglang/srt/models/llama2.py +1 -1
- sglang/srt/models/llama_classification.py +1 -1
- sglang/srt/models/llava.py +1 -2
- sglang/srt/models/llavavid.py +1 -2
- sglang/srt/models/minicpm.py +1 -1
- sglang/srt/models/mixtral.py +1 -1
- sglang/srt/models/mixtral_quant.py +1 -1
- sglang/srt/models/qwen.py +1 -1
- sglang/srt/models/qwen2.py +1 -1
- sglang/srt/models/qwen2_moe.py +1 -1
- sglang/srt/models/stablelm.py +1 -1
- sglang/srt/openai_api/adapter.py +151 -29
- sglang/srt/openai_api/protocol.py +7 -1
- sglang/srt/server.py +111 -84
- sglang/srt/server_args.py +12 -2
- sglang/srt/utils.py +25 -20
- sglang/test/run_eval.py +21 -10
- sglang/test/runners.py +237 -0
- sglang/test/simple_eval_common.py +12 -12
- sglang/test/simple_eval_gpqa.py +92 -0
- sglang/test/simple_eval_humaneval.py +5 -5
- sglang/test/simple_eval_math.py +72 -0
- sglang/test/test_utils.py +95 -14
- sglang/utils.py +15 -37
- sglang/version.py +1 -1
- {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/METADATA +59 -48
- sglang-0.2.11.dist-info/RECORD +102 -0
- sglang-0.2.9.post1.dist-info/RECORD +0 -97
- {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/LICENSE +0 -0
- {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/WHEEL +0 -0
- {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/top_level.txt +0 -0
sglang/utils.py
CHANGED
@@ -12,6 +12,7 @@ import urllib.request
|
|
12
12
|
from concurrent.futures import ThreadPoolExecutor
|
13
13
|
from io import BytesIO
|
14
14
|
from json import dumps
|
15
|
+
from typing import Union
|
15
16
|
|
16
17
|
import numpy as np
|
17
18
|
import requests
|
@@ -25,7 +26,7 @@ def get_exception_traceback():
|
|
25
26
|
return err_str
|
26
27
|
|
27
28
|
|
28
|
-
def is_same_type(values):
|
29
|
+
def is_same_type(values: list):
|
29
30
|
"""Return whether the elements in values are of the same type."""
|
30
31
|
if len(values) <= 1:
|
31
32
|
return True
|
@@ -45,7 +46,7 @@ def read_jsonl(filename: str):
|
|
45
46
|
return rets
|
46
47
|
|
47
48
|
|
48
|
-
def dump_state_text(filename, states, mode="w"):
|
49
|
+
def dump_state_text(filename: str, states: list, mode: str = "w"):
|
49
50
|
"""Dump program state in a text file."""
|
50
51
|
from sglang.lang.interpreter import ProgramState
|
51
52
|
|
@@ -75,19 +76,13 @@ class HttpResponse:
|
|
75
76
|
return self.resp.status
|
76
77
|
|
77
78
|
|
78
|
-
def http_request(
|
79
|
-
url, json=None, stream=False, auth_token=None, api_key=None, verify=None
|
80
|
-
):
|
79
|
+
def http_request(url, json=None, stream=False, api_key=None, verify=None):
|
81
80
|
"""A faster version of requests.post with low-level urllib API."""
|
82
81
|
headers = {"Content-Type": "application/json; charset=utf-8"}
|
83
82
|
|
84
|
-
# add the Authorization header if an auth token is provided
|
85
|
-
if auth_token is not None:
|
86
|
-
headers["Authorization"] = f"Bearer {auth_token}"
|
87
|
-
|
88
|
-
# add the API Key header if an API key is provided
|
83
|
+
# add the Authorization header if an api key is provided
|
89
84
|
if api_key is not None:
|
90
|
-
headers["
|
85
|
+
headers["Authorization"] = f"Bearer {api_key}"
|
91
86
|
|
92
87
|
if stream:
|
93
88
|
return requests.post(url, json=json, stream=True, headers=headers)
|
@@ -105,7 +100,7 @@ def http_request(
|
|
105
100
|
return HttpResponse(e)
|
106
101
|
|
107
102
|
|
108
|
-
def encode_image_base64(image_path):
|
103
|
+
def encode_image_base64(image_path: Union[str, bytes]):
|
109
104
|
"""Encode an image in base64."""
|
110
105
|
if isinstance(image_path, str):
|
111
106
|
with open(image_path, "rb") as image_file:
|
@@ -144,7 +139,7 @@ def encode_frame(frame):
|
|
144
139
|
return frame_bytes
|
145
140
|
|
146
141
|
|
147
|
-
def encode_video_base64(video_path, num_frames=16):
|
142
|
+
def encode_video_base64(video_path: str, num_frames: int = 16):
|
148
143
|
import cv2 # pip install opencv-python-headless
|
149
144
|
|
150
145
|
cap = cv2.VideoCapture(video_path)
|
@@ -190,7 +185,7 @@ def encode_video_base64(video_path, num_frames=16):
|
|
190
185
|
return video_base64
|
191
186
|
|
192
187
|
|
193
|
-
def _is_chinese_char(cp):
|
188
|
+
def _is_chinese_char(cp: int):
|
194
189
|
"""Checks whether CP is the codepoint of a CJK character."""
|
195
190
|
# This defines a "chinese character" as anything in the CJK Unicode block:
|
196
191
|
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
|
@@ -215,7 +210,7 @@ def _is_chinese_char(cp):
|
|
215
210
|
return False
|
216
211
|
|
217
212
|
|
218
|
-
def find_printable_text(text):
|
213
|
+
def find_printable_text(text: str):
|
219
214
|
"""Returns the longest printable substring of text that contains only entire words."""
|
220
215
|
# Borrowed from https://github.com/huggingface/transformers/blob/061580c82c2db1de9139528243e105953793f7a2/src/transformers/generation/streamers.py#L99
|
221
216
|
|
@@ -234,26 +229,7 @@ def find_printable_text(text):
|
|
234
229
|
return text[: text.rfind(" ") + 1]
|
235
230
|
|
236
231
|
|
237
|
-
def
|
238
|
-
"""Run a function with timeout."""
|
239
|
-
ret_value = []
|
240
|
-
|
241
|
-
def _target_func():
|
242
|
-
ret_value.append(func(*args, **(kwargs or {})))
|
243
|
-
|
244
|
-
t = threading.Thread(target=_target_func)
|
245
|
-
t.start()
|
246
|
-
t.join(timeout=timeout)
|
247
|
-
if t.is_alive():
|
248
|
-
raise TimeoutError()
|
249
|
-
|
250
|
-
if not ret_value:
|
251
|
-
raise RuntimeError()
|
252
|
-
|
253
|
-
return ret_value[0]
|
254
|
-
|
255
|
-
|
256
|
-
def graceful_registry(sub_module_name):
|
232
|
+
def graceful_registry(sub_module_name: str):
|
257
233
|
def graceful_shutdown(signum, frame):
|
258
234
|
logger.info(
|
259
235
|
f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
|
@@ -265,7 +241,9 @@ def graceful_registry(sub_module_name):
|
|
265
241
|
|
266
242
|
|
267
243
|
class LazyImport:
|
268
|
-
|
244
|
+
"""Lazy import to make `import sglang` run faster."""
|
245
|
+
|
246
|
+
def __init__(self, module_name: str, class_name: str):
|
269
247
|
self.module_name = module_name
|
270
248
|
self.class_name = class_name
|
271
249
|
self._module = None
|
@@ -276,7 +254,7 @@ class LazyImport:
|
|
276
254
|
self._module = getattr(module, self.class_name)
|
277
255
|
return self._module
|
278
256
|
|
279
|
-
def __getattr__(self, name):
|
257
|
+
def __getattr__(self, name: str):
|
280
258
|
module = self._load()
|
281
259
|
return getattr(module, name)
|
282
260
|
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.2.9.post1"
|
1
|
+
__version__ = "0.2.11"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.2.9.post1
|
3
|
+
Version: 0.2.11
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -215,34 +215,41 @@ Requires-Dist: requests
|
|
215
215
|
Requires-Dist: tqdm
|
216
216
|
Requires-Dist: numpy
|
217
217
|
Provides-Extra: all
|
218
|
-
Requires-Dist: sglang[srt]
|
219
|
-
Requires-Dist: sglang[openai]
|
220
|
-
Requires-Dist: sglang[anthropic]
|
221
|
-
Requires-Dist: sglang[litellm]
|
218
|
+
Requires-Dist: sglang[srt]; extra == "all"
|
219
|
+
Requires-Dist: sglang[openai]; extra == "all"
|
220
|
+
Requires-Dist: sglang[anthropic]; extra == "all"
|
221
|
+
Requires-Dist: sglang[litellm]; extra == "all"
|
222
222
|
Provides-Extra: anthropic
|
223
|
-
Requires-Dist: anthropic
|
223
|
+
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
224
|
+
Provides-Extra: dev
|
225
|
+
Requires-Dist: sglang[all]; extra == "dev"
|
226
|
+
Requires-Dist: sglang[test]; extra == "dev"
|
224
227
|
Provides-Extra: litellm
|
225
|
-
Requires-Dist: litellm
|
228
|
+
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
226
229
|
Provides-Extra: openai
|
227
|
-
Requires-Dist: openai
|
228
|
-
Requires-Dist: tiktoken
|
230
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
231
|
+
Requires-Dist: tiktoken; extra == "openai"
|
229
232
|
Provides-Extra: srt
|
230
|
-
Requires-Dist: aiohttp
|
231
|
-
Requires-Dist: fastapi
|
232
|
-
Requires-Dist: hf-transfer
|
233
|
-
Requires-Dist: huggingface-hub
|
234
|
-
Requires-Dist: interegular
|
235
|
-
Requires-Dist: packaging
|
236
|
-
Requires-Dist: pillow
|
237
|
-
Requires-Dist: psutil
|
238
|
-
Requires-Dist: pydantic
|
239
|
-
Requires-Dist:
|
240
|
-
Requires-Dist:
|
241
|
-
Requires-Dist:
|
242
|
-
Requires-Dist:
|
243
|
-
Requires-Dist:
|
244
|
-
Requires-Dist:
|
245
|
-
Requires-Dist:
|
233
|
+
Requires-Dist: aiohttp; extra == "srt"
|
234
|
+
Requires-Dist: fastapi; extra == "srt"
|
235
|
+
Requires-Dist: hf-transfer; extra == "srt"
|
236
|
+
Requires-Dist: huggingface-hub; extra == "srt"
|
237
|
+
Requires-Dist: interegular; extra == "srt"
|
238
|
+
Requires-Dist: packaging; extra == "srt"
|
239
|
+
Requires-Dist: pillow; extra == "srt"
|
240
|
+
Requires-Dist: psutil; extra == "srt"
|
241
|
+
Requires-Dist: pydantic; extra == "srt"
|
242
|
+
Requires-Dist: python-multipart; extra == "srt"
|
243
|
+
Requires-Dist: torch; extra == "srt"
|
244
|
+
Requires-Dist: uvicorn; extra == "srt"
|
245
|
+
Requires-Dist: uvloop; extra == "srt"
|
246
|
+
Requires-Dist: zmq; extra == "srt"
|
247
|
+
Requires-Dist: vllm==0.5.4; extra == "srt"
|
248
|
+
Requires-Dist: outlines>=0.0.44; extra == "srt"
|
249
|
+
Provides-Extra: test
|
250
|
+
Requires-Dist: jsonlines; extra == "test"
|
251
|
+
Requires-Dist: matplotlib; extra == "test"
|
252
|
+
Requires-Dist: pandas; extra == "test"
|
246
253
|
|
247
254
|
<div align="center">
|
248
255
|
<img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400"></img>
|
@@ -295,20 +302,20 @@ pip install --upgrade pip
|
|
295
302
|
pip install "sglang[all]"
|
296
303
|
|
297
304
|
# Install FlashInfer CUDA kernels
|
298
|
-
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
305
|
+
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
|
299
306
|
```
|
300
307
|
|
301
308
|
### Method 2: From source
|
302
309
|
```
|
303
|
-
# Use the
|
304
|
-
git clone -b v0.2.9.post1 https://github.com/sgl-project/sglang.git
|
310
|
+
# Use the last release branch
|
311
|
+
git clone -b v0.2.11 https://github.com/sgl-project/sglang.git
|
305
312
|
cd sglang
|
306
313
|
|
307
314
|
pip install --upgrade pip
|
308
315
|
pip install -e "python[all]"
|
309
316
|
|
310
317
|
# Install FlashInfer CUDA kernels
|
311
|
-
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
318
|
+
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
|
312
319
|
```
|
313
320
|
|
314
321
|
### Method 3: Using docker
|
@@ -382,7 +389,7 @@ response = client.chat.completions.create(
|
|
382
389
|
print(response)
|
383
390
|
```
|
384
391
|
|
385
|
-
It supports streaming, vision, and most features of the Chat/Completions/Models endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/).
|
392
|
+
It supports streaming, vision, and most features of the Chat/Completions/Models/Batch endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/).
|
386
393
|
|
387
394
|
### Additional Server Arguments
|
388
395
|
- Add `--tp 2` to enable tensor parallelism. If it indicates `peer access is not supported between these two devices`, add `--enable-p2p-check` option.
|
@@ -393,10 +400,14 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
|
393
400
|
```
|
394
401
|
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --dp 2 --tp 2
|
395
402
|
```
|
396
|
-
- If you see out-of-memory errors during serving, please try to reduce the memory usage of the KV cache pool by setting a smaller value of `--mem-fraction-static`. The default value is `0.9
|
403
|
+
- If you see out-of-memory errors during serving, please try to reduce the memory usage of the KV cache pool by setting a smaller value of `--mem-fraction-static`. The default value is `0.9`.
|
397
404
|
```
|
398
405
|
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
|
399
406
|
```
|
407
|
+
- If you see out-of-memory errors during prefill for long prompts on a model that supports long context, consider using chunked prefill.
|
408
|
+
```
|
409
|
+
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --chunked-prefill-size 8192
|
410
|
+
```
|
400
411
|
- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
|
401
412
|
- Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
|
402
413
|
```
|
@@ -410,22 +421,6 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
|
|
410
421
|
- To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
|
411
422
|
- To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
|
412
423
|
|
413
|
-
### Run Llama 3.1 405B
|
414
|
-
|
415
|
-
```bash
|
416
|
-
## Run 405B (fp8) on a single node
|
417
|
-
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
|
418
|
-
|
419
|
-
## Run 405B (fp16) on two nodes
|
420
|
-
# replace the `172.16.4.52:20000` with your own first node ip address and port, disable CUDA Graph temporarily
|
421
|
-
|
422
|
-
# on the first node
|
423
|
-
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
|
424
|
-
|
425
|
-
# on the second node
|
426
|
-
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
|
427
|
-
```
|
428
|
-
|
429
424
|
### Supported Models
|
430
425
|
|
431
426
|
- Llama / Llama 2 / Llama 3 / Llama 3.1
|
@@ -451,9 +446,25 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
|
|
451
446
|
|
452
447
|
Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
|
453
448
|
|
449
|
+
### Run Llama 3.1 405B
|
450
|
+
|
451
|
+
```bash
|
452
|
+
## Run 405B (fp8) on a single node
|
453
|
+
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
|
454
|
+
|
455
|
+
## Run 405B (fp16) on two nodes
|
456
|
+
# replace the `172.16.4.52:20000` with your own first node ip address and port, disable CUDA Graph temporarily
|
457
|
+
|
458
|
+
# on the first node
|
459
|
+
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
|
460
|
+
|
461
|
+
# on the second node
|
462
|
+
GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
|
463
|
+
```
|
464
|
+
|
454
465
|
### Benchmark Performance
|
455
466
|
|
456
|
-
- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as
|
467
|
+
- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`.
|
457
468
|
```
|
458
469
|
python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
|
459
470
|
```
|
@@ -0,0 +1,102 @@
|
|
1
|
+
sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
|
2
|
+
sglang/api.py,sha256=gAY9JhqWXjrYoWnMvR-iiuuY1YSN94We-lc1LH0z3cw,6030
|
3
|
+
sglang/bench_latency.py,sha256=CXvukEW0IeoH2IwN2vuriC0eHBdJsz3lgT7OwwNo_7A,16146
|
4
|
+
sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
|
5
|
+
sglang/check_env.py,sha256=oU8VmjjPK2SviRhr41cF1953soBu-eTT5E0Hf04zMzo,4974
|
6
|
+
sglang/global_config.py,sha256=9JxaFkBKSgep6BVeEl_kx9tuW9PqdijYELyBGTryl6o,1704
|
7
|
+
sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
|
8
|
+
sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
|
9
|
+
sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
|
10
|
+
sglang/version.py,sha256=_MLx4ac1juJPWEEiC9kMQISX3x3jFBr507jM2P_hxMg,23
|
11
|
+
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
|
13
|
+
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
14
|
+
sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
|
15
|
+
sglang/lang/interpreter.py,sha256=3RIeSGdKlKTq2Ixg_Tyo0fGEDTvBKS2f9FaJYODBHzA,30102
|
16
|
+
sglang/lang/ir.py,sha256=FGWghAfVW9IcxcrVqHiqpf7vmWzuNYoVTMSbBZkYVRk,16839
|
17
|
+
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
18
|
+
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
20
|
+
sglang/lang/backend/base_backend.py,sha256=Q5HdiDtyBewQeoYH0kDtBRVL8KFiEPNq9dw7XmauHQ8,1985
|
21
|
+
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
22
|
+
sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
|
23
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=AaBc5yczchX7mkwiKDMyjLjBkJsh2Lubrfd9lvCOlDo,9544
|
24
|
+
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
25
|
+
sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
|
26
|
+
sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
|
27
|
+
sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
|
28
|
+
sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
|
29
|
+
sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
|
30
|
+
sglang/srt/server.py,sha256=hUNnTvH4c1AI2JJzoBUf9TQuTelx-vulcqwkEplw7Gk,16699
|
31
|
+
sglang/srt/server_args.py,sha256=SmvnebtDTsvPNDyW6lltuJKC7h8eVdYmurY1ieIMySA,16475
|
32
|
+
sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
|
33
|
+
sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
|
34
|
+
sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
|
35
|
+
sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
|
36
|
+
sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
|
37
|
+
sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
|
38
|
+
sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
|
39
|
+
sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
|
40
|
+
sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
|
41
|
+
sglang/srt/layers/logits_processor.py,sha256=wHKB1FjbfY0a7KGw5dCsEhmO4sc7VMy3gYtSPv4oQYM,11097
|
42
|
+
sglang/srt/layers/radix_attention.py,sha256=lXwm-qs7hPy_EFV1Zf2pPQ0-drAdrO8V5J4eX0LwLtU,7505
|
43
|
+
sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
|
44
|
+
sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
|
45
|
+
sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
|
46
|
+
sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
|
47
|
+
sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
|
48
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
|
49
|
+
sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
|
50
|
+
sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
|
51
|
+
sglang/srt/managers/schedule_batch.py,sha256=sKQAHRL6VoapGiO7yQV796gW4sVGAgVVBMtmENbKtvg,29641
|
52
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=wqb6zQbkHYcSNU14Auuh5519CVMmfbKGBQvn_IwDSAo,21408
|
53
|
+
sglang/srt/managers/tp_worker.py,sha256=3sHlN4hxksF22lkOJ8i3X6WSH4_5POy74BfbIAzIDtM,35216
|
54
|
+
sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
|
55
|
+
sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
|
56
|
+
sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
|
57
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=oOKtPTgzujo9gHXykSuER7VKqQRuwNKlXyXlaK-3dxo,5280
|
58
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
|
59
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=EyI8sMMoVlOjdTT2Y3cfwo1-uQ43QCQ1skx5BNgchjE,9433
|
60
|
+
sglang/srt/model_executor/forward_batch_info.py,sha256=P5bGeLsnFbEqgWLI5X5Eg0XFCG1j2oWZOsIAMZNkZW4,9022
|
61
|
+
sglang/srt/model_executor/model_runner.py,sha256=yzkJLIM41mhbfgfq87ToskAaA1PS67YzhmoSMbflkZI,17479
|
62
|
+
sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
|
63
|
+
sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
|
64
|
+
sglang/srt/models/chatglm.py,sha256=7bHU2AFoppINDZm0EdxgtAJe7rwr9OPkhOCfq2qNrIA,13862
|
65
|
+
sglang/srt/models/commandr.py,sha256=5BEtIS2uUQJANkkY-6ZeDqlrpUK5yXVYHiztU3vsTKY,14172
|
66
|
+
sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
|
67
|
+
sglang/srt/models/deepseek.py,sha256=E5W4nkH-Ne449rAIwQZgz-FAH2Qqp2r1vNfboyk5wEg,16024
|
68
|
+
sglang/srt/models/deepseek_v2.py,sha256=NMcckZb48kVUwAmDA2l8wO19T6DNkJOkKAhHa6utBZM,26968
|
69
|
+
sglang/srt/models/gemma.py,sha256=ilfN_NOcz7hpwEJ2y7NW3fBFmFO7YfjhdFDbfzl2qww,12285
|
70
|
+
sglang/srt/models/gemma2.py,sha256=D8GZOI1tAbEV9PaBmJSsJRzCmvaK3tGXttIbrMb5yiQ,16426
|
71
|
+
sglang/srt/models/gpt_bigcode.py,sha256=OKk9UP67as3T5bePlTRGHTCD-1wqaUEk92AowXPm6dg,10204
|
72
|
+
sglang/srt/models/grok.py,sha256=M9rtdXslqYBle5VyZqFVHiJUXq_q_aHbza63xa03zqI,27861
|
73
|
+
sglang/srt/models/internlm2.py,sha256=CKWBL0dBvLdaEUeJOUvLUNPb8BLrAZ8_BSf2mfFQhfU,12225
|
74
|
+
sglang/srt/models/llama2.py,sha256=3ZEWi0PVCDNjTrVNvLs1ESdyTcZhJlZjaH5uyS46JyM,14288
|
75
|
+
sglang/srt/models/llama_classification.py,sha256=Dvzy3PfETiJtnKFOk8qDDLUoZECf_cpSrNeA60PaDo4,4932
|
76
|
+
sglang/srt/models/llava.py,sha256=-ysi192vpBDxNaMS8qaLOhC34lXQyRtbG_0niVaceSo,18436
|
77
|
+
sglang/srt/models/llavavid.py,sha256=MX7YpqYh5J4BoOnV7vVAIfoOlBFQXYpp8Kpe7WK0ejk,13562
|
78
|
+
sglang/srt/models/minicpm.py,sha256=ea_OyiwVTo6Tg9jNRAwqxETnA6FFeAqlIbiUS-xViEI,13843
|
79
|
+
sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
|
80
|
+
sglang/srt/models/mixtral.py,sha256=raSLbp6AfWg5_u-f-lYeRejE9koAjbHt8iIHXd3nURM,21397
|
81
|
+
sglang/srt/models/mixtral_quant.py,sha256=xYeeatZ9OfwCTas_KbH9nl6lnUT4YqSY7NAxpgLp5LE,14222
|
82
|
+
sglang/srt/models/qwen.py,sha256=43ea6gn4wHzAaI3JTDLtl08aEm0vIqgzbVH9M8oeuY0,10006
|
83
|
+
sglang/srt/models/qwen2.py,sha256=Hyhks2r4KHpKeb9iHZpnvEVc5klmnrPwcLohqg8j1kw,12284
|
84
|
+
sglang/srt/models/qwen2_moe.py,sha256=PZdhEf0DUuGWsld3TyDWlIqSbrrOdqvCD4lAtCPWXeg,18147
|
85
|
+
sglang/srt/models/stablelm.py,sha256=yPrdzPEoUD2s_Q3RgOq7BBC7z-UtEaACzabqbDRs2tA,11368
|
86
|
+
sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
|
87
|
+
sglang/srt/openai_api/adapter.py,sha256=Eq44_hGwHcglCKOc6WqWDxBsgyRqtuC6VR4HB4GLfUY,38193
|
88
|
+
sglang/srt/openai_api/protocol.py,sha256=pcRgmDM3Kozh74Aj-qEo8q64BI6hEjrdhYDU4m9srdI,8294
|
89
|
+
sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
|
90
|
+
sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
|
91
|
+
sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
|
92
|
+
sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
|
93
|
+
sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
|
94
|
+
sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
|
95
|
+
sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
|
96
|
+
sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
|
97
|
+
sglang/test/test_utils.py,sha256=ITQcY3WGV4kLGWEkfU-AeuFX8yGLmq9LEK5jHiuW7Sw,13991
|
98
|
+
sglang-0.2.11.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
99
|
+
sglang-0.2.11.dist-info/METADATA,sha256=gSQA5-Hf9y41ulOKiMeHRu4Nf-c9Nbt6xhmlCGzvhNY,33783
|
100
|
+
sglang-0.2.11.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
101
|
+
sglang-0.2.11.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
102
|
+
sglang-0.2.11.dist-info/RECORD,,
|
@@ -1,97 +0,0 @@
|
|
1
|
-
sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
|
2
|
-
sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
|
3
|
-
sglang/bench_latency.py,sha256=JPatRvstM3nXb-ViVgtR-TaRrFHpcHzqoDG7BQmRYK8,10539
|
4
|
-
sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
|
5
|
-
sglang/check_env.py,sha256=M4hWWl9IAmrhVATj6nbPlGiZ4FtLI3K9ETL9hFzxh8Q,4138
|
6
|
-
sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
|
7
|
-
sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
|
8
|
-
sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
|
9
|
-
sglang/utils.py,sha256=r0Z7hY_bFFk-b6WeQJir9br-hCW2-p7n5E7Et2WziaQ,8776
|
10
|
-
sglang/version.py,sha256=xiigmrV7lCZAjfifluudBrG9T_WlhKAusKo0iARTZm0,28
|
11
|
-
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
|
13
|
-
sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
|
14
|
-
sglang/lang/interpreter.py,sha256=_MbvYB0vweCgALklpM2DlofiCXuITCmX_fl8rPPcp5U,30340
|
15
|
-
sglang/lang/ir.py,sha256=0r-mhA4aO-uuS97Dvkw99ERTcJXfzuV6jJQMmuCwHEg,16615
|
16
|
-
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
17
|
-
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
19
|
-
sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
20
|
-
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
21
|
-
sglang/lang/backend/openai.py,sha256=6ww2rwKouWgtmjaCf4hk-kXXJ6bY6n9Xnbm3UTFZvl4,14808
|
22
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
|
23
|
-
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
24
|
-
sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
|
25
|
-
sglang/srt/hf_transformers_utils.py,sha256=Fg-3panb6lsqOhHmAYA0ivkXyBjdnvY5mqvilDv2xF4,11919
|
26
|
-
sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
|
27
|
-
sglang/srt/model_config.py,sha256=DO7m84WiT3dzPWmyKz_UXDAHEdqEjq8Lq5wCjzjYMME,6023
|
28
|
-
sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
|
29
|
-
sglang/srt/server.py,sha256=cDHUmLqj7MjF-3L9WcfA-4z9dRl55cwF5ygXuncMl-Q,15852
|
30
|
-
sglang/srt/server_args.py,sha256=wdRlxR-509RfNYuMQoxUAefMwoc5eme6sYwEMyRBHmk,16034
|
31
|
-
sglang/srt/utils.py,sha256=5wgGe6kI59JAmf8kxLsItulJ4xQaOJHHYaWWd6_WWmo,23384
|
32
|
-
sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
|
33
|
-
sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
|
34
|
-
sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
|
35
|
-
sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
|
36
|
-
sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
|
37
|
-
sglang/srt/layers/extend_attention.py,sha256=zuNnAdL_wF6BX0Mwn1dgDJvh3YJjYwqa5Fbzp8muOVc,12573
|
38
|
-
sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
|
39
|
-
sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
|
40
|
-
sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
|
41
|
-
sglang/srt/layers/radix_attention.py,sha256=tdA-kdd9LQY1wbw3iYuy-9cikVJYmy3EctwAlUfN-Uo,6945
|
42
|
-
sglang/srt/layers/token_attention.py,sha256=ylUqUnozJCCohxTGAiiP3sxgUrcXfEVic8-qgcHYDj4,7968
|
43
|
-
sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
|
44
|
-
sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
|
45
|
-
sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
|
46
|
-
sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
|
47
|
-
sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
|
48
|
-
sglang/srt/managers/io_struct.py,sha256=Rz7Ur9Yw6prDGdy6XjsSiUmVBccS6cef-G_9TW7HA_4,7105
|
49
|
-
sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
|
50
|
-
sglang/srt/managers/schedule_batch.py,sha256=LIoVCPNivh0u1dOrrWRgFD6a4ywq3nrG_4dNgCK0kIw,37697
|
51
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=rtZ44aiZOMHLHkXDhMgj0HDR3gExpeGjWfoCD0PfG_o,20574
|
52
|
-
sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
|
53
|
-
sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
|
54
|
-
sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
|
55
|
-
sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
|
56
|
-
sglang/srt/mem_cache/memory_pool.py,sha256=wkhjyYLbAZrl2FB5i4ODkxgMufBuDpe4N0kbXhu6ZO0,4509
|
57
|
-
sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
|
58
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
|
59
|
-
sglang/srt/model_executor/model_runner.py,sha256=RadluuL2Ou_BTOgo2IrLfMMEX7Z_DRgPL3JVBIr2KaU,16189
|
60
|
-
sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
|
61
|
-
sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
|
62
|
-
sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
|
63
|
-
sglang/srt/models/commandr.py,sha256=gaTI77hgitPlcUNyxMEdGu_XZQj2DuAMnh3KbZQ9HFg,14166
|
64
|
-
sglang/srt/models/dbrx.py,sha256=LQu7I2KH-XzY9iBlaK7IQsM1o3kzsuI1vTCspK2C19o,14655
|
65
|
-
sglang/srt/models/deepseek.py,sha256=adr57ZX6aPOBOpmvm7YIvoqo6u0jdrKJPZ8SGcVXAh8,16014
|
66
|
-
sglang/srt/models/deepseek_v2.py,sha256=9CORl-IroSguYPX3wz_aGe7mFoUE7cQRMs7CgbkBYLk,20087
|
67
|
-
sglang/srt/models/gemma.py,sha256=PMPI1-WLuLdk6e7u6I9d_LoCkauLkWY3aOP8MFEZ-sI,12279
|
68
|
-
sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,16420
|
69
|
-
sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
|
70
|
-
sglang/srt/models/grok.py,sha256=NfZdsRVErDIUWFqjhtNf2pqC9G4cRdYHBFpgDq1IZ2A,27855
|
71
|
-
sglang/srt/models/internlm2.py,sha256=Ld2GUxZeqqqJ2vd4QiX2s1y2AceJLA1nVnUYY88GMQk,12219
|
72
|
-
sglang/srt/models/llama2.py,sha256=zfOk3OK1_B6s6yuXsZFmNCf07RsfytVD72GunLBt8Cc,14282
|
73
|
-
sglang/srt/models/llama_classification.py,sha256=4r_orFZqBR3U_yC4bus1K3Z3-ADscYGSzgA82_VDN0g,4926
|
74
|
-
sglang/srt/models/llava.py,sha256=BJphgyQGdo7uTpJcKGEfWwdpH9GTMDnyiznLSSgmvm8,18476
|
75
|
-
sglang/srt/models/llavavid.py,sha256=-7vaVqaIfukCvMkNakEPblpwjIHC6ezrAvmpE5RzlUY,13602
|
76
|
-
sglang/srt/models/minicpm.py,sha256=Mj-dbhfN7li7cTEP-0sV7i5PSYkMGIaYCqRU7eDc-BY,13837
|
77
|
-
sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
|
78
|
-
sglang/srt/models/mixtral.py,sha256=QiswCUdZ4VwMghtrr_vGP_dkzxSCrcUIcBgjlOZh_Ao,21391
|
79
|
-
sglang/srt/models/mixtral_quant.py,sha256=I1sIdistZHw7GO35qvlteA16DGVtME5rvEVV86v0-7Y,14216
|
80
|
-
sglang/srt/models/qwen.py,sha256=xAtlWyhMkcfwocRqzZoH01qKbkohXxAf4tnkPh0xtpM,10000
|
81
|
-
sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12278
|
82
|
-
sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
|
83
|
-
sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
|
84
|
-
sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
|
85
|
-
sglang/srt/openai_api/adapter.py,sha256=h6TIU0Fu3jU361pye4J12vcDug7UJJRPiBAY_HfFUuE,32599
|
86
|
-
sglang/srt/openai_api/protocol.py,sha256=JXLnnQ63I-bJv93ICPfP0cBpyomQA5IYE_mkUg5X4Es,8177
|
87
|
-
sglang/test/run_eval.py,sha256=WvMLSi70G9fhruP8cPLOfDJ9XEKL7yNn2pylx-7tNsQ,3054
|
88
|
-
sglang/test/simple_eval_common.py,sha256=Qh1-iEXJCKfJmgpAzNSp28fcP1TUJzt3s9i1FjvemHY,12340
|
89
|
-
sglang/test/simple_eval_humaneval.py,sha256=IW0ZC6D4SXu06IJiMoAY9DK9SMsTOlDPAwu4cfbJco0,5826
|
90
|
-
sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
|
91
|
-
sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
|
92
|
-
sglang/test/test_utils.py,sha256=PndOL1zdseMrpHTHGmgsHHepxqYBn__eNLrlsSXLy6k,11905
|
93
|
-
sglang-0.2.9.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
94
|
-
sglang-0.2.9.post1.dist-info/METADATA,sha256=BY728JRrlJkEdmiLJAvi_qfuUY2LDBqcKqqtooU9dxk,33281
|
95
|
-
sglang-0.2.9.post1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
96
|
-
sglang-0.2.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
97
|
-
sglang-0.2.9.post1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|