sglang 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_latency.py +114 -63
- sglang/check_env.py +1 -0
- sglang/lang/backend/runtime_endpoint.py +0 -11
- sglang/srt/hf_transformers_utils.py +2 -2
- sglang/srt/layers/extend_attention.py +59 -7
- sglang/srt/layers/radix_attention.py +22 -9
- sglang/srt/layers/token_attention.py +28 -2
- sglang/srt/managers/io_struct.py +9 -4
- sglang/srt/managers/schedule_batch.py +15 -11
- sglang/srt/managers/tokenizer_manager.py +28 -13
- sglang/srt/mem_cache/memory_pool.py +65 -24
- sglang/srt/model_config.py +11 -0
- sglang/srt/model_executor/model_runner.py +46 -17
- sglang/srt/models/deepseek_v2.py +198 -16
- sglang/srt/openai_api/adapter.py +120 -20
- sglang/srt/openai_api/protocol.py +1 -1
- sglang/srt/server.py +87 -78
- sglang/srt/server_args.py +8 -2
- sglang/srt/utils.py +25 -20
- sglang/test/run_eval.py +21 -10
- sglang/test/runners.py +237 -0
- sglang/test/simple_eval_common.py +12 -12
- sglang/test/simple_eval_gpqa.py +92 -0
- sglang/test/simple_eval_humaneval.py +5 -5
- sglang/test/simple_eval_math.py +72 -0
- sglang/test/test_utils.py +94 -13
- sglang/utils.py +15 -37
- sglang/version.py +1 -1
- {sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/METADATA +29 -28
- {sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/RECORD +33 -30
- {sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/LICENSE +0 -0
- {sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/WHEEL +0 -0
- {sglang-0.2.9.post1.dist-info → sglang-0.2.10.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
"""Common utilities for testing and benchmarking"""
|
2
2
|
|
3
|
+
import argparse
|
3
4
|
import asyncio
|
5
|
+
import multiprocessing
|
4
6
|
import subprocess
|
7
|
+
import threading
|
5
8
|
import time
|
9
|
+
import unittest
|
6
10
|
from functools import partial
|
11
|
+
from typing import Callable, List, Optional
|
7
12
|
|
8
13
|
import numpy as np
|
9
14
|
import requests
|
@@ -247,7 +252,7 @@ async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=
|
|
247
252
|
return choices.index(answer)
|
248
253
|
|
249
254
|
|
250
|
-
def add_common_other_args_and_parse(parser):
|
255
|
+
def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
|
251
256
|
parser.add_argument("--parallel", type=int, default=64)
|
252
257
|
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
253
258
|
parser.add_argument("--port", type=int, default=None)
|
@@ -286,7 +291,7 @@ def add_common_other_args_and_parse(parser):
|
|
286
291
|
return args
|
287
292
|
|
288
293
|
|
289
|
-
def add_common_sglang_args_and_parse(parser):
|
294
|
+
def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
|
290
295
|
parser.add_argument("--parallel", type=int, default=64)
|
291
296
|
parser.add_argument("--host", type=str, default="http://127.0.0.1")
|
292
297
|
parser.add_argument("--port", type=int, default=30000)
|
@@ -296,7 +301,7 @@ def add_common_sglang_args_and_parse(parser):
|
|
296
301
|
return args
|
297
302
|
|
298
303
|
|
299
|
-
def select_sglang_backend(args):
|
304
|
+
def select_sglang_backend(args: argparse.Namespace):
|
300
305
|
if args.backend.startswith("srt"):
|
301
306
|
if args.backend == "srt-no-parallel":
|
302
307
|
global_config.enable_parallel_decoding = False
|
@@ -309,7 +314,7 @@ def select_sglang_backend(args):
|
|
309
314
|
return backend
|
310
315
|
|
311
316
|
|
312
|
-
def _get_call_generate(args):
|
317
|
+
def _get_call_generate(args: argparse.Namespace):
|
313
318
|
if args.backend == "lightllm":
|
314
319
|
return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
|
315
320
|
elif args.backend == "vllm":
|
@@ -336,7 +341,7 @@ def _get_call_generate(args):
|
|
336
341
|
raise ValueError(f"Invalid backend: {args.backend}")
|
337
342
|
|
338
343
|
|
339
|
-
def _get_call_select(args):
|
344
|
+
def _get_call_select(args: argparse.Namespace):
|
340
345
|
if args.backend == "lightllm":
|
341
346
|
return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
|
342
347
|
elif args.backend == "vllm":
|
@@ -359,7 +364,7 @@ def _get_call_select(args):
|
|
359
364
|
raise ValueError(f"Invalid backend: {args.backend}")
|
360
365
|
|
361
366
|
|
362
|
-
def get_call_generate(args):
|
367
|
+
def get_call_generate(args: argparse.Namespace):
|
363
368
|
call_generate = _get_call_generate(args)
|
364
369
|
|
365
370
|
def func(*args, **kwargs):
|
@@ -372,7 +377,7 @@ def get_call_generate(args):
|
|
372
377
|
return func
|
373
378
|
|
374
379
|
|
375
|
-
def get_call_select(args):
|
380
|
+
def get_call_select(args: argparse.Namespace):
|
376
381
|
call_select = _get_call_select(args)
|
377
382
|
|
378
383
|
def func(*args, **kwargs):
|
@@ -385,7 +390,16 @@ def get_call_select(args):
|
|
385
390
|
return func
|
386
391
|
|
387
392
|
|
388
|
-
def popen_launch_server(
|
393
|
+
def popen_launch_server(
|
394
|
+
model: str,
|
395
|
+
base_url: str,
|
396
|
+
timeout: float,
|
397
|
+
api_key: Optional[str] = None,
|
398
|
+
other_args: tuple = (),
|
399
|
+
):
|
400
|
+
_, host, port = base_url.split(":")
|
401
|
+
host = host[2:]
|
402
|
+
|
389
403
|
command = [
|
390
404
|
"python3",
|
391
405
|
"-m",
|
@@ -393,21 +407,88 @@ def popen_launch_server(model, port, timeout, *args):
|
|
393
407
|
"--model-path",
|
394
408
|
model,
|
395
409
|
"--host",
|
396
|
-
|
410
|
+
host,
|
397
411
|
"--port",
|
398
|
-
|
399
|
-
*
|
412
|
+
port,
|
413
|
+
*other_args,
|
400
414
|
]
|
415
|
+
if api_key:
|
416
|
+
command += ["--api-key", api_key]
|
417
|
+
|
401
418
|
process = subprocess.Popen(command, stdout=None, stderr=None)
|
402
|
-
base_url = f"http://localhost:{port}/v1"
|
403
419
|
|
404
420
|
start_time = time.time()
|
405
421
|
while time.time() - start_time < timeout:
|
406
422
|
try:
|
407
|
-
|
423
|
+
headers = {
|
424
|
+
"Content-Type": "application/json; charset=utf-8",
|
425
|
+
"Authorization": f"Bearer {api_key}",
|
426
|
+
}
|
427
|
+
response = requests.get(f"{base_url}/v1/models", headers=headers)
|
408
428
|
if response.status_code == 200:
|
409
429
|
return process
|
410
430
|
except requests.RequestException:
|
411
431
|
pass
|
412
432
|
time.sleep(10)
|
413
433
|
raise TimeoutError("Server failed to start within the timeout period.")
|
434
|
+
|
435
|
+
|
436
|
+
def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run a function in a worker thread and wait at most ``timeout`` seconds.

    Args:
        func: The callable to run.
        args: Positional arguments forwarded to ``func``.
        kwargs: Keyword arguments forwarded to ``func``; ``None`` means none.
        timeout: Seconds to wait for ``func``; ``None`` waits indefinitely.

    Returns:
        Whatever ``func`` returns.

    Raises:
        TimeoutError: ``func`` did not finish within ``timeout`` seconds. The
            worker thread is not stopped; it keeps running in the background.
        RuntimeError: ``func`` raised. The original exception is attached as
            ``__cause__`` so the real failure is not lost.
    """
    ret_value = []
    errors = []

    def _target_func():
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except BaseException as e:
            # Hand the failure back to the calling thread instead of letting
            # it disappear inside the worker.
            errors.append(e)

    # Daemon thread: a worker that is still stuck after the timeout must not
    # keep the interpreter from exiting.
    t = threading.Thread(target=_target_func, daemon=True)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError()

    if not ret_value:
        # The worker finished without producing a value, so it must have raised.
        cause = errors[0] if errors else None
        raise RuntimeError(f"{func!r} raised {cause!r}") from cause

    return ret_value[0]
|
458
|
+
|
459
|
+
|
460
|
+
def run_unittest_files(files: List[str], timeout_per_file: float):
    """Run each unittest file in its own process, stopping at the first problem.

    Args:
        files: Test file names; each one is passed to ``unittest.main`` as a
            command-line argument.
        timeout_per_file: Maximum number of seconds a single file may run.

    Returns:
        0 if every file's process exited with code 0, otherwise -1 (a file
        failed or timed out). Remaining files are not run after a failure.
    """
    tic = time.time()
    success = True

    for filename in files:

        def func():
            print(f"\n\nRun {filename}\n\n")
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        # NOTE(review): the target is a local closure, so this presumably
        # relies on a start method that does not pickle it (e.g. fork).
        p = multiprocessing.Process(target=func)

        def run_one_file():
            p.start()
            p.join()

        try:
            run_with_timeout(run_one_file, timeout=timeout_per_file)
            if p.exitcode != 0:
                success = False
                break
        except TimeoutError:
            p.terminate()
            time.sleep(5)
            print(
                f"\nTimeout after {timeout_per_file} seconds when running {filename}\n"
            )
            # A timeout is a failure: fall through to the summary and return
            # -1. Returning False here would read as exit code 0 (success).
            success = False
            break

    if success:
        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
    else:
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")

    return 0 if success else -1
|
sglang/utils.py
CHANGED
@@ -12,6 +12,7 @@ import urllib.request
|
|
12
12
|
from concurrent.futures import ThreadPoolExecutor
|
13
13
|
from io import BytesIO
|
14
14
|
from json import dumps
|
15
|
+
from typing import Union
|
15
16
|
|
16
17
|
import numpy as np
|
17
18
|
import requests
|
@@ -25,7 +26,7 @@ def get_exception_traceback():
|
|
25
26
|
return err_str
|
26
27
|
|
27
28
|
|
28
|
-
def is_same_type(values):
|
29
|
+
def is_same_type(values: list):
|
29
30
|
"""Return whether the elements in values are of the same type."""
|
30
31
|
if len(values) <= 1:
|
31
32
|
return True
|
@@ -45,7 +46,7 @@ def read_jsonl(filename: str):
|
|
45
46
|
return rets
|
46
47
|
|
47
48
|
|
48
|
-
def dump_state_text(filename, states, mode="w"):
|
49
|
+
def dump_state_text(filename: str, states: list, mode: str = "w"):
|
49
50
|
"""Dump program state in a text file."""
|
50
51
|
from sglang.lang.interpreter import ProgramState
|
51
52
|
|
@@ -75,19 +76,13 @@ class HttpResponse:
|
|
75
76
|
return self.resp.status
|
76
77
|
|
77
78
|
|
78
|
-
def http_request(
|
79
|
-
url, json=None, stream=False, auth_token=None, api_key=None, verify=None
|
80
|
-
):
|
79
|
+
def http_request(url, json=None, stream=False, api_key=None, verify=None):
|
81
80
|
"""A faster version of requests.post with low-level urllib API."""
|
82
81
|
headers = {"Content-Type": "application/json; charset=utf-8"}
|
83
82
|
|
84
|
-
# add the Authorization header if an
|
85
|
-
if auth_token is not None:
|
86
|
-
headers["Authorization"] = f"Bearer {auth_token}"
|
87
|
-
|
88
|
-
# add the API Key header if an API key is provided
|
83
|
+
# add the Authorization header if an api key is provided
|
89
84
|
if api_key is not None:
|
90
|
-
headers["
|
85
|
+
headers["Authorization"] = f"Bearer {api_key}"
|
91
86
|
|
92
87
|
if stream:
|
93
88
|
return requests.post(url, json=json, stream=True, headers=headers)
|
@@ -105,7 +100,7 @@ def http_request(
|
|
105
100
|
return HttpResponse(e)
|
106
101
|
|
107
102
|
|
108
|
-
def encode_image_base64(image_path):
|
103
|
+
def encode_image_base64(image_path: Union[str, bytes]):
|
109
104
|
"""Encode an image in base64."""
|
110
105
|
if isinstance(image_path, str):
|
111
106
|
with open(image_path, "rb") as image_file:
|
@@ -144,7 +139,7 @@ def encode_frame(frame):
|
|
144
139
|
return frame_bytes
|
145
140
|
|
146
141
|
|
147
|
-
def encode_video_base64(video_path, num_frames=16):
|
142
|
+
def encode_video_base64(video_path: str, num_frames: int = 16):
|
148
143
|
import cv2 # pip install opencv-python-headless
|
149
144
|
|
150
145
|
cap = cv2.VideoCapture(video_path)
|
@@ -190,7 +185,7 @@ def encode_video_base64(video_path, num_frames=16):
|
|
190
185
|
return video_base64
|
191
186
|
|
192
187
|
|
193
|
-
def _is_chinese_char(cp):
|
188
|
+
def _is_chinese_char(cp: int):
|
194
189
|
"""Checks whether CP is the codepoint of a CJK character."""
|
195
190
|
# This defines a "chinese character" as anything in the CJK Unicode block:
|
196
191
|
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
|
@@ -215,7 +210,7 @@ def _is_chinese_char(cp):
|
|
215
210
|
return False
|
216
211
|
|
217
212
|
|
218
|
-
def find_printable_text(text):
|
213
|
+
def find_printable_text(text: str):
|
219
214
|
"""Returns the longest printable substring of text that contains only entire words."""
|
220
215
|
# Borrowed from https://github.com/huggingface/transformers/blob/061580c82c2db1de9139528243e105953793f7a2/src/transformers/generation/streamers.py#L99
|
221
216
|
|
@@ -234,26 +229,7 @@ def find_printable_text(text):
|
|
234
229
|
return text[: text.rfind(" ") + 1]
|
235
230
|
|
236
231
|
|
237
|
-
def
|
238
|
-
"""Run a function with timeout."""
|
239
|
-
ret_value = []
|
240
|
-
|
241
|
-
def _target_func():
|
242
|
-
ret_value.append(func(*args, **(kwargs or {})))
|
243
|
-
|
244
|
-
t = threading.Thread(target=_target_func)
|
245
|
-
t.start()
|
246
|
-
t.join(timeout=timeout)
|
247
|
-
if t.is_alive():
|
248
|
-
raise TimeoutError()
|
249
|
-
|
250
|
-
if not ret_value:
|
251
|
-
raise RuntimeError()
|
252
|
-
|
253
|
-
return ret_value[0]
|
254
|
-
|
255
|
-
|
256
|
-
def graceful_registry(sub_module_name):
|
232
|
+
def graceful_registry(sub_module_name: str):
|
257
233
|
def graceful_shutdown(signum, frame):
|
258
234
|
logger.info(
|
259
235
|
f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
|
@@ -265,7 +241,9 @@ def graceful_registry(sub_module_name):
|
|
265
241
|
|
266
242
|
|
267
243
|
class LazyImport:
|
268
|
-
|
244
|
+
"""Lazy import to make `import sglang` run faster."""
|
245
|
+
|
246
|
+
def __init__(self, module_name: str, class_name: str):
|
269
247
|
self.module_name = module_name
|
270
248
|
self.class_name = class_name
|
271
249
|
self._module = None
|
@@ -276,7 +254,7 @@ class LazyImport:
|
|
276
254
|
self._module = getattr(module, self.class_name)
|
277
255
|
return self._module
|
278
256
|
|
279
|
-
def __getattr__(self, name):
|
257
|
+
def __getattr__(self, name: str):
|
280
258
|
module = self._load()
|
281
259
|
return getattr(module, name)
|
282
260
|
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.2.
|
1
|
+
__version__ = "0.2.10"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.2.
|
3
|
+
Version: 0.2.10
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -215,34 +215,35 @@ Requires-Dist: requests
|
|
215
215
|
Requires-Dist: tqdm
|
216
216
|
Requires-Dist: numpy
|
217
217
|
Provides-Extra: all
|
218
|
-
Requires-Dist: sglang[srt]
|
219
|
-
Requires-Dist: sglang[openai]
|
220
|
-
Requires-Dist: sglang[anthropic]
|
221
|
-
Requires-Dist: sglang[litellm]
|
218
|
+
Requires-Dist: sglang[srt]; extra == "all"
|
219
|
+
Requires-Dist: sglang[openai]; extra == "all"
|
220
|
+
Requires-Dist: sglang[anthropic]; extra == "all"
|
221
|
+
Requires-Dist: sglang[litellm]; extra == "all"
|
222
222
|
Provides-Extra: anthropic
|
223
|
-
Requires-Dist: anthropic
|
223
|
+
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
224
224
|
Provides-Extra: litellm
|
225
|
-
Requires-Dist: litellm
|
225
|
+
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
226
226
|
Provides-Extra: openai
|
227
|
-
Requires-Dist: openai
|
228
|
-
Requires-Dist: tiktoken
|
227
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
228
|
+
Requires-Dist: tiktoken; extra == "openai"
|
229
229
|
Provides-Extra: srt
|
230
|
-
Requires-Dist: aiohttp
|
231
|
-
Requires-Dist: fastapi
|
232
|
-
Requires-Dist: hf-transfer
|
233
|
-
Requires-Dist: huggingface-hub
|
234
|
-
Requires-Dist: interegular
|
235
|
-
Requires-Dist:
|
236
|
-
Requires-Dist:
|
237
|
-
Requires-Dist:
|
238
|
-
Requires-Dist:
|
239
|
-
Requires-Dist:
|
240
|
-
Requires-Dist:
|
241
|
-
Requires-Dist:
|
242
|
-
Requires-Dist:
|
243
|
-
Requires-Dist:
|
244
|
-
Requires-Dist:
|
245
|
-
Requires-Dist:
|
230
|
+
Requires-Dist: aiohttp; extra == "srt"
|
231
|
+
Requires-Dist: fastapi; extra == "srt"
|
232
|
+
Requires-Dist: hf-transfer; extra == "srt"
|
233
|
+
Requires-Dist: huggingface-hub; extra == "srt"
|
234
|
+
Requires-Dist: interegular; extra == "srt"
|
235
|
+
Requires-Dist: jsonlines; extra == "srt"
|
236
|
+
Requires-Dist: packaging; extra == "srt"
|
237
|
+
Requires-Dist: pillow; extra == "srt"
|
238
|
+
Requires-Dist: psutil; extra == "srt"
|
239
|
+
Requires-Dist: pydantic; extra == "srt"
|
240
|
+
Requires-Dist: python-multipart; extra == "srt"
|
241
|
+
Requires-Dist: torch; extra == "srt"
|
242
|
+
Requires-Dist: uvicorn; extra == "srt"
|
243
|
+
Requires-Dist: uvloop; extra == "srt"
|
244
|
+
Requires-Dist: zmq; extra == "srt"
|
245
|
+
Requires-Dist: vllm==0.5.3.post1; extra == "srt"
|
246
|
+
Requires-Dist: outlines>=0.0.44; extra == "srt"
|
246
247
|
|
247
248
|
<div align="center">
|
248
249
|
<img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400"></img>
|
@@ -300,8 +301,8 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
|
300
301
|
|
301
302
|
### Method 2: From source
|
302
303
|
```
|
303
|
-
# Use the
|
304
|
-
git clone -b v0.2.
|
304
|
+
# Use the last release branch
|
305
|
+
git clone -b v0.2.10 https://github.com/sgl-project/sglang.git
|
305
306
|
cd sglang
|
306
307
|
|
307
308
|
pip install --upgrade pip
|
@@ -453,7 +454,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
|
|
453
454
|
|
454
455
|
### Benchmark Performance
|
455
456
|
|
456
|
-
- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as
|
457
|
+
- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`.
|
457
458
|
```
|
458
459
|
python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
|
459
460
|
```
|
@@ -1,13 +1,13 @@
|
|
1
1
|
sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
|
2
2
|
sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
|
3
|
-
sglang/bench_latency.py,sha256=
|
3
|
+
sglang/bench_latency.py,sha256=lHk9C3XM1e-UQd6HY2qn-njr2rG5AFQ_sNVD5hcF5Vc,12162
|
4
4
|
sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
|
5
|
-
sglang/check_env.py,sha256=
|
5
|
+
sglang/check_env.py,sha256=XlVou81XC20tPFVTuKDSKqDqLQJoO2QvlnReWMf-Ho4,4152
|
6
6
|
sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
|
7
7
|
sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
|
8
8
|
sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
|
9
|
-
sglang/utils.py,sha256=
|
10
|
-
sglang/version.py,sha256=
|
9
|
+
sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
|
10
|
+
sglang/version.py,sha256=waXgc7p-jgGCsUjdVfO_KjlVZblnCvrzf4A0dsBj_lg,23
|
11
11
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
|
13
13
|
sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
|
@@ -19,51 +19,51 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
|
|
19
19
|
sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
|
20
20
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
21
21
|
sglang/lang/backend/openai.py,sha256=6ww2rwKouWgtmjaCf4hk-kXXJ6bY6n9Xnbm3UTFZvl4,14808
|
22
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=
|
22
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=n78pyBWTCMYmDAS-0yZVFvzQYCiACz8Usj7FTDfdVKE,8763
|
23
23
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
24
24
|
sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
|
25
|
-
sglang/srt/hf_transformers_utils.py,sha256=
|
25
|
+
sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
|
26
26
|
sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
|
27
|
-
sglang/srt/model_config.py,sha256=
|
27
|
+
sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
|
28
28
|
sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
|
29
|
-
sglang/srt/server.py,sha256=
|
30
|
-
sglang/srt/server_args.py,sha256=
|
31
|
-
sglang/srt/utils.py,sha256=
|
29
|
+
sglang/srt/server.py,sha256=ur_fDb-nEmlzz1mSKwWa87XFJdQM1gxFz4cahMcMatA,16028
|
30
|
+
sglang/srt/server_args.py,sha256=oUMzSSBrJ5_g0yeBapABUv2MlhDNWEfWLdLVROgqZOU,16305
|
31
|
+
sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
|
32
32
|
sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
|
33
33
|
sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
|
34
34
|
sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
|
35
35
|
sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
|
36
36
|
sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
|
37
|
-
sglang/srt/layers/extend_attention.py,sha256=
|
37
|
+
sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
|
38
38
|
sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
|
39
39
|
sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
|
40
40
|
sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
|
41
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
42
|
-
sglang/srt/layers/token_attention.py,sha256=
|
41
|
+
sglang/srt/layers/radix_attention.py,sha256=cNSQWO74DcXgpAMKSMaHzfpy5IcLORUnWe5gOwATLrw,7466
|
42
|
+
sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
|
43
43
|
sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
|
44
44
|
sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
|
45
45
|
sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
|
46
46
|
sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
|
47
47
|
sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
|
48
|
-
sglang/srt/managers/io_struct.py,sha256=
|
48
|
+
sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
|
49
49
|
sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
|
50
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
51
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
50
|
+
sglang/srt/managers/schedule_batch.py,sha256=yIjiiMcaYYN9iaEOGQZoPUpFviDptMVh9hMwRRnDAco,37896
|
51
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=kxkoAa8VbQt9FJPX1fN-7IzAD8RIcIvz3AGR8uEMYjk,21202
|
52
52
|
sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
|
53
53
|
sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
|
54
54
|
sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
|
55
55
|
sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
|
56
|
-
sglang/srt/mem_cache/memory_pool.py,sha256=
|
56
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=8N4eHybhtBuwIwYyeNSvrZI90LGgMG8sA3OrXdXZAZs,5496
|
57
57
|
sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
|
58
58
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
|
59
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
59
|
+
sglang/srt/model_executor/model_runner.py,sha256=tJHlqk_JH3RJDaPAiSljaDI951LUu9AYW679eCKMJXs,17404
|
60
60
|
sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
|
61
61
|
sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
|
62
62
|
sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
|
63
63
|
sglang/srt/models/commandr.py,sha256=gaTI77hgitPlcUNyxMEdGu_XZQj2DuAMnh3KbZQ9HFg,14166
|
64
64
|
sglang/srt/models/dbrx.py,sha256=LQu7I2KH-XzY9iBlaK7IQsM1o3kzsuI1vTCspK2C19o,14655
|
65
65
|
sglang/srt/models/deepseek.py,sha256=adr57ZX6aPOBOpmvm7YIvoqo6u0jdrKJPZ8SGcVXAh8,16014
|
66
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
66
|
+
sglang/srt/models/deepseek_v2.py,sha256=jaVaQlL1aPCTu8nLcvtAW_rmtvHe6y2CviIOjXzh4q4,26962
|
67
67
|
sglang/srt/models/gemma.py,sha256=PMPI1-WLuLdk6e7u6I9d_LoCkauLkWY3aOP8MFEZ-sI,12279
|
68
68
|
sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,16420
|
69
69
|
sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
|
@@ -82,16 +82,19 @@ sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12
|
|
82
82
|
sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
|
83
83
|
sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
|
84
84
|
sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
|
85
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
86
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
87
|
-
sglang/test/run_eval.py,sha256=
|
88
|
-
sglang/test/
|
89
|
-
sglang/test/
|
85
|
+
sglang/srt/openai_api/adapter.py,sha256=p2HeYO9Qgl7EERXutwpsQ659NvZhFnkQmTZX5s-x-oI,37444
|
86
|
+
sglang/srt/openai_api/protocol.py,sha256=q1MuDUhwSM-8G2uGnWUMeEk87aZxei8lCcaP6VuA8So,8200
|
87
|
+
sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
|
88
|
+
sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
|
89
|
+
sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
|
90
|
+
sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
|
91
|
+
sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
|
92
|
+
sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
|
90
93
|
sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
|
91
94
|
sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
|
92
|
-
sglang/test/test_utils.py,sha256=
|
93
|
-
sglang-0.2.
|
94
|
-
sglang-0.2.
|
95
|
-
sglang-0.2.
|
96
|
-
sglang-0.2.
|
97
|
-
sglang-0.2.
|
95
|
+
sglang/test/test_utils.py,sha256=p-G6iiT5-Vkg6LMYgvDheomLJ6IYMLsYHCp3tkatiy8,13983
|
96
|
+
sglang-0.2.10.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
97
|
+
sglang-0.2.10.dist-info/METADATA,sha256=Lt9wnP2134unvF88fDj2PfQIf2YaeYJ6xZdfmMAJkoM,33303
|
98
|
+
sglang-0.2.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
99
|
+
sglang-0.2.10.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
100
|
+
sglang-0.2.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|