sglang 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +57 -2
- sglang/api.py +8 -5
- sglang/backend/anthropic.py +18 -4
- sglang/backend/openai.py +2 -1
- sglang/backend/runtime_endpoint.py +18 -5
- sglang/backend/vertexai.py +1 -0
- sglang/global_config.py +5 -1
- sglang/lang/chat_template.py +83 -2
- sglang/lang/interpreter.py +92 -35
- sglang/lang/ir.py +12 -9
- sglang/lang/tracer.py +6 -4
- sglang/launch_server_llavavid.py +31 -0
- sglang/srt/constrained/fsm_cache.py +1 -0
- sglang/srt/constrained/jump_forward.py +1 -0
- sglang/srt/conversation.py +2 -2
- sglang/srt/flush_cache.py +16 -0
- sglang/srt/hf_transformers_utils.py +10 -2
- sglang/srt/layers/context_flashattention_nopad.py +1 -0
- sglang/srt/layers/extend_attention.py +1 -0
- sglang/srt/layers/logits_processor.py +114 -54
- sglang/srt/layers/radix_attention.py +2 -1
- sglang/srt/layers/token_attention.py +1 -0
- sglang/srt/managers/detokenizer_manager.py +5 -1
- sglang/srt/managers/io_struct.py +27 -3
- sglang/srt/managers/router/infer_batch.py +97 -48
- sglang/srt/managers/router/manager.py +11 -8
- sglang/srt/managers/router/model_rpc.py +169 -90
- sglang/srt/managers/router/model_runner.py +110 -166
- sglang/srt/managers/router/radix_cache.py +89 -51
- sglang/srt/managers/router/scheduler.py +17 -28
- sglang/srt/managers/tokenizer_manager.py +110 -33
- sglang/srt/memory_pool.py +5 -14
- sglang/srt/model_config.py +11 -0
- sglang/srt/models/commandr.py +372 -0
- sglang/srt/models/dbrx.py +412 -0
- sglang/srt/models/dbrx_config.py +281 -0
- sglang/srt/models/gemma.py +24 -25
- sglang/srt/models/llama2.py +25 -26
- sglang/srt/models/llava.py +8 -10
- sglang/srt/models/llavavid.py +307 -0
- sglang/srt/models/mixtral.py +29 -33
- sglang/srt/models/qwen.py +34 -25
- sglang/srt/models/qwen2.py +25 -26
- sglang/srt/models/stablelm.py +26 -26
- sglang/srt/models/yivl.py +3 -5
- sglang/srt/openai_api_adapter.py +356 -0
- sglang/srt/{managers/openai_protocol.py → openai_protocol.py} +36 -20
- sglang/srt/sampling_params.py +2 -0
- sglang/srt/server.py +91 -456
- sglang/srt/server_args.py +79 -49
- sglang/srt/utils.py +212 -47
- sglang/srt/weight_utils.py +417 -0
- sglang/test/test_programs.py +8 -7
- sglang/test/test_utils.py +195 -7
- sglang/utils.py +77 -26
- {sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/METADATA +20 -18
- sglang-0.1.16.dist-info/RECORD +72 -0
- sglang-0.1.14.dist-info/RECORD +0 -64
- {sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/LICENSE +0 -0
- {sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/WHEEL +0 -0
- {sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -1,13 +1,20 @@
 """Common utilities for testing and benchmarking"""

+import asyncio
+from functools import partial
+
 import numpy as np
 import requests
+
 from sglang.backend.openai import OpenAI
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.global_config import global_config
+from sglang.srt.utils import get_exception_traceback


-def call_generate_lightllm(prompt, temperature, max_tokens, stop, url):
+def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
+    assert url is not None
+
     data = {
         "inputs": prompt,
         "parameters": {
@@ -22,7 +29,9 @@ def call_generate_lightllm(prompt, temperature, max_tokens, stop, url):
     return pred


-def call_generate_vllm(prompt, temperature, max_tokens, stop, url, n=1):
+def call_generate_vllm(prompt, temperature, max_tokens, stop=None, n=1, url=None):
+    assert url is not None
+
     data = {
         "prompt": prompt,
         "temperature": temperature,
@@ -40,8 +49,10 @@ def call_generate_vllm(prompt, temperature, max_tokens, stop, url, n=1):


 def call_generate_outlines(
-    prompt, temperature, max_tokens,
+    prompt, temperature, max_tokens, stop=[], regex=None, n=1, url=None
 ):
+    assert url is not None
+
     data = {
         "prompt": prompt,
         "temperature": temperature,
@@ -59,7 +70,9 @@ def call_generate_outlines(
     return pred


-def call_generate_srt_raw(prompt, temperature, max_tokens, stop, url):
+def call_generate_srt_raw(prompt, temperature, max_tokens, stop=None, url=None):
+    assert url is not None
+
     data = {
         "text": prompt,
         "sampling_params": {
@@ -75,7 +88,71 @@ def call_generate_srt_raw(prompt, temperature, max_tokens, stop, url):
     return pred


-def call_select_lightllm(context, choices, url):
+def call_generate_guidance(
+    prompt, temperature, max_tokens, stop=None, n=1, regex=None, model=None
+):
+    assert model is not None
+    from guidance import gen
+
+    rets = []
+    for _ in range(n):
+        out = (
+            model
+            + prompt
+            + gen(
+                name="answer",
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stop=stop,
+                regex=regex,
+            )
+        )
+        rets.append(out["answer"])
+    return rets if n > 1 else rets[0]
+
+
+async def call_generate_lmql(
+    prompt, temperature, max_tokens, stop=None, n=1, max_len=4096, model=None, **kwargs
+):
+    assert model is not None
+    import lmql
+
+    if stop != None:
+
+        @lmql.query(model=model)
+        async def program(question, max_tokens, stop):
+            '''lmql
+            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens and STOPS_AT(ANSWER, stop)
+            return ANSWER
+            '''
+
+    else:
+
+        @lmql.query(model=model)
+        async def program(question, max_tokens):
+            '''lmql
+            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens
+            return ANSWER
+            '''
+
+    tasks = [
+        program(
+            question=prompt,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            stop=stop,
+            max_len=max_len,
+            **kwargs,
+        )
+        for _ in range(n)
+    ]
+    rets = await asyncio.gather(*tasks)
+    return rets if n > 1 else rets[0]
+
+
+def call_select_lightllm(context, choices, url=None):
+    assert url is not None
+
     scores = []
     for i in range(len(choices)):
         data = {
@@ -90,7 +167,9 @@ def call_select_lightllm(context, choices, url):
     return np.argmax(scores)


-def call_select_vllm(context, choices, url):
+def call_select_vllm(context, choices, url=None):
+    assert url is not None
+
     scores = []
     for i in range(len(choices)):
         data = {
@@ -112,6 +191,31 @@ def call_select_vllm(context, choices, url):
     """


+def call_select_guidance(context, choices, model=None):
+    assert model is not None
+    from guidance import select
+
+    out = model + context + select(choices, name="answer")
+    return choices.index(out["answer"])
+
+
+async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=None):
+    assert model is not None
+    import lmql
+
+    @lmql.query(model=model)
+    async def program(ctx, choices):
+        '''lmql
+        """{ctx}[ANSWER]""" where ANSWER in set(choices)
+        return ANSWER
+        '''
+
+    answer = await program(
+        ctx=context, choices=choices, temperature=temperature, max_len=max_len
+    )
+    return choices.index(answer)
+
+
 def add_common_other_args_and_parse(parser):
     parser.add_argument("--parallel", type=int, default=64)
     parser.add_argument("--host", type=str, default="http://127.0.0.1")
@@ -120,8 +224,17 @@ def add_common_other_args_and_parse(parser):
         "--backend",
         type=str,
         required=True,
-        choices=[
+        choices=[
+            "vllm",
+            "outlines",
+            "lightllm",
+            "guidance",
+            "lmql",
+            "srt-raw",
+            "llama.cpp",
+        ],
     )
+    parser.add_argument("--n-ctx", type=int, default=4096)
     parser.add_argument(
         "--model-path", type=str, default="meta-llama/Llama-2-7b-chat-hf"
     )
@@ -131,6 +244,7 @@ def add_common_other_args_and_parse(parser):
     if args.port is None:
         default_port = {
             "vllm": 21000,
+            "outlines": 21000,
             "lightllm": 22000,
             "lmql": 23000,
             "srt-raw": 30000,
@@ -160,3 +274,77 @@ def select_sglang_backend(args):
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
     return backend
+
+
+def _get_call_generate(args):
+    if args.backend == "lightllm":
+        return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "vllm":
+        return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "srt-raw":
+        return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "outlines":
+        return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "guidance":
+        from guidance import models
+
+        model = models.LlamaCpp(args.model_path, n_gpu_layers=-1, n_ctx=args.n_ctx)
+        call_generate = partial(call_generate_guidance, model=model)
+        call_generate("Hello,", 1.0, 8, ".")
+        return call_generate
+    elif args.backend == "lmql":
+        import lmql
+
+        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
+        return partial(call_generate_lmql, model=model)
+    else:
+        raise ValueError(f"Invalid backend: {args.backend}")
+
+
+def _get_call_select(args):
+    if args.backend == "lightllm":
+        return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "vllm":
+        return partial(call_select_vllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "guidance":
+        from guidance import models
+
+        model = models.LlamaCpp(args.model_path, n_gpu_layers=-1, n_ctx=args.n_ctx)
+        call_select = partial(call_select_guidance, model=model)
+
+        call_select("Hello,", ["world", "earth"])
+        return call_select
+
+    elif args.backend == "lmql":
+        import lmql
+
+        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
+        return partial(call_select_lmql, model=model)
+    else:
+        raise ValueError(f"Invalid backend: {args.backend}")
+
+
+def get_call_generate(args):
+    call_generate = _get_call_generate(args)
+
+    def func(*args, **kwargs):
+        try:
+            return call_generate(*args, **kwargs)
+        except Exception:
+            print("Exception in call_generate:\n" + get_exception_traceback())
+            raise
+
+    return func
+
+
+def get_call_select(args):
+    call_select = _get_call_select(args)
+
+    def func(*args, **kwargs):
+        try:
+            return call_select(*args, **kwargs)
+        except Exception:
+            print("Exception in call_select:\n" + get_exception_traceback())
+            raise
+
+    return func
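The new `get_call_generate` / `get_call_select` helpers bind a backend-specific caller once (via `functools.partial`) and wrap it so that any failure prints a full traceback through `get_exception_traceback` before re-raising. A minimal sketch of how a benchmark script might drive them, assuming `add_common_other_args_and_parse` returns the parsed namespace (it reads `args.port` internally, as the hunks above suggest) and that a vLLM server is already listening on the default port:

```python
# Hypothetical driver script; assumes a vLLM server at http://127.0.0.1:21000
# and that it is run with: python driver.py --backend vllm
import argparse

from sglang.test.test_utils import add_common_other_args_and_parse, get_call_generate

parser = argparse.ArgumentParser()
args = add_common_other_args_and_parse(parser)  # assumed to return the parsed args

call_generate = get_call_generate(args)
# Positional order matches the callers above: prompt, temperature, max_tokens, stop.
answer = call_generate("Q: What is 2 + 2?\nA:", 0, 8, "\n")
print(answer)
```

Binding the URL or model once keeps the per-request call signature uniform across all backends, which is what lets the benchmark loop stay backend-agnostic.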
sglang/utils.py
CHANGED
@@ -2,40 +2,23 @@

 import base64
 import json
+import os
+import sys
 import threading
+import traceback
 import urllib.request
+from concurrent.futures import ThreadPoolExecutor
 from io import BytesIO
 from json import dumps

+import numpy as np
 import requests


-def get_available_gpu_memory(gpu_id, distributed=True):
-    """
-    Get available memory for cuda:gpu_id device.
-    When distributed is True, the available memory is the minimum available memory of all GPUs.
-    """
-    import torch
-
-    num_gpus = torch.cuda.device_count()
-    assert gpu_id < num_gpus
-
-    if torch.cuda.current_device() != gpu_id:
-        print(
-            f"WARNING: current device is not {gpu_id}, but {torch.cuda.current_device()}, ",
-            "which may cause useless memory allocation for torch CUDA context.",
-        )
-
-    free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
-
-    if distributed:
-        tensor = torch.tensor(free_gpu_memory, dtype=torch.float32).to(
-            torch.device("cuda", gpu_id)
-        )
-        torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.MIN)
-        free_gpu_memory = tensor.item()
-
-    return free_gpu_memory / (1 << 30)
+def get_exception_traceback():
+    etype, value, tb = sys.exc_info()
+    err_str = "".join(traceback.format_exception(etype, value, tb))
+    return err_str


 def is_same_type(values):
@@ -130,6 +113,74 @@ def encode_image_base64(image_path):
     return base64.b64encode(buffered.getvalue()).decode("utf-8")


+def encode_frame(frame):
+    import cv2  # pip install opencv-python-headless
+    from PIL import Image
+
+    # Convert the frame to RGB (OpenCV uses BGR by default)
+    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    # Convert the frame to PIL Image to easily convert to bytes
+    im_pil = Image.fromarray(frame)
+
+    # Convert to bytes
+    buffered = BytesIO()
+
+    # frame_format = str(os.getenv('FRAME_FORMAT', "JPEG"))
+
+    im_pil.save(buffered, format="PNG")
+
+    frame_bytes = buffered.getvalue()
+
+    # Return the bytes of the frame
+    return frame_bytes
+
+
+def encode_video_base64(video_path, num_frames=16):
+    import cv2
+
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        raise IOError(f"Could not open video file:{video_path}")
+
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    print(f"target_frames: {num_frames}")
+
+    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
+
+    frames = []
+    for i in range(total_frames):
+        ret, frame = cap.read()
+        if ret:
+            frames.append(frame)
+        else:
+            # Handle the case where the frame could not be read
+            # print(f"Warning: Could not read frame at index {i}.")
+            pass
+
+    cap.release()
+
+    # Safely select frames based on frame_indices, avoiding IndexError
+    frames = [frames[i] for i in frame_indices if i < len(frames)]
+
+    # If there are not enough frames, duplicate the last frame until we reach the target
+    while len(frames) < num_frames:
+        frames.append(frames[-1])
+
+    # Use ThreadPoolExecutor to process and encode frames in parallel
+    with ThreadPoolExecutor() as executor:
+        encoded_frames = list(executor.map(encode_frame, frames))
+
+    # encoded_frames = list(map(encode_frame, frames))
+
+    # Concatenate all frames bytes
+    video_bytes = b"".join(encoded_frames)
+
+    # Encode the concatenated bytes to base64
+    video_base64 = "video:" + base64.b64encode(video_bytes).decode("utf-8")
+
+    return video_base64
+
+
 def _is_chinese_char(cp):
     """Checks whether CP is the codepoint of a CJK character."""
     # This defines a "chinese character" as anything in the CJK Unicode block:
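A small sketch of driving the new video helper added above; `sample.mp4` is a placeholder path, and the helpers assume `opencv-python-headless`, `numpy`, and `Pillow` are installed:

```python
# Hypothetical usage of encode_video_base64; "sample.mp4" is a placeholder.
from sglang.utils import encode_video_base64

# Samples 16 evenly spaced frames, PNG-encodes each in a thread pool,
# concatenates the bytes, and returns a single "video:<base64>" string.
video_b64 = encode_video_base64("sample.mp4", num_frames=16)
print(video_b64[:32], f"... ({len(video_b64)} chars)")
```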
{sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.1.14
+Version: 0.1.16
 Summary: A structured generation langauge for LLMs.
 License: Apache License
 Version 2.0, January 2004
@@ -212,6 +212,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: requests
+Requires-Dist: tqdm
 Provides-Extra: all
 Requires-Dist: sglang[srt] ; extra == 'all'
 Requires-Dist: sglang[openai] ; extra == 'all'
@@ -222,6 +223,7 @@ Requires-Dist: numpy ; extra == 'anthropic'
 Provides-Extra: openai
 Requires-Dist: openai >=1.0 ; extra == 'openai'
 Requires-Dist: numpy ; extra == 'openai'
+Requires-Dist: tiktoken ; extra == 'openai'
 Provides-Extra: srt
 Requires-Dist: aiohttp ; extra == 'srt'
 Requires-Dist: fastapi ; extra == 'srt'
@@ -231,16 +233,14 @@ Requires-Dist: torch ; extra == 'srt'
 Requires-Dist: uvloop ; extra == 'srt'
 Requires-Dist: uvicorn ; extra == 'srt'
 Requires-Dist: zmq ; extra == 'srt'
-Requires-Dist: vllm >=0.
+Requires-Dist: vllm >=0.4.2 ; extra == 'srt'
 Requires-Dist: interegular ; extra == 'srt'
-Requires-Dist: lark ; extra == 'srt'
-Requires-Dist: numba ; extra == 'srt'
 Requires-Dist: pydantic ; extra == 'srt'
-Requires-Dist: referencing ; extra == 'srt'
-Requires-Dist: diskcache ; extra == 'srt'
-Requires-Dist: cloudpickle ; extra == 'srt'
 Requires-Dist: pillow ; extra == 'srt'
-Requires-Dist:
+Requires-Dist: packaging ; extra == 'srt'
+Requires-Dist: huggingface-hub ; extra == 'srt'
+Requires-Dist: hf-transfer ; extra == 'srt'
+Requires-Dist: outlines >=0.0.34 ; extra == 'srt'

 <div align="center">
 <img src="assets/logo.png" alt="logo" width="400"></img>
@@ -541,7 +541,6 @@ curl http://localhost:30000/generate \
 Learn more about the argument format [here](docs/sampling_params.md).

 ### OpenAI Compatible API
-
 In addition, the server supports an experimental OpenAI-compatible API.

 ```python
@@ -571,15 +570,17 @@ response = client.chat.completions.create(
 print(response)
 ```

-
-
+
+By default, the server uses the chat template specified in the model tokenizer from Hugging Face. It should just work for most official models such as Llama-2/Llama-3.
+
+If needed, you can also override the chat template when launching the server:

 ```
 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --chat-template llama-2
 ```

 If the chat template you are looking for is missing, you are welcome to contribute it.
-Meanwhile, you can also
+Meanwhile, you can also temporarily register your chat template as follows:

 ```json
 {
@@ -606,7 +607,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
 ```
 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7
 ```
-- You can turn on [flashinfer](docs/flashinfer.md) to
+- You can turn on [flashinfer](docs/flashinfer.md) to accelerate the inference by using highly optimized CUDA kernels.

 ### Supported Models
 - Llama
@@ -622,10 +623,14 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
   - `python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.6-34b --tokenizer-path liuhaotian/llava-v1.6-34b-tokenizer --port 3000`
 - Yi-VL
   - see [srt_example_yi_vl.py](examples/quick_start/srt_example_yi_vl.py).
--
+- StableLM
+- Command-R
+- DBRX
+- AWQ/GPTQ/Marlin quantization

-
+Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).

+## Benchmark And Performance
 - Llama-7B on NVIDIA A10G, FP16, Tensor Parallelism=1
 ![llama_7b](assets/llama_7b.jpg)

@@ -649,7 +654,4 @@ https://github.com/sgl-project/sglang/issues/157
 }
 ```

-[](https://huggingface.co/papers/2312.07104)
-
-
 We learned from the design and reused some code of the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), [LMQL](https://github.com/eth-sri/lmql).
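The README text in this diff describes an experimental OpenAI-compatible API served by the same process. A minimal sketch of a chat call against it, assuming the `openai>=1.0` client; the `model="default"` name and the dummy API key are assumptions here, not something this diff confirms:

```python
# Minimal sketch: talk to the sglang server's OpenAI-compatible endpoint.
# Assumes a server launched as in the README:
#   python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
import openai

client = openai.OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="default",  # placeholder model name
    messages=[{"role": "user", "content": "List 3 countries and their capitals."}],
    temperature=0,
    max_tokens=64,
)
print(response.choices[0].message.content)
```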
sglang-0.1.16.dist-info/RECORD
ADDED
@@ -0,0 +1,72 @@
+sglang/__init__.py,sha256=lKabCNZM2OhtymVLUuW4bpt-Jdxwk81wP1TkhVqIJEg,1058
+sglang/api.py,sha256=hnVPt_p2ALLrraAKpVbkGocVtgb0MqgOH5NUQKOA6sY,4548
+sglang/global_config.py,sha256=LxoF7VGCYszeEafC8zBbzUQ5PPFdv2rPzw2zEGPLgfg,961
+sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
+sglang/launch_server_llavavid.py,sha256=UWo_qUCJ9yknp1TVPzrz4B_aZtEuQpLQq0l96FMgynI,1058
+sglang/utils.py,sha256=Xp5mmhLoXNLB5U0NmCg-WMkfV0Ov4KVqzOvGZa3XKmc,7610
+sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
+sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
+sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
+sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
+sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
+sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/chat_template.py,sha256=ogIT8iMlDcSEgcNBTh5pRLoCkdQI_ec5Hc27wFUFDIg,11532
+sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
+sglang/lang/interpreter.py,sha256=GSIbO9N6ThfchdURb7XzQMZ9U6p1xirKHgXGmqLxKtg,28434
+sglang/lang/ir.py,sha256=NxvIWlUidvtpQpPG4GAXZEN64Y2vLOBjN2Z2JkZVG1U,13350
+sglang/lang/tracer.py,sha256=QcslAObEjepk8XmiqCobwzWaDpihofEQXjeRs_3B8NQ,8282
+sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
+sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
+sglang/srt/flush_cache.py,sha256=JOXLH4pmweVbuEWDPu3SEDrLYFG82nR2SpzbslW4b-A,381
+sglang/srt/hf_transformers_utils.py,sha256=UneOMsw3w7taH9EKIi6uHZ-GNUZG0vbZIWN-ZoQZ5gM,5417
+sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
+sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
+sglang/srt/model_config.py,sha256=843L1KxEPZcEk1uwQH10BwSX9L5DYJ3OGUUBo8wMdZg,1695
+sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
+sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
+sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
+sglang/srt/server.py,sha256=YAUiniJs9ebNrJ0Lweg2TnUL_yZ0P3PtWoT0Z_3d8vk,10371
+sglang/srt/server_args.py,sha256=TQxIEdF0crqtY6WfZ6q7SKOQcCSomBEVjJ5K4HyTSvQ,9539
+sglang/srt/utils.py,sha256=cr2uZmEB-Exq-wi3Y8B3yQu7kFUiyV4PAvzouvKYkWg,13090
+sglang/srt/weight_utils.py,sha256=bFNh9-T8gseB0zKeu1qsMww8FpyrGFxbPcOFSeJtL5Q,15505
+sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
+sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
+sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
+sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
+sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
+sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
+sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
+sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
+sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
+sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
+sglang/srt/managers/io_struct.py,sha256=fFfUQtC-D31xGYdCAfuNVuX3QyaNDgGpfzC8qnKt0YA,4294
+sglang/srt/managers/tokenizer_manager.py,sha256=TlGyFhWz1b24vkeUVvCwKFBERffi-esxGRhoukBnET8,13116
+sglang/srt/managers/router/infer_batch.py,sha256=a1F3EjSBdER5pbgZFifuTdrE2Xom8Mt4aT9rmB8n35M,20204
+sglang/srt/managers/router/manager.py,sha256=tdvYmwGHMeG2MMYZ4ZThdAJ_b4fp94UpemISFWOddno,2697
+sglang/srt/managers/router/model_rpc.py,sha256=FJFgf1KAJ0Z8Yq4EPyczxZkCmZBjwNwCwXcjwyhU0k4,29775
+sglang/srt/managers/router/model_runner.py,sha256=fp9wPh4sQY6Q-5PVtv_e9p5GgkkixSDUIqfFt7lVlV8,16527
+sglang/srt/managers/router/radix_cache.py,sha256=GE6oY8bppRJCIxZWiDKO4P6al58zcqLQe605Y1d2bdo,7924
+sglang/srt/managers/router/scheduler.py,sha256=pvlKSyCyIXmu14eyy1mvP9-QdG78eLUqMlr4cnfes2Y,2259
+sglang/srt/models/commandr.py,sha256=DVdUF5C5etm82RoXJTNjYqlS2W2_9awzxzXNMubRoVg,13579
+sglang/srt/models/dbrx.py,sha256=NIhlJp2B_y_L1ltK_Y7SEenAiHTUUp3p1rf8LIydC0o,14173
+sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
+sglang/srt/models/gemma.py,sha256=Wk25zFkqkdG62xVVJEzeIjDES1LnoO0EY2W2p9XMvbA,11637
+sglang/srt/models/llama2.py,sha256=Y2XwS5XXG77OfPAvbju7zp53CP5izzee_4-laVqu5ZM,11655
+sglang/srt/models/llava.py,sha256=HtR7lUnAYW39vWw6xmDZkbG7AueswZDJxXeu6rQfpSU,14921
+sglang/srt/models/llavavid.py,sha256=ueImEwOR4ZlNFUoBvXbwZPNRcrYWg54sPNK7pmGnrp0,13219
+sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
+sglang/srt/models/mixtral.py,sha256=1aggGw0P0MVQu5C5D3pMaZpRpY_PmrK_nwBOygOlPEM,13839
+sglang/srt/models/qwen.py,sha256=cakvxjghKdGg5iGq9TJ_nGlVQaJ4-9V91EyyZnV4rmc,9390
+sglang/srt/models/qwen2.py,sha256=PyOA8-RA_frRVLXfh8d1Ui1hUd1YmM3GfsPw2q5rCDI,11351
+sglang/srt/models/stablelm.py,sha256=TCfQumj0acu2lCGujJj_PuzHFp3kFIwENQEfT-hnHUA,10867
+sglang/srt/models/yivl.py,sha256=q8MUvIFIWpKCQ4pSZBoFpw-pnbdjkfr-M8jBJfGFu7E,4393
+sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
+sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
+sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
+sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
+sglang-0.1.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.1.16.dist-info/METADATA,sha256=yiziPDpVr6NPPhX58sA0GaLYKCut4FnBKD7TE50HH6k,28911
+sglang-0.1.16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sglang-0.1.16.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.1.16.dist-info/RECORD,,
sglang-0.1.14.dist-info/RECORD
DELETED
@@ -1,64 +0,0 @@
-sglang/__init__.py,sha256=Nxa2M7XCh2-e6I7VrCg7OSBL6BvEW3gyRD14ZdykpRM,96
-sglang/api.py,sha256=0-Eh7c41hWKjPXrzzvLFdLAUVkvmPGJGLAsrG9evDTE,4576
-sglang/global_config.py,sha256=PAX7TWeFcq0HBzNUWyCONAOjqIokWqw8vT7I6sBSKTc,797
-sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
-sglang/utils.py,sha256=2dUXLMPz9VhhzbIRQABmfZnVW5yz61F3UVtb6yKyevM,6237
-sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sglang/backend/anthropic.py,sha256=GJ_T1Jg0VOtajgkgczPKt5sjuVYdbAiWd2jXlJRNRmg,1677
-sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
-sglang/backend/openai.py,sha256=nPdA88A5GISJTH88svJdww3qHWIHZcGG2NEn0XjMkLU,9578
-sglang/backend/runtime_endpoint.py,sha256=r7dTazselaudlFx8hqk-PQLYDHZhpbAKjyFF1zLuM_E,8022
-sglang/backend/vertexai.py,sha256=BLfWf_tEgoHY9srCufJM5PLe3tql2j0G6ia7cPykxCM,4713
-sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sglang/lang/chat_template.py,sha256=MaCF0fvNky0nJC9OvmAeApeHYgM6Lr03mtRhF0lS31U,8000
-sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
-sglang/lang/interpreter.py,sha256=ahRxuEJZ7b1Tts2Lr7wViWIqL-Z12T3anvgj0XdvMN8,26666
-sglang/lang/ir.py,sha256=8Ap-uEUz6K9eNQTOKtMixePuLwRFHFKcN0Z5Yn44nKk,13320
-sglang/lang/tracer.py,sha256=pFiSNzPSg0l7ZZIlGqJDLCmQALR-wyo2dFgJP73J4_Y,8260
-sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
-sglang/srt/conversation.py,sha256=mTstD-SsXG5p_YhWQUPEWU-vzzDMF4RgQ7KmLkOOC7U,15496
-sglang/srt/hf_transformers_utils.py,sha256=soRyYLoCn7GxgxvonufGFkdFBA3eH5i3Izk_wi7p1l0,5285
-sglang/srt/memory_pool.py,sha256=BMoX2wvicj214mV-xvcr_Iv_Je0qs3zTuzXfQVpV8u4,3609
-sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
-sglang/srt/model_config.py,sha256=ned-odjmKBKBhVPo04FEpus9gJsUWxrFLrLxahLwSaw,1328
-sglang/srt/sampling_params.py,sha256=83Fp-4HWThC20TEh139XcIb_erBqfI7KZg5txdRBq7c,2896
-sglang/srt/server.py,sha256=WLXissKuXQI7JFb2V8D47QSF-PPHnW-JZCiQm4YW0xE,24070
-sglang/srt/server_args.py,sha256=bvbi-Rb_JudqztFFfRsuXBYtUsG9hq4zMFt7X97uDhA,8954
-sglang/srt/utils.py,sha256=IEqpmWx_hl4eXn_KoHM0EPXmxeN2wKkgK7H01_t0x5Q,7355
-sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
-sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
-sglang/srt/constrained/fsm_cache.py,sha256=20mEgtDXU1Zeoicl5KBQC3arkg-RhRWiYnchJc00m1g,901
-sglang/srt/constrained/jump_forward.py,sha256=Z-pz2Jnvk1CxSEZA65OVq0GryqdiKuOkhhc13v5T6Lo,2482
-sglang/srt/layers/context_flashattention_nopad.py,sha256=TVYQ6IjftWVXORmKpEROMqQxDOnF6n2g0G1Ci4LquYM,5209
-sglang/srt/layers/extend_attention.py,sha256=KGqQOA5mel9qScXMAQP_3Qyhp3BNbiQ7Y_6wi38Lxcs,12622
-sglang/srt/layers/logits_processor.py,sha256=MW2bpqSXyghODMojqeMSYWZhUHuAFPk_gUkyyLw9HkM,4827
-sglang/srt/layers/radix_attention.py,sha256=bqrb8H8K8RbKTr1PzVmpnUxRzMj0H-OWCi1JYZKuRDw,5597
-sglang/srt/layers/token_attention.py,sha256=waOjGsWZlvf6epFhYerRJlAaMwvDTy_Z3uzPaXsVQUU,8516
-sglang/srt/managers/detokenizer_manager.py,sha256=1lPNh_Pe6Pr0v-TzlCBBREbvz4uFWxyw31SmnEZh0s8,3292
-sglang/srt/managers/io_struct.py,sha256=nXJh3CrOvv9MdAfIFoo6SCXuNQTG3KswmRKkwF61Tek,3141
-sglang/srt/managers/openai_protocol.py,sha256=cttqg9iv3de8fhtCqDI4cYoPPZ_gULedMXstV1ok6WA,4563
-sglang/srt/managers/tokenizer_manager.py,sha256=hgsR9AMj6ic9S3-2WiELh7Hnp8Xnb_bzp7kpbjHwHtM,9733
-sglang/srt/managers/router/infer_batch.py,sha256=U-Ckt9ad1WaOQF_dW6Eo9AMIRQoOJQ-Pm-MMXnEmPP8,18399
-sglang/srt/managers/router/manager.py,sha256=TNYs0IrkZGkPvZJViwL7BMUg0VlvzeyTjDMjuvRoMDI,2529
-sglang/srt/managers/router/model_rpc.py,sha256=VlwLNpHZ92bnteQl4PhVKoAXM0C8Y4_2LBBVaffeu3g,26766
-sglang/srt/managers/router/model_runner.py,sha256=-wWv00EbB_UkkLpio6VKGBTagfzxLHfY-eKDDQ0rZQc,18292
-sglang/srt/managers/router/radix_cache.py,sha256=XGUF5mxQTSCzD7GW_ltNP2p5aelEKrMXzdezufJ7NCQ,6484
-sglang/srt/managers/router/scheduler.py,sha256=V-LAnVSzgD2ddy2eXW3jWURCeq9Lv7YxCGk4kHyytfM,2818
-sglang/srt/models/gemma.py,sha256=8XlfHPtVixPYYjz5F9T4DOAuoordWFStmyFFWGfny1k,11582
-sglang/srt/models/llama2.py,sha256=VL4iN8R3wyTNr0bDxxKdLNnVGEvdXF6iGvA768YeakA,11611
-sglang/srt/models/llava.py,sha256=42sn-AgI-6dMaTEU4aEbi4Js5epy0J3JVQoMooUOKt8,14922
-sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
-sglang/srt/models/mixtral.py,sha256=wqIwKfR90ih0gDiTZkFZcQD4PIYpZFD3CmzxRcuKIqw,13915
-sglang/srt/models/qwen.py,sha256=CvdbcF90aI1tJPSQ-3OMUaQGMuaxCGe0y29m5nU_Yj0,9225
-sglang/srt/models/qwen2.py,sha256=myPc0wvgf5ZzJyGhUGN49YjY-tMf4t8Jn_Imjg8D7Mk,11307
-sglang/srt/models/stablelm.py,sha256=vMZUNgwXKPGYr5FcdYHw5g3QifVu9owKqq51_-EBOY0,10817
-sglang/srt/models/yivl.py,sha256=Qvp-zQ93cOZGg3zVyaiQLhRsfXiLrQhxu9TyQP2FMm4,4414
-sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
-sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
-sglang/test/test_programs.py,sha256=mrLhGuprwvx8ZJ-0Qe28E-iCw5Qv-9T0SAv1Jgo1AJw,11421
-sglang/test/test_utils.py,sha256=6PhTRi8UnR-BRNjit6aGu0M5lO0RebNQwEcDt712hE4,4830
-sglang-0.1.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.1.14.dist-info/METADATA,sha256=C5N0VOYRHixdJcsf4dExIvP-Q099kYBMKs_dA4LBXSM,28809
-sglang-0.1.14.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sglang-0.1.14.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.1.14.dist-info/RECORD,,
{sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/LICENSE
File without changes
{sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/WHEEL
File without changes
{sglang-0.1.14.dist-info → sglang-0.1.16.dist-info}/top_level.txt
File without changes