sglang 0.3.3.post1__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_latency.py +28 -10
- sglang/bench_server_latency.py +21 -10
- sglang/bench_serving.py +101 -7
- sglang/global_config.py +0 -1
- sglang/srt/layers/attention/__init__.py +27 -5
- sglang/srt/layers/attention/double_sparsity_backend.py +281 -0
- sglang/srt/layers/attention/flashinfer_backend.py +352 -83
- sglang/srt/layers/attention/triton_backend.py +6 -4
- sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +772 -0
- sglang/srt/layers/attention/triton_ops/extend_attention.py +5 -3
- sglang/srt/layers/attention/triton_ops/prefill_attention.py +4 -2
- sglang/srt/layers/sampler.py +6 -2
- sglang/srt/managers/detokenizer_manager.py +31 -10
- sglang/srt/managers/io_struct.py +4 -0
- sglang/srt/managers/schedule_batch.py +120 -43
- sglang/srt/managers/schedule_policy.py +2 -1
- sglang/srt/managers/scheduler.py +202 -140
- sglang/srt/managers/tokenizer_manager.py +5 -1
- sglang/srt/managers/tp_worker.py +111 -1
- sglang/srt/mem_cache/chunk_cache.py +8 -4
- sglang/srt/mem_cache/memory_pool.py +77 -4
- sglang/srt/mem_cache/radix_cache.py +15 -7
- sglang/srt/model_executor/cuda_graph_runner.py +4 -4
- sglang/srt/model_executor/forward_batch_info.py +16 -21
- sglang/srt/model_executor/model_runner.py +60 -1
- sglang/srt/models/baichuan.py +2 -3
- sglang/srt/models/chatglm.py +5 -6
- sglang/srt/models/commandr.py +1 -2
- sglang/srt/models/dbrx.py +1 -2
- sglang/srt/models/deepseek.py +4 -5
- sglang/srt/models/deepseek_v2.py +5 -6
- sglang/srt/models/exaone.py +1 -2
- sglang/srt/models/gemma.py +2 -2
- sglang/srt/models/gemma2.py +5 -5
- sglang/srt/models/gpt_bigcode.py +5 -5
- sglang/srt/models/grok.py +1 -2
- sglang/srt/models/internlm2.py +1 -2
- sglang/srt/models/llama.py +1 -2
- sglang/srt/models/llama_classification.py +1 -2
- sglang/srt/models/llama_reward.py +2 -3
- sglang/srt/models/llava.py +4 -8
- sglang/srt/models/llavavid.py +1 -2
- sglang/srt/models/minicpm.py +1 -2
- sglang/srt/models/minicpm3.py +5 -6
- sglang/srt/models/mixtral.py +1 -2
- sglang/srt/models/mixtral_quant.py +1 -2
- sglang/srt/models/olmo.py +352 -0
- sglang/srt/models/olmoe.py +1 -2
- sglang/srt/models/qwen.py +1 -2
- sglang/srt/models/qwen2.py +1 -2
- sglang/srt/models/qwen2_moe.py +4 -5
- sglang/srt/models/stablelm.py +1 -2
- sglang/srt/models/torch_native_llama.py +1 -2
- sglang/srt/models/xverse.py +1 -2
- sglang/srt/models/xverse_moe.py +4 -5
- sglang/srt/models/yivl.py +1 -2
- sglang/srt/openai_api/adapter.py +92 -49
- sglang/srt/openai_api/protocol.py +10 -2
- sglang/srt/sampling/penaltylib/orchestrator.py +28 -9
- sglang/srt/sampling/sampling_batch_info.py +92 -58
- sglang/srt/sampling/sampling_params.py +2 -0
- sglang/srt/server.py +116 -17
- sglang/srt/server_args.py +121 -45
- sglang/srt/utils.py +11 -3
- sglang/test/few_shot_gsm8k.py +4 -1
- sglang/test/few_shot_gsm8k_engine.py +144 -0
- sglang/test/srt/sampling/penaltylib/utils.py +16 -12
- sglang/version.py +1 -1
- {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/METADATA +72 -29
- {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/RECORD +73 -70
- {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/WHEEL +1 -1
- sglang/srt/layers/attention/flashinfer_utils.py +0 -237
- {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/LICENSE +0 -0
- {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/top_level.txt +0 -0
sglang/test/few_shot_gsm8k_engine.py
ADDED
@@ -0,0 +1,144 @@
+import argparse
+import ast
+import asyncio
+import json
+import re
+import time
+
+import numpy as np
+
+import sglang as sgl
+from sglang.api import set_default_backend
+from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
+from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
+
+INVALID = -9999999
+
+
+def get_one_example(lines, i, include_answer):
+    ret = "Question: " + lines[i]["question"] + "\nAnswer:"
+    if include_answer:
+        ret += " " + lines[i]["answer"]
+    return ret
+
+
+def get_few_shot_examples(lines, k):
+    ret = ""
+    for i in range(k):
+        ret += get_one_example(lines, i, True) + "\n\n"
+    return ret
+
+
+def get_answer_value(answer_str):
+    answer_str = answer_str.replace(",", "")
+    numbers = re.findall(r"\d+", answer_str)
+    if len(numbers) < 1:
+        return INVALID
+    try:
+        return ast.literal_eval(numbers[-1])
+    except SyntaxError:
+        return INVALID
+
+
+async def concurrent_generate(engine, prompts, sampling_param):
+    tasks = []
+    for prompt in prompts:
+        tasks.append(asyncio.create_task(engine.async_generate(prompt, sampling_param)))
+
+    outputs = await asyncio.gather(*tasks)
+    return outputs
+
+
+def run_eval(args):
+    # Select backend
+    engine = sgl.Engine(model_path=args.model_path, log_level="error")
+
+    if args.local_data_path is None:
+        # Read data
+        url = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"
+        filename = download_and_cache_file(url)
+    else:
+        filename = args.local_data_path
+
+    lines = list(read_jsonl(filename))
+
+    # Construct prompts
+    num_questions = args.num_questions
+    num_shots = args.num_shots
+    few_shot_examples = get_few_shot_examples(lines, num_shots)
+
+    questions = []
+    labels = []
+    for i in range(len(lines[:num_questions])):
+        questions.append(get_one_example(lines, i, False))
+        labels.append(get_answer_value(lines[i]["answer"]))
+    assert all(l != INVALID for l in labels)
+    arguments = [{"question": q} for q in questions]
+
+    # construct the prompts
+    prompts = []
+    for i, arg in enumerate(arguments):
+        q = arg["question"]
+        prompt = few_shot_examples + q
+        prompts.append(prompt)
+
+    sampling_param = {
+        "stop": ["Question", "Assistant:", "<|separator|>"],
+        "max_new_tokens": 512,
+        "temperature": 0,
+    }
+
+    # Run requests
+    tic = time.time()
+
+    loop = asyncio.get_event_loop()
+
+    outputs = loop.run_until_complete(
+        concurrent_generate(engine, prompts, sampling_param)
+    )
+
+    # End requests
+    latency = time.time() - tic
+
+    # Shutdown the engine
+    engine.shutdown()
+
+    # Parse output
+    preds = []
+
+    for output in outputs:
+        preds.append(get_answer_value(output["text"]))
+
+    # Compute accuracy
+    acc = np.mean(np.array(preds) == np.array(labels))
+    invalid = np.mean(np.array(preds) == INVALID)
+
+    # Compute speed
+    num_output_tokens = sum(
+        output["meta_info"]["completion_tokens"] for output in outputs
+    )
+    output_throughput = num_output_tokens / latency
+
+    # Print results
+    print(f"Accuracy: {acc:.3f}")
+    print(f"Invalid: {invalid:.3f}")
+    print(f"Latency: {latency:.3f} s")
+    print(f"Output throughput: {output_throughput:.3f} token/s")
+
+    return {
+        "accuracy": acc,
+        "latency": latency,
+        "output_throughput": output_throughput,
+    }
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model-path", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct"
+    )
+    parser.add_argument("--local-data-path", type=Optional[str], default=None)
+    parser.add_argument("--num-shots", type=int, default=5)
+    parser.add_argument("--num-questions", type=int, default=200)
+    args = parser.parse_args()
+    metrics = run_eval(args)
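The core of this new benchmark is the fan-out over `Engine.async_generate`. A minimal sketch of that pattern is below; it assumes only the `sgl.Engine` API already used in the file above, and the model path, prompts, and helper name are placeholders for illustration rather than part of the diff.

```python
# Sketch of the concurrency pattern used by few_shot_gsm8k_engine.py:
# create one async_generate task per prompt and gather the results.
import asyncio

import sglang as sgl


async def generate_all(engine, prompts, sampling_params):
    tasks = [
        asyncio.create_task(engine.async_generate(p, sampling_params))
        for p in prompts
    ]
    return await asyncio.gather(*tasks)


if __name__ == "__main__":
    # Placeholder model path and prompts, for illustration only.
    engine = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
    prompts = ["Question: 1 + 1 = ?\nAnswer:", "Question: 2 + 3 = ?\nAnswer:"]
    params = {"temperature": 0, "max_new_tokens": 32}
    outputs = asyncio.get_event_loop().run_until_complete(
        generate_all(engine, prompts, params)
    )
    for out in outputs:
        print(out["text"], out["meta_info"]["completion_tokens"])
    engine.shutdown()
```

The benchmark script times exactly this gather and then derives output throughput from the `meta_info["completion_tokens"]` counters it prints per run.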
sglang/test/srt/sampling/penaltylib/utils.py
CHANGED
@@ -164,19 +164,20 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
                 msg=f"key={key}\nactual={getattr(penalizer, key)}\nexpected={tensor}",
             )
 
-
-
-
-
-                device=self.device,
-            )
+        original = torch.ones(
+            size=(len(case.test_subjects), self.vocab_size),
+            dtype=torch.float32,
+            device=self.device,
         )
+        actual = orchestrator.apply(original.clone())
         expected = torch.cat(
             tensors=[
                 subject.steps[0].expected_logits
                 for subject in case.test_subjects
             ],
         )
+        if actual is None:
+            actual = original
         torch.testing.assert_close(
             actual=actual,
             expected=expected,
@@ -226,6 +227,8 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
                 device=self.device,
             )
         )
+        if actual_logits is None:
+            continue
         filtered_expected_logits = torch.cat(
             tensors=[
                 subject.steps[0].expected_logits
@@ -317,19 +320,20 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
                 msg=f"key={key}\nactual={getattr(penalizer, key)}\nexpected={tensor}",
             )
 
-
-
-
-
-                device=self.device,
-            )
+        original = torch.ones(
+            size=(len(filtered_subjects), self.vocab_size),
+            dtype=torch.float32,
+            device=self.device,
         )
+        actual_logits = orchestrator.apply(original.clone())
         filtered_expected_logits = torch.cat(
             tensors=[
                 subject.steps[i].expected_logits
                 for subject in filtered_subjects
             ],
         )
+        if actual_logits is None:
+            actual_logits = original
         torch.testing.assert_close(
             actual=actual_logits,
             expected=filtered_expected_logits,
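These test edits build the input logits explicitly, pass a clone to `orchestrator.apply`, and fall back to the original tensor when `apply` returns `None`, which suggests the orchestrator may now modify logits in place instead of returning a new tensor. A hedged sketch of that calling convention is below; the helper name is ours, not from the package.

```python
import torch


def apply_penalties(orchestrator, logits: torch.Tensor) -> torch.Tensor:
    """Apply batched penalizers, accepting either return style.

    Some orchestrators return a new logits tensor; others, as the updated
    test now tolerates, mutate `logits` in place and return None.
    """
    result = orchestrator.apply(logits)
    return logits if result is None else result
```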
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.3.3.post1"
+__version__ = "0.3.4"
{sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.3.3.post1
+Version: 0.3.4
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
                                 Version 2.0, January 2004
@@ -219,36 +219,49 @@ Requires-Dist: sglang[srt]; extra == "all"
 Requires-Dist: sglang[openai]; extra == "all"
 Requires-Dist: sglang[anthropic]; extra == "all"
 Requires-Dist: sglang[litellm]; extra == "all"
+Provides-Extra: all_xpu
+Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
+Requires-Dist: sglang[openai]; extra == "all-xpu"
+Requires-Dist: sglang[anthropic]; extra == "all-xpu"
+Requires-Dist: sglang[litellm]; extra == "all-xpu"
 Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
 Provides-Extra: dev
 Requires-Dist: sglang[all]; extra == "dev"
 Requires-Dist: sglang[test]; extra == "dev"
+Provides-Extra: dev_xpu
+Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
+Requires-Dist: sglang[test]; extra == "dev-xpu"
 Provides-Extra: litellm
 Requires-Dist: litellm>=1.0.0; extra == "litellm"
 Provides-Extra: openai
 Requires-Dist: openai>=1.0; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
+Provides-Extra: runtime_common
+Requires-Dist: aiohttp; extra == "runtime-common"
+Requires-Dist: decord; extra == "runtime-common"
+Requires-Dist: fastapi; extra == "runtime-common"
+Requires-Dist: hf-transfer; extra == "runtime-common"
+Requires-Dist: huggingface-hub; extra == "runtime-common"
+Requires-Dist: interegular; extra == "runtime-common"
+Requires-Dist: orjson; extra == "runtime-common"
+Requires-Dist: packaging; extra == "runtime-common"
+Requires-Dist: pillow; extra == "runtime-common"
+Requires-Dist: psutil; extra == "runtime-common"
+Requires-Dist: pydantic; extra == "runtime-common"
+Requires-Dist: python-multipart; extra == "runtime-common"
+Requires-Dist: torchao; extra == "runtime-common"
+Requires-Dist: uvicorn; extra == "runtime-common"
+Requires-Dist: uvloop; extra == "runtime-common"
+Requires-Dist: zmq; extra == "runtime-common"
+Requires-Dist: outlines>=0.0.44; extra == "runtime-common"
+Requires-Dist: modelscope; extra == "runtime-common"
 Provides-Extra: srt
-Requires-Dist:
-Requires-Dist: decord; extra == "srt"
-Requires-Dist: fastapi; extra == "srt"
-Requires-Dist: hf-transfer; extra == "srt"
-Requires-Dist: huggingface-hub; extra == "srt"
-Requires-Dist: interegular; extra == "srt"
-Requires-Dist: packaging; extra == "srt"
-Requires-Dist: pillow; extra == "srt"
-Requires-Dist: psutil; extra == "srt"
-Requires-Dist: pydantic; extra == "srt"
-Requires-Dist: python-multipart; extra == "srt"
+Requires-Dist: sglang[runtime_common]; extra == "srt"
 Requires-Dist: torch; extra == "srt"
-Requires-Dist: torchao; extra == "srt"
-Requires-Dist: uvicorn; extra == "srt"
-Requires-Dist: uvloop; extra == "srt"
-Requires-Dist: zmq; extra == "srt"
 Requires-Dist: vllm==0.5.5; extra == "srt"
-
-Requires-Dist:
+Provides-Extra: srt_xpu
+Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
 Provides-Extra: test
 Requires-Dist: jsonlines; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
@@ -270,14 +283,13 @@ Requires-Dist: peft; extra == "test"
 
 --------------------------------------------------------------------------------
 
-| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slides**](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_dev_day_v2.
-
-## Upcoming Events
-- [Oct. 16, 2024] Online meetup for efficient LLM deployment and serving, co-hosted by SGLang, FlashInfer, and MLC LLM! Fill out the [Google form](https://forms.gle/B3YeedLxmrrhL1NM8) to receive the invite link.
+| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slides**](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_dev_day_v2.pdf) | [**Learn More**](https://github.com/sgl-project/sgl-learning-materials) | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) |
+[**Join Bi-Weekly Development Meeting (Oct. 19)**](https://calendar.app.google/GYW7S8QGoanCuaxW6) |
 
 ## News
-- [2024/
-- [2024/
+- [2024/10] 🔥 The First SGLang Online Meetup ([slides](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#the-first-sglang-online-meetup)).
+- [2024/09] SGLang v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
+- [2024/07] Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
 - [2024/02] SGLang enables **3x faster JSON decoding** with compressed finite state machine ([blog](https://lmsys.org/blog/2024-02-05-compressed-fsm/)).
 
 <details>
@@ -323,7 +335,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.3.3.post1 https://github.com/sgl-project/sglang.git
+git clone -b v0.3.4 https://github.com/sgl-project/sglang.git
 cd sglang
 
 pip install --upgrade pip
@@ -500,6 +512,40 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
 ```
 
+### Engine Without HTTP Server
+
+We also provide an inference engine **without a HTTP server**. For example,
+
+```python
+import sglang as sgl
+
+
+def main():
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = {"temperature": 0.8, "top_p": 0.95}
+    llm = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
+
+    outputs = llm.generate(prompts, sampling_params)
+    for prompt, output in zip(prompts, outputs):
+        print("===============================")
+        print(f"Prompt: {prompt}\nGenerated text: {output['text']}")
+
+if __name__ == "__main__":
+    main()
+```
+
+This can be used for:
+
+1. **Offline Batch Inference**
+2. **Building Custom Servers**
+
+You can view the full example [here](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine)
+
 ### Supported Models
 
 **Generative Models**
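The "Building Custom Servers" use case named in the added README section could look roughly like the hedged sketch below, which wraps the same `sgl.Engine`/`async_generate` API in a small FastAPI app (FastAPI and uvicorn appear in the `runtime_common` extra earlier in this METADATA diff). The route, request schema, and field names are illustrative assumptions, not part of sglang.

```python
# Hypothetical minimal custom server around sgl.Engine; the endpoint shape
# and request fields are illustrative, not from the sglang package.
from fastapi import FastAPI
from pydantic import BaseModel

import sglang as sgl

app = FastAPI()
engine = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")


class GenerateRequest(BaseModel):
    prompt: str
    max_new_tokens: int = 128
    temperature: float = 0.8


@app.post("/generate")
async def generate(req: GenerateRequest):
    output = await engine.async_generate(
        req.prompt,
        {"max_new_tokens": req.max_new_tokens, "temperature": req.temperature},
    )
    return {"text": output["text"]}
```

Assuming the file is saved as `app.py`, it could be served with `uvicorn app:app`.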
@@ -836,10 +882,7 @@ def chat_example(s):
 - The `regex` argument in `sgl.gen` is implemented through autoregressive decoding with logit bias masking, according to the constraints set by the regex. It is compatible with `temperature=0` and `temperature != 0`.
 
 ## Benchmark And Performance
-
-[8b_throughput](https://lmsys.org/images/blog/sglang_llama3/8b_throughput.svg)
-
-Learn more at this [blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/).
+Learn more in our release blogs: [v0.2](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3](https://lmsys.org/blog/2024-09-04-sglang-v0-3/).
 
 ## Roadmap
 [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
{sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
 sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
 sglang/api.py,sha256=5x591S4rLbmNPs75qPwGKVu1sonVGDyjPAJlHTyWw50,6956
-sglang/bench_latency.py,sha256=
-sglang/bench_server_latency.py,sha256=
-sglang/bench_serving.py,sha256=
+sglang/bench_latency.py,sha256=RWSyZ-UhLV6dyPMMtK3nSOoNsjCY5xMpYKeUKRNtdcA,18276
+sglang/bench_server_latency.py,sha256=2AMPwU2_85q-Btz9UdZC-TnZJPgXcNkydvFYWn2CJlU,5892
+sglang/bench_serving.py,sha256=jcxNP7reIJPh3x1hG5TCM6wMlDXshjyMJUUjL2O7kzs,40060
 sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
-sglang/global_config.py,sha256=
+sglang/global_config.py,sha256=1r_W9rrBxGCCc2eqESRduOMMNq46e54xLgFLifHuQm0,1014
 sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
 sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
 sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
-sglang/version.py,sha256=
+sglang/version.py,sha256=oYLGMpySamd16KLiaBTfRyrAS7_oyp-TOEHmzmeumwg,22
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -26,9 +26,9 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
 sglang/srt/conversation.py,sha256=B4QPGOUVdoXRJwWWxSm5pfifGpuBs07fDTxJ1BHUXLw,20003
 sglang/srt/hf_transformers_utils.py,sha256=rt6flb6BoYTO8fw7AKCXmQLJx5XuSUuRmZX-VJHmuLQ,6064
 sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
-sglang/srt/server.py,sha256=
-sglang/srt/server_args.py,sha256=
-sglang/srt/utils.py,sha256=
+sglang/srt/server.py,sha256=65b39k4FN_TzL8qAimS1mRx8xdO8jmKCdUftOISUv7M,26809
+sglang/srt/server_args.py,sha256=IDuX8ZCJd_6t2xHf7wNGskVgvpAQtUcFSGBnKFnLf3U,27290
+sglang/srt/utils.py,sha256=0zalNeGrtrIyfmD7DHLRqocCY1_yNbPCD5hmionHpL0,23071
 sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
 sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
 sglang/srt/configs/model_config.py,sha256=36My-o44trhWY3KYDeSFMGvv9XuUtIVI5e7F8VlOTWo,6723
@@ -42,15 +42,16 @@ sglang/srt/layers/linear.py,sha256=9rjCiSb_QOn5RgpVjIhEKdReRvSYVfcTSjbWBEbApLI,4
 sglang/srt/layers/logits_processor.py,sha256=Fq7VHwjP4iSzl_OBLo8qw_HVbIDbYB-0MGmfiD3Jk_E,12521
 sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
 sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
-sglang/srt/layers/sampler.py,sha256=
+sglang/srt/layers/sampler.py,sha256=23wRDw2Fs3wZfPBh6gFBz2vqwxnoDd9LAHWq7YdQWlc,4166
 sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
-sglang/srt/layers/attention/__init__.py,sha256=
-sglang/srt/layers/attention/
-sglang/srt/layers/attention/
-sglang/srt/layers/attention/triton_backend.py,sha256=
+sglang/srt/layers/attention/__init__.py,sha256=hyrPpnuiTs5VIZNyoIjZLRsHUX20gX2dvY9kkqyXIII,2158
+sglang/srt/layers/attention/double_sparsity_backend.py,sha256=owzPwLWcShZ0ezkVjBr0vV73vtQIUh8z-rcQtXLG1fk,10374
+sglang/srt/layers/attention/flashinfer_backend.py,sha256=y9saTqOhb_tJoRtjq9sishlQFGYlFkQD7QcV9x_ureo,19554
+sglang/srt/layers/attention/triton_backend.py,sha256=B6YuIZWh4Zn85Q57i0z3eZ08RCrS0rfyPJzkRr_zOIQ,6150
 sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
-sglang/srt/layers/attention/triton_ops/
-sglang/srt/layers/attention/triton_ops/
+sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
+sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=nEG7iBh1pAy3WaqPdLZwCJwDgyk5HLQ181kBS2nxbwg,11179
+sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=bNHHZeEowwI1wwOWj2T6bjBTBtVZUbcL-0cgfZwpHek,5471
 sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
 sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
 sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
@@ -61,65 +62,67 @@ sglang/srt/lora/lora.py,sha256=a5j_Yy0s95msVPFgOuH5PCe7sMu0AyZFQ5wL0H-YIg8,14913
 sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
 sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
 sglang/srt/managers/data_parallel_controller.py,sha256=GJGfX1-5DoQFZ-EMh_p02nvrOtrOc0UebnULWHhFrss,5765
-sglang/srt/managers/detokenizer_manager.py,sha256=
+sglang/srt/managers/detokenizer_manager.py,sha256=G2-Y-nDbq7LF8ZFWcXXcMkbCwzyBEh1g4UrciDlkNYY,7985
 sglang/srt/managers/image_processor.py,sha256=9Y9RqyLdbt4uOK7pnJCJIhY77791klskSrEg8U6pyS4,6910
-sglang/srt/managers/io_struct.py,sha256=
-sglang/srt/managers/schedule_batch.py,sha256=
-sglang/srt/managers/schedule_policy.py,sha256=
-sglang/srt/managers/scheduler.py,sha256=
-sglang/srt/managers/tokenizer_manager.py,sha256=
-sglang/srt/managers/tp_worker.py,sha256=
+sglang/srt/managers/io_struct.py,sha256=QJ1Eu7XNsRH35ec5nUOUS2XSjiFWyjYHKsnMqviC_Mk,12298
+sglang/srt/managers/schedule_batch.py,sha256=6LUXbAd6PvHIDk6iwpsufosahIT2z32JZjsNx5xeKcg,33930
+sglang/srt/managers/schedule_policy.py,sha256=unDmK7Y_Ti0Eiizh3_iEFMsC1KDqGMTqU8MlQgg-6qo,11951
+sglang/srt/managers/scheduler.py,sha256=Y7R-VkLt8Az2jZGrGRuhG1g4UPO5y-7b9BaOknuC2aI,43019
+sglang/srt/managers/tokenizer_manager.py,sha256=SprHC0Bs8kvtnYboDPrH587uO_sdKHyp7tVBCdbEB9c,25066
+sglang/srt/managers/tp_worker.py,sha256=nwkIXiasGA4w97pnMG32U1DN1RlLFkvETvl9q7SjGeY,8887
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
-sglang/srt/mem_cache/chunk_cache.py,sha256=
+sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
 sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
-sglang/srt/mem_cache/memory_pool.py,sha256=
-sglang/srt/mem_cache/radix_cache.py,sha256=
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=
-sglang/srt/model_executor/forward_batch_info.py,sha256=
-sglang/srt/model_executor/model_runner.py,sha256=
-sglang/srt/models/baichuan.py,sha256=
-sglang/srt/models/chatglm.py,sha256=
-sglang/srt/models/commandr.py,sha256=
-sglang/srt/models/dbrx.py,sha256=
-sglang/srt/models/deepseek.py,sha256=
-sglang/srt/models/deepseek_v2.py,sha256=
-sglang/srt/models/exaone.py,sha256=
-sglang/srt/models/gemma.py,sha256=
-sglang/srt/models/gemma2.py,sha256=
-sglang/srt/models/gpt_bigcode.py,sha256=
-sglang/srt/models/grok.py,sha256=
-sglang/srt/models/internlm2.py,sha256
-sglang/srt/models/llama.py,sha256=
-sglang/srt/models/llama_classification.py,sha256=
+sglang/srt/mem_cache/memory_pool.py,sha256=ihVZXlJ_Fvs1L2c2SZQaijUYSn9X6eyiFiG2NNRQS_M,9297
+sglang/srt/mem_cache/radix_cache.py,sha256=cS6G5uOW_0QICH30PXxatetka4wnELfhP4czHn8RDJE,10414
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=KgSBvoF2IyCGDFNXQyN7sV3E_S2NndeMQyaQZB97Pak,10499
+sglang/srt/model_executor/forward_batch_info.py,sha256=rSazAtkWKyc2g2QILT2-AsUdaBt51hQBU1qcS2iw_Nw,5690
+sglang/srt/model_executor/model_runner.py,sha256=hn-VeLABZL4y_GcJFl2y_C7x6ZS_Xw0tDyKzOxYMtVQ,25983
+sglang/srt/models/baichuan.py,sha256=uV20fr7SqlATxoziXRtJyXFnZZoWTUy3qvQNFaEvw8M,15014
+sglang/srt/models/chatglm.py,sha256=uep4Wy_2jwn_x6Cvagt5rs3JRY_AlcM-VXvRTCFC5Tc,13172
+sglang/srt/models/commandr.py,sha256=WIMwjV3C0pRbVs4Xv9tqnHGreRvWC7zsML2hNuXw4A0,14060
+sglang/srt/models/dbrx.py,sha256=_DshXyXr_xVB7wtE28PFcb6KDIkA6gygkXYKqivSCFc,14554
+sglang/srt/models/deepseek.py,sha256=W342tVpEpkc_fvO_DTP4fX3EGF-DIFC4QOySdUGzl9w,15837
+sglang/srt/models/deepseek_v2.py,sha256=5P5678aaMT4iO4jS0dZWUiRG4o9EE98xVgs6Zjy-Mr0,28229
+sglang/srt/models/exaone.py,sha256=bIXdAXoWlCdfDdX2q47Br3QOa3jEYiiP2Hdd1T4crnM,12993
+sglang/srt/models/gemma.py,sha256=4MVHwc5Jc4CSg3HIdNJEBYk8mhspjuwvc_6Oi8Cd-g8,12202
+sglang/srt/models/gemma2.py,sha256=3VL223T_3syBG3fUInbtFaXvIs7dYjtsfX3OfDQc7m4,14777
+sglang/srt/models/gpt_bigcode.py,sha256=q9N13Js2v0VheudWssRoSjnptS6TSf7DOmC8zLRGxeo,10049
+sglang/srt/models/grok.py,sha256=vc7-E_hemNKaNORxg4rmaQcVYlpoavyaAZUG9B2dgbY,14835
+sglang/srt/models/internlm2.py,sha256=-liQB13sgR3GnXJacBSMuEbLa2N4tICx0LsNgu_nNvU,12108
+sglang/srt/models/llama.py,sha256=a43Y5mvMDmFcRcPL78vsAElaOvTqPajLPB2_BDwJ7pM,15767
+sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
 sglang/srt/models/llama_embedding.py,sha256=4j3WNLB-x7XQnJvohdRs7VSSEabbhiE2BRHmnG5IZRU,3453
-sglang/srt/models/llama_reward.py,sha256=
-sglang/srt/models/llava.py,sha256=
-sglang/srt/models/llavavid.py,sha256=
-sglang/srt/models/minicpm.py,sha256=
-sglang/srt/models/minicpm3.py,sha256
+sglang/srt/models/llama_reward.py,sha256=ag3eVdP38iURj81fTCa-sC2jV_eCkTIjXUQf1I96fCI,5297
+sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
+sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
+sglang/srt/models/minicpm.py,sha256=LpUdxKA27z79DSYAPPlfCgI4GEnWCYznhgSQl-QCsTY,13731
+sglang/srt/models/minicpm3.py,sha256=-fLZ-RRbR2jLGSsatBWV-qsSNIZCPbS_jasmrOlUdK8,25023
 sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
-sglang/srt/models/mixtral.py,sha256=
-sglang/srt/models/mixtral_quant.py,sha256=
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/models/
-sglang/srt/
-sglang/srt/openai_api/
-sglang/srt/
-sglang/srt/sampling/
+sglang/srt/models/mixtral.py,sha256=UUqzpOBXsObirmpJz4xstlG82uu4JfXsh-gWQmiKbW0,13803
+sglang/srt/models/mixtral_quant.py,sha256=HPipVG_Gc5Ki0YXg49Rwn2_uvtCCI1IxlA7mVRVFivw,13978
+sglang/srt/models/olmo.py,sha256=lD4VewXK0rVqhttGkOOzaxoqRQgVfV90s8ElStPBBdE,11896
+sglang/srt/models/olmoe.py,sha256=3qHnY1DWBhyx9FWGJGb3a8kewcmEdYZOkYZ1JBx1LWs,15251
+sglang/srt/models/qwen.py,sha256=mjGqo3NkTYfJ2qqztFw8mjKggPT2moW15nQgrq3GxWk,9860
+sglang/srt/models/qwen2.py,sha256=I2ZzH9pVTZdjP1fHlq1qdG4JiWHt1CC6t1EK2gN5Ppc,12337
+sglang/srt/models/qwen2_moe.py,sha256=BaNq8xgZKqjr_fcEBtH4yjBSc3-p4VztPiknVwllcQk,16984
+sglang/srt/models/stablelm.py,sha256=0NWUVsYGhbc_X2eT9x38MaaUhZGmFtMgw_2PBv25Yxw,11265
+sglang/srt/models/torch_native_llama.py,sha256=dtasdhwfRPE1eOcAIFUBsHrDnkjegXvo8WhGlqvXGKk,19154
+sglang/srt/models/xverse.py,sha256=v4OaFdss9oD5YNzXsnjoXE9ffCkXL9U5o0OWLm1vHQQ,13573
+sglang/srt/models/xverse_moe.py,sha256=A8EB82NpozoBplp7Qd8B_kY_3cL-UMydAxYIrhACVPE,15682
+sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
+sglang/srt/openai_api/adapter.py,sha256=WkYCKVaYTkFdLrySBhlkDyHJVaaHMF7KrhNnmw3L3us,53534
+sglang/srt/openai_api/protocol.py,sha256=EZ6G209rBEDP7cepO2kAYqE8wMe1ksYdN7to1iT97Lw,10248
+sglang/srt/sampling/sampling_batch_info.py,sha256=EAdep3I5qmbDDQJ0Ktrq0ySXJ6DCrTAjniEwFu4ZRqE,7679
+sglang/srt/sampling/sampling_params.py,sha256=ZPHCQq7Bi4P_sxUzdKgYVXZpB_tC-kA7rlLwiW9Ct9A,5781
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
-sglang/srt/sampling/penaltylib/orchestrator.py,sha256=
+sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
 sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
 sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=XJZP0C4NFyXgcODbIWXxrgVEjmRgqLdZuVAtoN-LveY,3565
 sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
 sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
-sglang/test/few_shot_gsm8k.py,sha256=
+sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
+sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
 sglang/test/run_eval.py,sha256=NWxeLWmInBgkCvC9Jr_QzF7GfAiBve3Gf1JQrEOlNlU,3899
 sglang/test/runners.py,sha256=VCmtH08FsAq_JTAKfKo0zB4o-osNMAxxwe4aKcSxr4c,13515
 sglang/test/simple_eval_common.py,sha256=r0G-9QLycs2ax3RMc44T_61fzMxlpTzv6pececC7lyY,12379
@@ -132,9 +135,9 @@ sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxo
 sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
 sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
 sglang/test/test_utils.py,sha256=NkJuezjmonjgC3_i_CTBd8KSqWh6W9CLcgoaqvTNK2U,18684
-sglang/test/srt/sampling/penaltylib/utils.py,sha256
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
-sglang-0.3.
+sglang/test/srt/sampling/penaltylib/utils.py,sha256=Koe8GYoxIBUCz71of0oHhM5t5QcEd6a1IYq5SszRFAw,12730
+sglang-0.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.3.4.dist-info/METADATA,sha256=rrkwX2teVdp79NEuOJfTBPUYCs_72LHIabuIesToPdI,40738
+sglang-0.3.4.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+sglang-0.3.4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.3.4.dist-info/RECORD,,