sglang 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_serving.py +3 -5
- sglang/lang/interpreter.py +2 -1
- sglang/lang/ir.py +0 -1
- sglang/srt/constrained/{base_cache.py → base_tool_cache.py} +2 -2
- sglang/srt/constrained/fsm_cache.py +2 -2
- sglang/srt/constrained/jump_forward.py +2 -2
- sglang/srt/layers/logits_processor.py +1 -1
- sglang/srt/managers/schedule_batch.py +29 -9
- sglang/srt/managers/tokenizer_manager.py +1 -0
- sglang/srt/managers/tp_worker.py +29 -6
- sglang/srt/mem_cache/base_cache.py +43 -0
- sglang/srt/mem_cache/chunk_cache.py +60 -0
- sglang/srt/mem_cache/radix_cache.py +5 -2
- sglang/srt/model_executor/model_runner.py +17 -2
- sglang/srt/models/llama2.py +5 -21
- sglang/srt/openai_api/adapter.py +76 -22
- sglang/srt/openai_api/protocol.py +20 -2
- sglang/srt/server.py +9 -14
- sglang/srt/server_args.py +18 -4
- sglang/srt/utils.py +20 -0
- sglang/test/run_eval.py +104 -0
- sglang/test/simple_eval_common.py +467 -0
- sglang/test/simple_eval_humaneval.py +139 -0
- sglang/test/simple_eval_mmlu.py +120 -0
- sglang/test/test_programs.py +12 -9
- sglang/test/test_utils.py +32 -0
- sglang/version.py +1 -1
- {sglang-0.2.7.dist-info → sglang-0.2.9.dist-info}/METADATA +4 -4
- {sglang-0.2.7.dist-info → sglang-0.2.9.dist-info}/RECORD +32 -28
- sglang/test/test_conversation.py +0 -46
- sglang/test/test_openai_protocol.py +0 -51
- {sglang-0.2.7.dist-info → sglang-0.2.9.dist-info}/LICENSE +0 -0
- {sglang-0.2.7.dist-info → sglang-0.2.9.dist-info}/WHEEL +0 -0
- {sglang-0.2.7.dist-info → sglang-0.2.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,120 @@
|
|
1
|
+
# Adapted from https://github.com/openai/simple-evals/
|
2
|
+
|
3
|
+
"""
|
4
|
+
Measuring Massive Multitask Language Understanding
|
5
|
+
Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, Jacob Steinhardt
|
6
|
+
https://arxiv.org/abs/2009.03300
|
7
|
+
"""
|
8
|
+
|
9
|
+
import random
|
10
|
+
import re
|
11
|
+
|
12
|
+
import pandas
|
13
|
+
|
14
|
+
from sglang.test import simple_eval_common as common
|
15
|
+
from sglang.test.simple_eval_common import (
|
16
|
+
ANSWER_PATTERN_MULTICHOICE,
|
17
|
+
HTML_JINJA,
|
18
|
+
Eval,
|
19
|
+
EvalResult,
|
20
|
+
SamplerBase,
|
21
|
+
SingleEvalResult,
|
22
|
+
format_multichoice_question,
|
23
|
+
)
|
24
|
+
|
25
|
+
# Lookup table from subject name to one of four coarse categories
# ("stem", "humanities", "social_sciences", "other"). MMLUEval uses it to pick
# the metric bucket for each example; subjects missing here fall back to
# "other" at the lookup site.
subject2category = {
    "abstract_algebra": "stem",
    "anatomy": "other",
    "astronomy": "stem",
    "business_ethics": "other",
    "clinical_knowledge": "other",
    "college_biology": "stem",
    "college_chemistry": "stem",
    "college_computer_science": "stem",
    "college_mathematics": "stem",
    "college_medicine": "other",
    "college_physics": "stem",
    "computer_security": "stem",
    "conceptual_physics": "stem",
    "econometrics": "social_sciences",
    "electrical_engineering": "stem",
    "elementary_mathematics": "stem",
    "formal_logic": "humanities",
    "global_facts": "other",
    "high_school_biology": "stem",
    "high_school_chemistry": "stem",
    "high_school_computer_science": "stem",
    "high_school_european_history": "humanities",
    "high_school_geography": "social_sciences",
    "high_school_government_and_politics": "social_sciences",
    "high_school_macroeconomics": "social_sciences",
    "high_school_mathematics": "stem",
    "high_school_microeconomics": "social_sciences",
    "high_school_physics": "stem",
    "high_school_psychology": "social_sciences",
    "high_school_statistics": "stem",
    "high_school_us_history": "humanities",
    "high_school_world_history": "humanities",
    "human_aging": "other",
    "human_sexuality": "social_sciences",
    "international_law": "humanities",
    "jurisprudence": "humanities",
    "logical_fallacies": "humanities",
    "machine_learning": "stem",
    "management": "other",
    "marketing": "other",
    "medical_genetics": "other",
    "miscellaneous": "other",
    "moral_disputes": "humanities",
    "moral_scenarios": "humanities",
    "nutrition": "other",
    "philosophy": "humanities",
    "prehistory": "humanities",
    "professional_accounting": "other",
    "professional_law": "humanities",
    "professional_medicine": "other",
    "professional_psychology": "social_sciences",
    "public_relations": "social_sciences",
    "security_studies": "social_sciences",
    "sociology": "social_sciences",
    "us_foreign_policy": "social_sciences",
    "virology": "other",
    "world_religions": "humanities",
}
|
84
|
+
|
85
|
+
|
86
|
+
class MMLUEval(Eval):
    """Multiple-choice evaluation over rows loaded from a CSV file.

    Every row becomes one user prompt. The sampler's reply is searched with
    ``ANSWER_PATTERN_MULTICHOICE`` and the first capture group is compared
    with the row's ``"Answer"`` value to produce a 0.0/1.0 score.
    """

    def __init__(self, filename: str, num_examples: int | None, num_threads: int):
        """Read the examples and optionally subsample them.

        Args:
            filename: Location of the CSV, handed to ``pandas.read_csv``.
            num_examples: When truthy, this many rows are drawn using a
                ``random.Random`` seeded with 0; otherwise all rows are kept.
            num_threads: Forwarded to ``common.map_with_progress`` in
                ``__call__``.
        """
        frame = pandas.read_csv(filename)
        rows = [record.to_dict() for _, record in frame.iterrows()]
        if num_examples:
            # Fixed seed so the same subset is selected on every run.
            rows = random.Random(0).sample(rows, num_examples)
        self.examples = rows
        self.num_threads = num_threads

    def __call__(self, sampler: SamplerBase) -> EvalResult:
        """Score every stored example with ``sampler`` and aggregate the results."""

        def score_row(row: dict):
            # Single-turn conversation holding the formatted question.
            question = format_multichoice_question(row)
            prompt_messages = [sampler._pack_message(content=question, role="user")]
            response_text = sampler(prompt_messages)

            # No match means no answer could be extracted; that scores 0.0.
            found = re.search(ANSWER_PATTERN_MULTICHOICE, response_text)
            extracted_answer = None if found is None else found.group(1)
            if extracted_answer == row["Answer"]:
                score = 1.0
            else:
                score = 0.0

            template = common.jinja_env.from_string(HTML_JINJA)
            html = template.render(
                prompt_messages=prompt_messages,
                next_message={"content": response_text, "role": "assistant"},
                score=score,
                correct_answer=row["Answer"],
                extracted_answer=extracted_answer,
            )
            convo = prompt_messages + [{"content": response_text, "role": "assistant"}]

            # Subjects absent from the table are reported under "other".
            category = subject2category.get(row["Subject"], "other")
            return SingleEvalResult(
                html=html,
                score=score,
                metrics={category: score},
                convo=convo,
            )

        per_example = common.map_with_progress(
            score_row, self.examples, self.num_threads
        )
        return common.aggregate_results(per_example)
|
sglang/test/test_programs.py
CHANGED
@@ -105,23 +105,21 @@ def test_decode_json_regex():
|
|
105
105
|
def decode_json(s):
|
106
106
|
from sglang.lang.ir import REGEX_FLOAT, REGEX_INT, REGEX_STRING
|
107
107
|
|
108
|
-
s += "Generate a JSON object to describe the basic information of
|
108
|
+
s += "Generate a JSON object to describe the basic city information of Paris.\n"
|
109
109
|
|
110
110
|
with s.var_scope("json_output"):
|
111
111
|
s += "{\n"
|
112
112
|
s += ' "name": ' + sgl.gen(regex=REGEX_STRING + ",") + "\n"
|
113
113
|
s += ' "population": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
|
114
114
|
s += ' "area": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
|
115
|
-
s += ' "latitude": ' + sgl.gen(regex=REGEX_FLOAT
|
116
|
-
s += ' "country": ' + sgl.gen(regex=REGEX_STRING + ",") + "\n"
|
117
|
-
s += ' "timezone": ' + sgl.gen(regex=REGEX_STRING) + "\n"
|
115
|
+
s += ' "latitude": ' + sgl.gen(regex=REGEX_FLOAT) + "\n"
|
118
116
|
s += "}"
|
119
117
|
|
120
|
-
ret = decode_json.run()
|
118
|
+
ret = decode_json.run(temperature=0.0)
|
121
119
|
try:
|
122
120
|
js_obj = json.loads(ret["json_output"])
|
123
121
|
except json.decoder.JSONDecodeError:
|
124
|
-
print(ret["json_output"])
|
122
|
+
print("JSONDecodeError", ret["json_output"])
|
125
123
|
raise
|
126
124
|
assert isinstance(js_obj["name"], str)
|
127
125
|
assert isinstance(js_obj["population"], int)
|
@@ -130,7 +128,7 @@ def test_decode_json_regex():
|
|
130
128
|
def test_decode_json():
|
131
129
|
@sgl.function
|
132
130
|
def decode_json(s):
|
133
|
-
s += "Generate a JSON object to describe the basic information of
|
131
|
+
s += "Generate a JSON object to describe the basic city information of Paris.\n"
|
134
132
|
|
135
133
|
with s.var_scope("json_output"):
|
136
134
|
s += "{\n"
|
@@ -141,8 +139,12 @@ def test_decode_json():
|
|
141
139
|
s += ' "timezone": ' + sgl.gen(dtype=str) + "\n"
|
142
140
|
s += "}"
|
143
141
|
|
144
|
-
ret = decode_json.run()
|
145
|
-
|
142
|
+
ret = decode_json.run(max_new_tokens=64)
|
143
|
+
try:
|
144
|
+
js_obj = json.loads(ret["json_output"])
|
145
|
+
except json.decoder.JSONDecodeError:
|
146
|
+
print("JSONDecodeError", ret["json_output"])
|
147
|
+
raise
|
146
148
|
assert isinstance(js_obj["name"], str)
|
147
149
|
assert isinstance(js_obj["population"], int)
|
148
150
|
|
@@ -261,6 +263,7 @@ def test_parallel_decoding():
|
|
261
263
|
s += "\nIn summary," + sgl.gen("summary", max_tokens=512)
|
262
264
|
|
263
265
|
ret = parallel_decoding.run(topic="writing a good blog post", temperature=0.3)
|
266
|
+
assert isinstance(ret["summary"], str)
|
264
267
|
|
265
268
|
|
266
269
|
def test_parallel_encoding(check_answer=True):
|
sglang/test/test_utils.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
"""Common utilities for testing and benchmarking"""
|
2
2
|
|
3
3
|
import asyncio
|
4
|
+
import subprocess
|
5
|
+
import time
|
4
6
|
from functools import partial
|
5
7
|
|
6
8
|
import numpy as np
|
@@ -11,6 +13,8 @@ from sglang.lang.backend.openai import OpenAI
|
|
11
13
|
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
12
14
|
from sglang.utils import get_exception_traceback
|
13
15
|
|
16
|
+
MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
17
|
+
|
14
18
|
|
15
19
|
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
16
20
|
assert url is not None
|
@@ -379,3 +383,31 @@ def get_call_select(args):
|
|
379
383
|
raise
|
380
384
|
|
381
385
|
return func
|
386
|
+
|
387
|
+
|
388
|
+
def popen_launch_server(model, port, timeout, *args):
    """Start ``sglang.launch_server`` in a subprocess and wait until it responds.

    Args:
        model: Value passed to the server as ``--model-path``.
        port: Value passed as ``--port``; also used to build the polled URL.
        timeout: Maximum number of seconds to keep polling for readiness.
        *args: Extra command-line arguments appended to the launch command.

    Returns:
        The ``subprocess.Popen`` handle, as soon as
        ``GET http://localhost:{port}/v1/models`` answers with status 200.

    Raises:
        TimeoutError: If no 200 response is seen before ``timeout`` elapses.
            The spawned process is stopped before the error is raised so it
            is not left running.
    """
    command = [
        "python3",
        "-m",
        "sglang.launch_server",
        "--model-path",
        model,
        "--host",
        "localhost",
        "--port",
        str(port),
        *args,
    ]
    # stdout/stderr are inherited so the server's logs show up in the caller's output.
    process = subprocess.Popen(command, stdout=None, stderr=None)
    base_url = f"http://localhost:{port}/v1"

    start_time = time.time()
    while time.time() - start_time < timeout:
        try:
            # Bound each probe so a hung connection cannot block past the deadline.
            response = requests.get(f"{base_url}/models", timeout=10)
            if response.status_code == 200:
                return process
        except requests.RequestException:
            # Expected while the server is still starting; retry after the sleep.
            pass
        time.sleep(10)

    # The server never became ready: stop the child instead of leaking it.
    process.terminate()
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        # terminate() was not honoured in time; force the process down.
        process.kill()
        process.wait()
    raise TimeoutError("Server failed to start within the timeout period.")
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.2.
|
1
|
+
__version__ = "0.2.9"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.2.7
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -299,8 +299,8 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
|
299
299
|
|
300
300
|
### Method 2: From source
|
301
301
|
```
|
302
|
-
# Use the stable
|
303
|
-
git clone -b
|
302
|
+
# Use the stable v0.2.9 branch
|
303
|
+
git clone -b v0.2.9 https://github.com/sgl-project/sglang.git
|
304
304
|
cd sglang
|
305
305
|
|
306
306
|
pip install --upgrade pip
|
@@ -312,7 +312,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
|
312
312
|
|
313
313
|
### Method 3: Using docker
|
314
314
|
The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags), built from [Dockerfile](docker).
|
315
|
-
|
315
|
+
Replace `<secret>` below with your huggingface hub [token](https://huggingface.co/docs/hub/en/security-tokens).
|
316
316
|
|
317
317
|
```bash
|
318
318
|
docker run --gpus all \
|
@@ -1,18 +1,18 @@
|
|
1
1
|
sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
|
2
2
|
sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
|
3
3
|
sglang/bench_latency.py,sha256=JPatRvstM3nXb-ViVgtR-TaRrFHpcHzqoDG7BQmRYK8,10539
|
4
|
-
sglang/bench_serving.py,sha256=
|
4
|
+
sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
|
5
5
|
sglang/check_env.py,sha256=Eeb_20VetnlEFYSRcHFlNqt85lYUQN60NEtkoX7ahPA,4121
|
6
6
|
sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
|
7
7
|
sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
|
8
8
|
sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
|
9
9
|
sglang/utils.py,sha256=r0Z7hY_bFFk-b6WeQJir9br-hCW2-p7n5E7Et2WziaQ,8776
|
10
|
-
sglang/version.py,sha256=
|
10
|
+
sglang/version.py,sha256=F8OVhAhMXSkvvXYgZtbPn2SG1AQC3joK4yu-FrHt81Y,22
|
11
11
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
|
13
13
|
sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
|
14
|
-
sglang/lang/interpreter.py,sha256=
|
15
|
-
sglang/lang/ir.py,sha256=
|
14
|
+
sglang/lang/interpreter.py,sha256=_MbvYB0vweCgALklpM2DlofiCXuITCmX_fl8rPPcp5U,30340
|
15
|
+
sglang/lang/ir.py,sha256=0r-mhA4aO-uuS97Dvkw99ERTcJXfzuV6jJQMmuCwHEg,16615
|
16
16
|
sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
|
17
17
|
sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
18
|
sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
|
@@ -26,18 +26,18 @@ sglang/srt/hf_transformers_utils.py,sha256=Fg-3panb6lsqOhHmAYA0ivkXyBjdnvY5mqvil
|
|
26
26
|
sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
|
27
27
|
sglang/srt/model_config.py,sha256=DO7m84WiT3dzPWmyKz_UXDAHEdqEjq8Lq5wCjzjYMME,6023
|
28
28
|
sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
|
29
|
-
sglang/srt/server.py,sha256=
|
30
|
-
sglang/srt/server_args.py,sha256=
|
31
|
-
sglang/srt/utils.py,sha256=
|
29
|
+
sglang/srt/server.py,sha256=cDHUmLqj7MjF-3L9WcfA-4z9dRl55cwF5ygXuncMl-Q,15852
|
30
|
+
sglang/srt/server_args.py,sha256=wdRlxR-509RfNYuMQoxUAefMwoc5eme6sYwEMyRBHmk,16034
|
31
|
+
sglang/srt/utils.py,sha256=5wgGe6kI59JAmf8kxLsItulJ4xQaOJHHYaWWd6_WWmo,23384
|
32
32
|
sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
|
33
|
-
sglang/srt/constrained/
|
34
|
-
sglang/srt/constrained/fsm_cache.py,sha256=
|
35
|
-
sglang/srt/constrained/jump_forward.py,sha256=
|
33
|
+
sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
|
34
|
+
sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
|
35
|
+
sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
|
36
36
|
sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
|
37
37
|
sglang/srt/layers/extend_attention.py,sha256=zuNnAdL_wF6BX0Mwn1dgDJvh3YJjYwqa5Fbzp8muOVc,12573
|
38
38
|
sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
|
39
39
|
sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
|
40
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
40
|
+
sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
|
41
41
|
sglang/srt/layers/radix_attention.py,sha256=tdA-kdd9LQY1wbw3iYuy-9cikVJYmy3EctwAlUfN-Uo,6945
|
42
42
|
sglang/srt/layers/token_attention.py,sha256=ylUqUnozJCCohxTGAiiP3sxgUrcXfEVic8-qgcHYDj4,7968
|
43
43
|
sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
|
@@ -47,14 +47,16 @@ sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk
|
|
47
47
|
sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
|
48
48
|
sglang/srt/managers/io_struct.py,sha256=Rz7Ur9Yw6prDGdy6XjsSiUmVBccS6cef-G_9TW7HA_4,7105
|
49
49
|
sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
|
50
|
-
sglang/srt/managers/schedule_batch.py,sha256=
|
51
|
-
sglang/srt/managers/tokenizer_manager.py,sha256=
|
52
|
-
sglang/srt/managers/tp_worker.py,sha256=
|
50
|
+
sglang/srt/managers/schedule_batch.py,sha256=LIoVCPNivh0u1dOrrWRgFD6a4ywq3nrG_4dNgCK0kIw,37697
|
51
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=rtZ44aiZOMHLHkXDhMgj0HDR3gExpeGjWfoCD0PfG_o,20574
|
52
|
+
sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
|
53
|
+
sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
|
54
|
+
sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
|
53
55
|
sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
|
54
56
|
sglang/srt/mem_cache/memory_pool.py,sha256=wkhjyYLbAZrl2FB5i4ODkxgMufBuDpe4N0kbXhu6ZO0,4509
|
55
|
-
sglang/srt/mem_cache/radix_cache.py,sha256=
|
57
|
+
sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
|
56
58
|
sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
|
57
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
59
|
+
sglang/srt/model_executor/model_runner.py,sha256=fo3fbnNaHkcHz2UDkyvFjU7sGvdClhmhdelQh0n9PgA,16079
|
58
60
|
sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
|
59
61
|
sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
|
60
62
|
sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
|
@@ -67,7 +69,7 @@ sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,1
|
|
67
69
|
sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
|
68
70
|
sglang/srt/models/grok.py,sha256=NfZdsRVErDIUWFqjhtNf2pqC9G4cRdYHBFpgDq1IZ2A,27855
|
69
71
|
sglang/srt/models/internlm2.py,sha256=Ld2GUxZeqqqJ2vd4QiX2s1y2AceJLA1nVnUYY88GMQk,12219
|
70
|
-
sglang/srt/models/llama2.py,sha256=
|
72
|
+
sglang/srt/models/llama2.py,sha256=zfOk3OK1_B6s6yuXsZFmNCf07RsfytVD72GunLBt8Cc,14282
|
71
73
|
sglang/srt/models/llama_classification.py,sha256=4r_orFZqBR3U_yC4bus1K3Z3-ADscYGSzgA82_VDN0g,4926
|
72
74
|
sglang/srt/models/llava.py,sha256=BJphgyQGdo7uTpJcKGEfWwdpH9GTMDnyiznLSSgmvm8,18476
|
73
75
|
sglang/srt/models/llavavid.py,sha256=-7vaVqaIfukCvMkNakEPblpwjIHC6ezrAvmpE5RzlUY,13602
|
@@ -80,14 +82,16 @@ sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12
|
|
80
82
|
sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
|
81
83
|
sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
|
82
84
|
sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
|
83
|
-
sglang/srt/openai_api/adapter.py,sha256=
|
84
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
85
|
-
sglang/test/
|
86
|
-
sglang/test/
|
87
|
-
sglang/test/
|
88
|
-
sglang/test/
|
89
|
-
sglang
|
90
|
-
sglang
|
91
|
-
sglang-0.2.
|
92
|
-
sglang-0.2.
|
93
|
-
sglang-0.2.
|
85
|
+
sglang/srt/openai_api/adapter.py,sha256=h6TIU0Fu3jU361pye4J12vcDug7UJJRPiBAY_HfFUuE,32599
|
86
|
+
sglang/srt/openai_api/protocol.py,sha256=JXLnnQ63I-bJv93ICPfP0cBpyomQA5IYE_mkUg5X4Es,8177
|
87
|
+
sglang/test/run_eval.py,sha256=WvMLSi70G9fhruP8cPLOfDJ9XEKL7yNn2pylx-7tNsQ,3054
|
88
|
+
sglang/test/simple_eval_common.py,sha256=Qh1-iEXJCKfJmgpAzNSp28fcP1TUJzt3s9i1FjvemHY,12340
|
89
|
+
sglang/test/simple_eval_humaneval.py,sha256=IW0ZC6D4SXu06IJiMoAY9DK9SMsTOlDPAwu4cfbJco0,5826
|
90
|
+
sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
|
91
|
+
sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
|
92
|
+
sglang/test/test_utils.py,sha256=PndOL1zdseMrpHTHGmgsHHepxqYBn__eNLrlsSXLy6k,11905
|
93
|
+
sglang-0.2.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
94
|
+
sglang-0.2.9.dist-info/METADATA,sha256=8vhH67MeR6EdJepUSvmqKSneJTQ8l_9LD9L6FfzyrHk,33214
|
95
|
+
sglang-0.2.9.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
96
|
+
sglang-0.2.9.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
97
|
+
sglang-0.2.9.dist-info/RECORD,,
|
sglang/test/test_conversation.py
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
from sglang.srt.conversation import generate_chat_conv
|
2
|
-
from sglang.srt.managers.openai_api.protocol import (
|
3
|
-
ChatCompletionMessageContentImagePart,
|
4
|
-
ChatCompletionMessageContentImageURL,
|
5
|
-
ChatCompletionMessageContentTextPart,
|
6
|
-
ChatCompletionMessageGenericParam,
|
7
|
-
ChatCompletionMessageUserParam,
|
8
|
-
ChatCompletionRequest,
|
9
|
-
)
|
10
|
-
|
11
|
-
|
12
|
-
def test_chat_completion_to_conv_image():
|
13
|
-
"""Test that we can convert a chat image request to a convo"""
|
14
|
-
request = ChatCompletionRequest(
|
15
|
-
model="default",
|
16
|
-
messages=[
|
17
|
-
ChatCompletionMessageGenericParam(
|
18
|
-
role="system", content="You are a helpful AI assistant"
|
19
|
-
),
|
20
|
-
ChatCompletionMessageUserParam(
|
21
|
-
role="user",
|
22
|
-
content=[
|
23
|
-
ChatCompletionMessageContentTextPart(
|
24
|
-
type="text", text="Describe this image"
|
25
|
-
),
|
26
|
-
ChatCompletionMessageContentImagePart(
|
27
|
-
type="image_url",
|
28
|
-
image_url=ChatCompletionMessageContentImageURL(
|
29
|
-
url="https://someurl.com"
|
30
|
-
),
|
31
|
-
),
|
32
|
-
],
|
33
|
-
),
|
34
|
-
],
|
35
|
-
)
|
36
|
-
conv = generate_chat_conv(request, "vicuna_v1.1")
|
37
|
-
assert conv.messages == [
|
38
|
-
["USER", "Describe this image<image>"],
|
39
|
-
["ASSISTANT", None],
|
40
|
-
]
|
41
|
-
assert conv.system_message == "You are a helpful AI assistant"
|
42
|
-
assert conv.image_data == ["https://someurl.com"]
|
43
|
-
|
44
|
-
|
45
|
-
if __name__ == "__main__":
|
46
|
-
test_chat_completion_to_conv_image()
|
@@ -1,51 +0,0 @@
|
|
1
|
-
from sglang.srt.managers.openai_api.protocol import (
|
2
|
-
ChatCompletionMessageContentImagePart,
|
3
|
-
ChatCompletionMessageContentImageURL,
|
4
|
-
ChatCompletionMessageContentTextPart,
|
5
|
-
ChatCompletionMessageGenericParam,
|
6
|
-
ChatCompletionMessageUserParam,
|
7
|
-
ChatCompletionRequest,
|
8
|
-
)
|
9
|
-
|
10
|
-
|
11
|
-
def test_chat_completion_request_image():
|
12
|
-
"""Test that Chat Completion Requests with images can be converted."""
|
13
|
-
|
14
|
-
image_request = {
|
15
|
-
"model": "default",
|
16
|
-
"messages": [
|
17
|
-
{"role": "system", "content": "You are a helpful AI assistant"},
|
18
|
-
{
|
19
|
-
"role": "user",
|
20
|
-
"content": [
|
21
|
-
{"type": "text", "text": "Describe this image"},
|
22
|
-
{"type": "image_url", "image_url": {"url": "https://someurl.com"}},
|
23
|
-
],
|
24
|
-
},
|
25
|
-
],
|
26
|
-
"temperature": 0,
|
27
|
-
"max_tokens": 64,
|
28
|
-
}
|
29
|
-
request = ChatCompletionRequest(**image_request)
|
30
|
-
assert len(request.messages) == 2
|
31
|
-
assert request.messages[0] == ChatCompletionMessageGenericParam(
|
32
|
-
role="system", content="You are a helpful AI assistant"
|
33
|
-
)
|
34
|
-
assert request.messages[1] == ChatCompletionMessageUserParam(
|
35
|
-
role="user",
|
36
|
-
content=[
|
37
|
-
ChatCompletionMessageContentTextPart(
|
38
|
-
type="text", text="Describe this image"
|
39
|
-
),
|
40
|
-
ChatCompletionMessageContentImagePart(
|
41
|
-
type="image_url",
|
42
|
-
image_url=ChatCompletionMessageContentImageURL(
|
43
|
-
url="https://someurl.com"
|
44
|
-
),
|
45
|
-
),
|
46
|
-
],
|
47
|
-
)
|
48
|
-
|
49
|
-
|
50
|
-
if __name__ == "__main__":
|
51
|
-
test_chat_completion_request_image()
|
File without changes
|
File without changes
|
File without changes
|