sglang 0.2.11__py3-none-any.whl → 0.2.12__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- sglang/bench_latency.py +6 -4
- sglang/bench_serving.py +46 -22
- sglang/lang/compiler.py +2 -2
- sglang/lang/ir.py +3 -3
- sglang/srt/constrained/base_tool_cache.py +1 -1
- sglang/srt/constrained/fsm_cache.py +12 -2
- sglang/srt/layers/activation.py +33 -0
- sglang/srt/layers/{token_attention.py → decode_attention.py} +9 -5
- sglang/srt/layers/extend_attention.py +6 -1
- sglang/srt/layers/layernorm.py +65 -0
- sglang/srt/layers/logits_processor.py +5 -0
- sglang/srt/layers/pooler.py +50 -0
- sglang/srt/layers/{context_flashattention_nopad.py → prefill_attention.py} +5 -0
- sglang/srt/layers/radix_attention.py +2 -2
- sglang/srt/managers/detokenizer_manager.py +31 -9
- sglang/srt/managers/io_struct.py +63 -0
- sglang/srt/managers/policy_scheduler.py +173 -25
- sglang/srt/managers/schedule_batch.py +110 -87
- sglang/srt/managers/tokenizer_manager.py +193 -111
- sglang/srt/managers/tp_worker.py +289 -352
- sglang/srt/mem_cache/{base_cache.py → base_prefix_cache.py} +9 -4
- sglang/srt/mem_cache/chunk_cache.py +43 -20
- sglang/srt/mem_cache/memory_pool.py +2 -2
- sglang/srt/mem_cache/radix_cache.py +74 -40
- sglang/srt/model_executor/cuda_graph_runner.py +24 -9
- sglang/srt/model_executor/forward_batch_info.py +168 -105
- sglang/srt/model_executor/model_runner.py +24 -37
- sglang/srt/models/gemma2.py +0 -1
- sglang/srt/models/internlm2.py +2 -7
- sglang/srt/models/llama2.py +4 -4
- sglang/srt/models/llama_embedding.py +88 -0
- sglang/srt/models/qwen2_moe.py +0 -11
- sglang/srt/openai_api/adapter.py +155 -27
- sglang/srt/openai_api/protocol.py +37 -1
- sglang/srt/sampling/penaltylib/__init__.py +13 -0
- sglang/srt/sampling/penaltylib/orchestrator.py +357 -0
- sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +80 -0
- sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py +105 -0
- sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py +79 -0
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +83 -0
- sglang/srt/sampling_params.py +31 -4
- sglang/srt/server.py +69 -15
- sglang/srt/server_args.py +26 -19
- sglang/srt/utils.py +31 -13
- sglang/test/run_eval.py +10 -1
- sglang/test/runners.py +63 -63
- sglang/test/simple_eval_humaneval.py +2 -8
- sglang/test/simple_eval_mgsm.py +203 -0
- sglang/test/srt/sampling/penaltylib/utils.py +337 -0
- sglang/test/test_layernorm.py +60 -0
- sglang/test/test_programs.py +4 -2
- sglang/test/test_utils.py +20 -2
- sglang/utils.py +0 -1
- sglang/version.py +1 -1
- {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/METADATA +23 -14
- sglang-0.2.12.dist-info/RECORD +112 -0
- sglang/srt/layers/linear.py +0 -884
- sglang/srt/layers/quantization/__init__.py +0 -64
- sglang/srt/layers/quantization/fp8.py +0 -677
- sglang-0.2.11.dist-info/RECORD +0 -102
- {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/LICENSE +0 -0
- {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/WHEEL +0 -0
- {sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py
CHANGED
@@ -12,6 +12,8 @@ from typing import Callable, List, Optional
 
 import numpy as np
 import requests
+import torch
+import torch.nn.functional as F
 
 from sglang.global_config import global_config
 from sglang.lang.backend.openai import OpenAI
@@ -19,6 +21,7 @@ from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.utils import get_exception_traceback
 
 DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+DEFAULT_URL_FOR_TEST = "http://127.0.0.1:8157"
 
 
 def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
@@ -396,6 +399,8 @@ def popen_launch_server(
     timeout: float,
     api_key: Optional[str] = None,
     other_args: tuple = (),
+    env: Optional[dict] = None,
+    return_stdout_stderr: bool = False,
 ):
     _, host, port = base_url.split(":")
     host = host[2:]
@@ -415,7 +420,16 @@ def popen_launch_server(
     if api_key:
         command += ["--api-key", api_key]
 
-
+    if return_stdout_stderr:
+        process = subprocess.Popen(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=env,
+            text=True,
+        )
+    else:
+        process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
 
     start_time = time.time()
     while time.time() - start_time < timeout:
@@ -482,7 +496,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float):
             p.terminate()
             time.sleep(5)
             print(
-                "\nTimeout after {timeout_per_file} seconds when running {filename}\n"
+                f"\nTimeout after {timeout_per_file} seconds when running {filename}\n"
             )
             return False
 
@@ -492,3 +506,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float):
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")
 
    return 0 if success else -1
+
+
+def get_similarities(vec1, vec2):
+    return F.cosine_similarity(torch.tensor(vec1), torch.tensor(vec2), dim=0)
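The hunks above add two test utilities: `popen_launch_server` can now capture the child process's output (`return_stdout_stderr=True`) and receive a custom environment (`env=...`), and a new `get_similarities` helper computes the cosine similarity of two vectors. A minimal usage sketch follows; the leading positional arguments of `popen_launch_server` (model path and base URL), the timeout value, and the example vectors are not shown in the hunks above and are assumptions here.

```python
# Sketch only: the first two positional arguments and the timeout value are
# assumptions, not taken from the diff hunks above.
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
    get_similarities,
    popen_launch_server,
)

process = popen_launch_server(
    DEFAULT_MODEL_NAME_FOR_TEST,  # assumed first positional: model path
    DEFAULT_URL_FOR_TEST,         # assumed second positional: base URL
    timeout=300,
    return_stdout_stderr=True,    # new flag: capture stdout/stderr via pipes
)
try:
    # ... issue requests against DEFAULT_URL_FOR_TEST here ...
    score = get_similarities([1.0, 0.0], [0.7, 0.7])  # cosine similarity ~= 0.7071
    assert score > 0.5
finally:
    process.terminate()
```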
sglang/utils.py
CHANGED
sglang/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.2.11"
+__version__ = "0.2.12"
{sglang-0.2.11.dist-info → sglang-0.2.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.2.11
+Version: 0.2.12
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
 Version 2.0, January 2004
@@ -308,7 +308,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.2.11 https://github.com/sgl-project/sglang.git
+git clone -b v0.2.12 https://github.com/sgl-project/sglang.git
 cd sglang
 
 pip install --upgrade pip
@@ -392,23 +392,23 @@ print(response)
 It supports streaming, vision, and most features of the Chat/Completions/Models/Batch endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/).
 
 ### Additional Server Arguments
-- Add `--tp 2` to enable tensor parallelism. If it
+- Add `--tp 2` to enable multi-GPU tensor parallelism. If it reports the error "peer access is not supported between these two devices", add `--enable-p2p-check` to the server launch command.
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --tp 2
 ```
-- Add `--dp 2` to enable data parallelism. It can also be used together with
+- Add `--dp 2` to enable multi-GPU data parallelism. It can also be used together with tensor parallelism. Data parallelism is better for throughput if there is enough memory.
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --dp 2 --tp 2
 ```
-- If you see out-of-memory errors during serving,
+- If you see out-of-memory errors during serving, try to reduce the memory usage of the KV cache pool by setting a smaller value of `--mem-fraction-static`. The default value is `0.9`.
 ```
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
 ```
--
+- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
+- If you see out-of-memory errors during prefill for long prompts, try to set a smaller chunked prefill size.
 ```
-python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3
+python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --chunked-prefill-size 4096
 ```
-- See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
 - Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
 ```
 # Node 0
@@ -418,13 +418,13 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
 ```
 - If the model does not have a template in the Hugging Face tokenizer, you can specify a [custom chat template](docs/en/custom_chat_template.md).
-- To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
 - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
-
+- To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
+
 ### Supported Models
 
 - Llama / Llama 2 / Llama 3 / Llama 3.1
-- Mistral / Mixtral
+- Mistral / Mixtral / Mistral NeMo
 - Gemma / Gemma 2
 - Qwen / Qwen 2 / Qwen 2 MoE
 - DeepSeek / DeepSeek 2
@@ -442,11 +442,20 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 - Grok
 - ChatGLM
 - InternLM 2
-- Mistral NeMo
 
 Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
 
-
+#### Use Models From ModelScope
+To use model from [ModelScope](https://www.modelscope.cn), setting environment variable SGLANG_USE_MODELSCOPE.
+```
+export SGLANG_USE_MODELSCOPE=true
+```
+Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server
+```
+SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000
+```
+
+#### Run Llama 3.1 405B
 
 ```bash
 ## Run 405B (fp8) on a single node
@@ -474,7 +483,7 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
 ```
 
 ## Frontend: Structured Generation Language (SGLang)
-The frontend language can be used with local models or API models.
+The frontend language can be used with local models or API models. It is an alternative to the OpenAI API. You may found it easier to use for complex prompting workflow.
 
 ### Quick Start
 The example below shows how to use sglang to answer a mulit-turn question.
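The README hunk above ends at the quick-start sentence, so the multi-turn example itself is not part of this diff. For context, a minimal sketch of that frontend pattern against a locally launched server; the endpoint URL, questions, and `max_tokens` values are placeholders, not taken from the diff.

```python
# Sketch only: illustrates the SGLang frontend quick-start pattern referenced
# above. URL, prompts, and max_tokens are placeholder assumptions.
import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    # Build a chat state turn by turn; each gen() result is stored under its name.
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


# Point the frontend at a server launched as shown in the README section above.
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

state = multi_turn_question.run(
    question_1="What is the capital of France?",
    question_2="List two landmarks there.",
)
print(state["answer_1"])
print(state["answer_2"])
```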
sglang-0.2.12.dist-info/RECORD
ADDED
@@ -0,0 +1,112 @@
+sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
+sglang/api.py,sha256=gAY9JhqWXjrYoWnMvR-iiuuY1YSN94We-lc1LH0z3cw,6030
+sglang/bench_latency.py,sha256=E-cfuZSjBGonzKL0LgB0zAqMWpiP3qozB_Ht9dH8qvc,16207
+sglang/bench_serving.py,sha256=sS-fawAyzngrOVbPE3N1FBxPojoPd9vj9XQDsWpIYTQ,35798
+sglang/check_env.py,sha256=oU8VmjjPK2SviRhr41cF1953soBu-eTT5E0Hf04zMzo,4974
+sglang/global_config.py,sha256=9JxaFkBKSgep6BVeEl_kx9tuW9PqdijYELyBGTryl6o,1704
+sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
+sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
+sglang/utils.py,sha256=zFYGkC4vOUR3sTv1TmQXcsOLZDtDBR3wnjqnDp3xMIs,8352
+sglang/version.py,sha256=X4KG3FscE5AhbGbcdDDgdDC550CVpxNMwdNLcx6EQ7M,23
+sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
+sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
+sglang/lang/compiler.py,sha256=1Tc6MQs4RsIfrNmmO7PMSUEHIqvNqKOp_HxaYqonwFE,7533
+sglang/lang/interpreter.py,sha256=3RIeSGdKlKTq2Ixg_Tyo0fGEDTvBKS2f9FaJYODBHzA,30102
+sglang/lang/ir.py,sha256=Ow6jXDPIeRd1piAuYjvgyFxfro1G2_-1QwUFfq4Aihs,16842
+sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
+sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
+sglang/lang/backend/base_backend.py,sha256=Q5HdiDtyBewQeoYH0kDtBRVL8KFiEPNq9dw7XmauHQ8,1985
+sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
+sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
+sglang/lang/backend/runtime_endpoint.py,sha256=AaBc5yczchX7mkwiKDMyjLjBkJsh2Lubrfd9lvCOlDo,9544
+sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
+sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
+sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
+sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
+sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
+sglang/srt/sampling_params.py,sha256=5V1MhhEvyCWZrCF5VmQxcKNuKVoC4LynY-q4Bx3P3mo,4876
+sglang/srt/server.py,sha256=FvczPB9ojDVLIdC2kic0RLAmOTt0WZrql_BvYzwbeRY,18495
+sglang/srt/server_args.py,sha256=GLuJkgwv-Osmf3IqCvZqfdqIBJjcHkdtoNT0_zq75Kc,16849
+sglang/srt/utils.py,sha256=ReJqGMdquK_cfve269yjpWWQaozTVoEHSLG5P3CKvAg,24102
+sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
+sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
+sglang/srt/constrained/fsm_cache.py,sha256=QTrBFoZCp2FeigtIakz2MCgQLtvQFXgl2lDPQaGtu9M,2784
+sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
+sglang/srt/layers/activation.py,sha256=MXkuGi5caKHEwqUegoEfOk2Omab8OLrxP-sjPj2TVzU,1197
+sglang/srt/layers/decode_attention.py,sha256=Vgxd2rWzSZkNFp0bjZRAUAusG4bz6iy3D0CULnN-cdk,8904
+sglang/srt/layers/extend_attention.py,sha256=_LOgzSr-1c2UweHZXADjWHbXOmd2JPm-tUMb1vwTTZI,14197
+sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
+sglang/srt/layers/layernorm.py,sha256=RzN4eESN9S8mw32r2Nxarq7wKFdeG1yhxPmehUMx79s,2073
+sglang/srt/layers/logits_processor.py,sha256=iewPk7VR4jdJeLH6NAO_XqwqM4RhIHdWJzj7-qPRYIw,11362
+sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
+sglang/srt/layers/prefill_attention.py,sha256=y7vdcuX8lMa9Qf_jQYNDvQO9PVCBQSs3hb5LV2DFgpU,5256
+sglang/srt/layers/radix_attention.py,sha256=LpfTizXKXm1oS5oUfh6aowZceHUHqnquvx-GpfyYjdk,7508
+sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
+sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
+sglang/srt/managers/detokenizer_manager.py,sha256=OXufjdCt2ebt-S7MDndjY9Ew16rP4fhualGgj6YEKp0,6295
+sglang/srt/managers/io_struct.py,sha256=Xvfl6DNZ2Ek2S4qlRzpVo3foc-aC-1-N-5odcJ4gdq4,9446
+sglang/srt/managers/policy_scheduler.py,sha256=KRFaZwjCAkPQDX3W8lbzrxYqgOe7LKFDj2BPlcmlnR8,8379
+sglang/srt/managers/schedule_batch.py,sha256=iZ2OwdEn5As7cVGAoe0x97cMCPSS6q_SI_iG79mF8LQ,31111
+sglang/srt/managers/tokenizer_manager.py,sha256=TIIo4YlfdM10LE4JVqv2cO2uDJJtKXDagwzfjMCDU5Q,24858
+sglang/srt/managers/tp_worker.py,sha256=qOx99QL6BIW0aOz7SknWqgflLeNeFYpJsGq0ZsYmYFY,32805
+sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
+sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
+sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
+sglang/srt/mem_cache/memory_pool.py,sha256=eXDCstd5Mvu1CbHt1y9z27Eq60QYwW45FsKbZspu4yw,5310
+sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=xQgTTtoMkvYJhYyRJHxPdybmPtfvcODqPLW9btUFt60,10003
+sglang/srt/model_executor/forward_batch_info.py,sha256=B3flTlRNLMa7Km7use1O0Z2YL3-a6rw1BodNKjKV51g,11049
+sglang/srt/model_executor/model_runner.py,sha256=ZlFgqBNuqgWpa-NrjkfTT-_amtea33H9M1tBl-MT_nk,16977
+sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
+sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
+sglang/srt/models/chatglm.py,sha256=7bHU2AFoppINDZm0EdxgtAJe7rwr9OPkhOCfq2qNrIA,13862
+sglang/srt/models/commandr.py,sha256=5BEtIS2uUQJANkkY-6ZeDqlrpUK5yXVYHiztU3vsTKY,14172
+sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
+sglang/srt/models/deepseek.py,sha256=E5W4nkH-Ne449rAIwQZgz-FAH2Qqp2r1vNfboyk5wEg,16024
+sglang/srt/models/deepseek_v2.py,sha256=NMcckZb48kVUwAmDA2l8wO19T6DNkJOkKAhHa6utBZM,26968
+sglang/srt/models/gemma.py,sha256=ilfN_NOcz7hpwEJ2y7NW3fBFmFO7YfjhdFDbfzl2qww,12285
+sglang/srt/models/gemma2.py,sha256=ybQOXAPofw_Pv3mBer7dTpH4SlZt6Gf2I462Q3lOIww,16359
+sglang/srt/models/gpt_bigcode.py,sha256=OKk9UP67as3T5bePlTRGHTCD-1wqaUEk92AowXPm6dg,10204
+sglang/srt/models/grok.py,sha256=M9rtdXslqYBle5VyZqFVHiJUXq_q_aHbza63xa03zqI,27861
+sglang/srt/models/internlm2.py,sha256=6j7JH0p3yib8GZDH8Cmrs-pgwfH3eOlAK6V3Cq64O7w,12202
+sglang/srt/models/llama2.py,sha256=HmzE1I8OnesmrdPY5b56l7okhWH_lRvWAg16K-UwKHg,14300
+sglang/srt/models/llama_classification.py,sha256=Dvzy3PfETiJtnKFOk8qDDLUoZECf_cpSrNeA60PaDo4,4932
+sglang/srt/models/llama_embedding.py,sha256=e2lpZ6GHKrHT1rr7_5gHGoCpfqdOBMusZCz34n62lec,3542
+sglang/srt/models/llava.py,sha256=-ysi192vpBDxNaMS8qaLOhC34lXQyRtbG_0niVaceSo,18436
+sglang/srt/models/llavavid.py,sha256=MX7YpqYh5J4BoOnV7vVAIfoOlBFQXYpp8Kpe7WK0ejk,13562
+sglang/srt/models/minicpm.py,sha256=ea_OyiwVTo6Tg9jNRAwqxETnA6FFeAqlIbiUS-xViEI,13843
+sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
+sglang/srt/models/mixtral.py,sha256=raSLbp6AfWg5_u-f-lYeRejE9koAjbHt8iIHXd3nURM,21397
+sglang/srt/models/mixtral_quant.py,sha256=xYeeatZ9OfwCTas_KbH9nl6lnUT4YqSY7NAxpgLp5LE,14222
+sglang/srt/models/qwen.py,sha256=43ea6gn4wHzAaI3JTDLtl08aEm0vIqgzbVH9M8oeuY0,10006
+sglang/srt/models/qwen2.py,sha256=Hyhks2r4KHpKeb9iHZpnvEVc5klmnrPwcLohqg8j1kw,12284
+sglang/srt/models/qwen2_moe.py,sha256=pTfBivDyzdbcP22_7PdmdPqgx34esH8J98r-EgFA9Uw,17747
+sglang/srt/models/stablelm.py,sha256=yPrdzPEoUD2s_Q3RgOq7BBC7z-UtEaACzabqbDRs2tA,11368
+sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
+sglang/srt/openai_api/adapter.py,sha256=fgUAPAcQ_mUJszbpsI_cgv2vzOAS7AKKAJPi2B91aw4,42490
+sglang/srt/openai_api/protocol.py,sha256=knf-nds0XO2LYg-hPM-Ho1f1y2XZIV_Gvg3xcCKLfgQ,9411
+sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
+sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
+sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
+sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=XJZP0C4NFyXgcODbIWXxrgVEjmRgqLdZuVAtoN-LveY,3565
+sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
+sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
+sglang/test/run_eval.py,sha256=NWxeLWmInBgkCvC9Jr_QzF7GfAiBve3Gf1JQrEOlNlU,3899
+sglang/test/runners.py,sha256=FYLbrWePfTacN5bsbAgMl5RiDI4g_Bsbwh1gXqRwr0Y,7794
+sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
+sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
+sglang/test/simple_eval_humaneval.py,sha256=iCtN2LBL6j3nxMDjRJ--m0MCNPAwDo81gJ2whE-2Rt0,5674
+sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
+sglang/test/simple_eval_mgsm.py,sha256=wfbqJW9Rkc66vzq2fEMF6jchmoA8mw1OUiGU55cZ2B0,10261
+sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
+sglang/test/test_layernorm.py,sha256=VDdoeqGvebUa-l3rDiid6cC7wZq0Phpbm5fxxD0-cpg,1910
+sglang/test/test_programs.py,sha256=vRhKIriZgSk_Zn8gGviIfiY_suOBA7Ni7P0NaQM2Esk,13894
+sglang/test/test_utils.py,sha256=cO0ZbnfBS_MxyZ6MDyA7DrDVwu3umKRb3WP_dwggPng,14505
+sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
+sglang-0.2.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.2.12.dist-info/METADATA,sha256=k4QBFP1vyWHeXgCA9Npoz7Wb8qT9aC8rL7R1QP2J60g,34314
+sglang-0.2.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+sglang-0.2.12.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.2.12.dist-info/RECORD,,
|