sglang 0.4.1.post5__py3-none-any.whl → 0.4.1.post7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/__init__.py +21 -23
- sglang/api.py +2 -7
- sglang/bench_offline_throughput.py +24 -16
- sglang/bench_one_batch.py +51 -3
- sglang/bench_one_batch_server.py +1 -1
- sglang/bench_serving.py +37 -28
- sglang/lang/backend/runtime_endpoint.py +183 -4
- sglang/lang/chat_template.py +15 -4
- sglang/launch_server.py +1 -1
- sglang/srt/_custom_ops.py +80 -42
- sglang/srt/configs/device_config.py +1 -1
- sglang/srt/configs/model_config.py +16 -6
- sglang/srt/constrained/base_grammar_backend.py +21 -0
- sglang/srt/constrained/xgrammar_backend.py +8 -4
- sglang/srt/conversation.py +14 -1
- sglang/srt/distributed/__init__.py +3 -3
- sglang/srt/distributed/communication_op.py +2 -1
- sglang/srt/distributed/device_communicators/cuda_wrapper.py +2 -1
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +107 -40
- sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +2 -2
- sglang/srt/distributed/device_communicators/hpu_communicator.py +2 -1
- sglang/srt/distributed/device_communicators/pynccl.py +80 -1
- sglang/srt/distributed/device_communicators/pynccl_wrapper.py +112 -2
- sglang/srt/distributed/device_communicators/shm_broadcast.py +5 -72
- sglang/srt/distributed/device_communicators/xpu_communicator.py +2 -1
- sglang/srt/distributed/parallel_state.py +1 -1
- sglang/srt/distributed/utils.py +2 -1
- sglang/srt/entrypoints/engine.py +449 -0
- sglang/srt/entrypoints/http_server.py +579 -0
- sglang/srt/layers/activation.py +3 -3
- sglang/srt/layers/attention/flashinfer_backend.py +27 -12
- sglang/srt/layers/attention/triton_backend.py +4 -6
- sglang/srt/layers/attention/vision.py +204 -0
- sglang/srt/layers/dp_attention.py +69 -0
- sglang/srt/layers/linear.py +76 -102
- sglang/srt/layers/logits_processor.py +48 -63
- sglang/srt/layers/moe/ep_moe/layer.py +4 -4
- sglang/srt/layers/moe/fused_moe_native.py +69 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +9 -6
- sglang/srt/layers/moe/fused_moe_triton/layer.py +66 -14
- sglang/srt/layers/moe/topk.py +4 -2
- sglang/srt/layers/parameter.py +26 -17
- sglang/srt/layers/quantization/__init__.py +22 -23
- sglang/srt/layers/quantization/fp8.py +112 -55
- sglang/srt/layers/quantization/fp8_utils.py +1 -1
- sglang/srt/layers/quantization/int8_kernel.py +54 -0
- sglang/srt/layers/quantization/modelopt_quant.py +2 -3
- sglang/srt/layers/quantization/w8a8_int8.py +117 -0
- sglang/srt/layers/radix_attention.py +2 -0
- sglang/srt/layers/rotary_embedding.py +1179 -31
- sglang/srt/layers/sampler.py +39 -1
- sglang/srt/layers/vocab_parallel_embedding.py +17 -4
- sglang/srt/lora/lora.py +1 -9
- sglang/srt/managers/configure_logging.py +46 -0
- sglang/srt/managers/data_parallel_controller.py +79 -72
- sglang/srt/managers/detokenizer_manager.py +23 -8
- sglang/srt/managers/image_processor.py +158 -2
- sglang/srt/managers/io_struct.py +54 -15
- sglang/srt/managers/schedule_batch.py +49 -22
- sglang/srt/managers/schedule_policy.py +26 -12
- sglang/srt/managers/scheduler.py +319 -181
- sglang/srt/managers/session_controller.py +1 -0
- sglang/srt/managers/tokenizer_manager.py +303 -158
- sglang/srt/managers/tp_worker.py +6 -4
- sglang/srt/managers/tp_worker_overlap_thread.py +5 -8
- sglang/srt/managers/utils.py +44 -0
- sglang/srt/mem_cache/memory_pool.py +110 -77
- sglang/srt/metrics/collector.py +25 -11
- sglang/srt/model_executor/cuda_graph_runner.py +4 -6
- sglang/srt/model_executor/model_runner.py +80 -21
- sglang/srt/model_loader/loader.py +8 -6
- sglang/srt/model_loader/weight_utils.py +55 -2
- sglang/srt/models/baichuan.py +6 -6
- sglang/srt/models/chatglm.py +2 -2
- sglang/srt/models/commandr.py +3 -3
- sglang/srt/models/dbrx.py +4 -4
- sglang/srt/models/deepseek.py +3 -3
- sglang/srt/models/deepseek_v2.py +8 -8
- sglang/srt/models/exaone.py +2 -2
- sglang/srt/models/gemma.py +2 -2
- sglang/srt/models/gemma2.py +6 -24
- sglang/srt/models/gpt2.py +3 -5
- sglang/srt/models/gpt_bigcode.py +1 -1
- sglang/srt/models/granite.py +2 -2
- sglang/srt/models/grok.py +3 -3
- sglang/srt/models/internlm2.py +2 -2
- sglang/srt/models/llama.py +41 -4
- sglang/srt/models/minicpm.py +2 -2
- sglang/srt/models/minicpm3.py +6 -6
- sglang/srt/models/minicpmv.py +1238 -0
- sglang/srt/models/mixtral.py +3 -3
- sglang/srt/models/mixtral_quant.py +3 -3
- sglang/srt/models/mllama.py +2 -2
- sglang/srt/models/olmo.py +3 -3
- sglang/srt/models/olmo2.py +4 -4
- sglang/srt/models/olmoe.py +7 -13
- sglang/srt/models/phi3_small.py +2 -2
- sglang/srt/models/qwen.py +2 -2
- sglang/srt/models/qwen2.py +52 -4
- sglang/srt/models/qwen2_eagle.py +131 -0
- sglang/srt/models/qwen2_moe.py +3 -3
- sglang/srt/models/qwen2_vl.py +22 -122
- sglang/srt/models/stablelm.py +2 -2
- sglang/srt/models/torch_native_llama.py +3 -3
- sglang/srt/models/xverse.py +6 -6
- sglang/srt/models/xverse_moe.py +6 -6
- sglang/srt/openai_api/protocol.py +2 -0
- sglang/srt/sampling/custom_logit_processor.py +38 -0
- sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py +15 -5
- sglang/srt/sampling/sampling_batch_info.py +153 -9
- sglang/srt/sampling/sampling_params.py +4 -2
- sglang/srt/server.py +4 -1037
- sglang/srt/server_args.py +84 -32
- sglang/srt/speculative/eagle_worker.py +1 -0
- sglang/srt/torch_memory_saver_adapter.py +59 -0
- sglang/srt/utils.py +130 -63
- sglang/test/runners.py +8 -13
- sglang/test/test_programs.py +1 -1
- sglang/test/test_utils.py +3 -1
- sglang/utils.py +12 -2
- sglang/version.py +1 -1
- {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post7.dist-info}/METADATA +26 -13
- {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post7.dist-info}/RECORD +126 -117
- sglang/launch_server_llavavid.py +0 -25
- sglang/srt/constrained/__init__.py +0 -16
- sglang/srt/distributed/device_communicators/__init__.py +0 -0
- {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post7.dist-info}/LICENSE +0 -0
- {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post7.dist-info}/WHEEL +0 -0
- {sglang-0.4.1.post5.dist-info → sglang-0.4.1.post7.dist-info}/top_level.txt +0 -0
sglang/test/test_programs.py
CHANGED
@@ -535,7 +535,7 @@ def test_hellaswag_select():
|
|
535
535
|
|
536
536
|
# Compute accuracy
|
537
537
|
accuracy_gen = np.mean(np.array(preds_gen) == np.array(labels))
|
538
|
-
assert np.abs(accuracy_gen - accuracy) < 0.
|
538
|
+
assert np.abs(accuracy_gen - accuracy) < 0.05
|
539
539
|
assert np.abs(latency_gen - latency) < 1
|
540
540
|
|
541
541
|
return accuracy, latency
|
sglang/test/test_utils.py
CHANGED
@@ -40,6 +40,7 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Llama-3.1-70B-Instruct,mis
|
|
40
40
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8"
|
41
41
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
|
42
42
|
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
43
|
+
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
|
43
44
|
|
44
45
|
|
45
46
|
def is_in_ci():
|
@@ -405,7 +406,7 @@ def popen_launch_server(
|
|
405
406
|
base_url: str,
|
406
407
|
timeout: float,
|
407
408
|
api_key: Optional[str] = None,
|
408
|
-
other_args:
|
409
|
+
other_args: list[str] = (),
|
409
410
|
env: Optional[dict] = None,
|
410
411
|
return_stdout_stderr: Optional[tuple] = None,
|
411
412
|
):
|
@@ -560,6 +561,7 @@ def run_bench_serving(
|
|
560
561
|
tokenizer=tokenizer,
|
561
562
|
num_prompts=num_prompts,
|
562
563
|
sharegpt_output_len=None,
|
564
|
+
sharegpt_context_len=None,
|
563
565
|
random_input_len=random_input_len,
|
564
566
|
random_output_len=random_output_len,
|
565
567
|
random_range_ratio=0.0,
|
sglang/utils.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Common utilities"""
|
2
2
|
|
3
3
|
import base64
|
4
|
-
import gc
|
5
4
|
import importlib
|
6
5
|
import json
|
7
6
|
import logging
|
@@ -15,7 +14,7 @@ import urllib.request
|
|
15
14
|
from concurrent.futures import ThreadPoolExecutor
|
16
15
|
from io import BytesIO
|
17
16
|
from json import dumps
|
18
|
-
from typing import Optional, Union
|
17
|
+
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
19
18
|
|
20
19
|
import numpy as np
|
21
20
|
import requests
|
@@ -363,3 +362,14 @@ def terminate_process(process):
|
|
363
362
|
def print_highlight(html_content: str):
|
364
363
|
html_content = str(html_content).replace("\n", "<br>")
|
365
364
|
display(HTML(f"<strong style='color: #00008B;'>{html_content}</strong>"))
|
365
|
+
|
366
|
+
|
367
|
+
class TypeBasedDispatcher:
|
368
|
+
def __init__(self, mapping: List[Tuple[Type, Callable]]):
|
369
|
+
self._mapping = mapping
|
370
|
+
|
371
|
+
def __call__(self, obj: Any):
|
372
|
+
for ty, fn in self._mapping:
|
373
|
+
if isinstance(obj, ty):
|
374
|
+
return fn(obj)
|
375
|
+
raise ValueError(f"Invalid object: {obj}")
|
sglang/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.4.1.post5"
|
1
|
+
__version__ = "0.4.1.post7"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: sglang
|
3
|
-
Version: 0.4.1.post5
|
3
|
+
Version: 0.4.1.post7
|
4
4
|
Summary: SGLang is yet another fast serving framework for large language models and vision language models.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -236,13 +236,13 @@ Requires-Dist: pyzmq>=25.1.2; extra == "runtime-common"
|
|
236
236
|
Requires-Dist: torchao>=0.7.0; extra == "runtime-common"
|
237
237
|
Requires-Dist: uvicorn; extra == "runtime-common"
|
238
238
|
Requires-Dist: uvloop; extra == "runtime-common"
|
239
|
-
Requires-Dist: xgrammar>=0.1.
|
239
|
+
Requires-Dist: xgrammar>=0.1.10; extra == "runtime-common"
|
240
240
|
Provides-Extra: srt
|
241
241
|
Requires-Dist: sglang[runtime_common]; extra == "srt"
|
242
242
|
Requires-Dist: cuda-python; extra == "srt"
|
243
|
-
Requires-Dist: sgl-kernel>=0.0.2.
|
243
|
+
Requires-Dist: sgl-kernel>=0.0.2.post14; extra == "srt"
|
244
244
|
Requires-Dist: torch; extra == "srt"
|
245
|
-
Requires-Dist: vllm
|
245
|
+
Requires-Dist: vllm==0.6.4.post1; extra == "srt"
|
246
246
|
Requires-Dist: flashinfer==0.1.6; extra == "srt"
|
247
247
|
Provides-Extra: srt-hip
|
248
248
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
|
@@ -252,6 +252,9 @@ Provides-Extra: srt-xpu
|
|
252
252
|
Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
|
253
253
|
Provides-Extra: srt-hpu
|
254
254
|
Requires-Dist: sglang[runtime_common]; extra == "srt-hpu"
|
255
|
+
Provides-Extra: srt-cpu
|
256
|
+
Requires-Dist: sglang[runtime_common]; extra == "srt-cpu"
|
257
|
+
Requires-Dist: torch; extra == "srt-cpu"
|
255
258
|
Provides-Extra: openai
|
256
259
|
Requires-Dist: openai>=1.0; extra == "openai"
|
257
260
|
Requires-Dist: tiktoken; extra == "openai"
|
@@ -259,6 +262,8 @@ Provides-Extra: anthropic
|
|
259
262
|
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
260
263
|
Provides-Extra: litellm
|
261
264
|
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
265
|
+
Provides-Extra: torch-memory-saver
|
266
|
+
Requires-Dist: torch_memory_saver; extra == "torch-memory-saver"
|
262
267
|
Provides-Extra: test
|
263
268
|
Requires-Dist: jsonlines; extra == "test"
|
264
269
|
Requires-Dist: matplotlib; extra == "test"
|
@@ -286,6 +291,11 @@ Requires-Dist: sglang[srt_hpu]; extra == "all-hpu"
|
|
286
291
|
Requires-Dist: sglang[openai]; extra == "all-hpu"
|
287
292
|
Requires-Dist: sglang[anthropic]; extra == "all-hpu"
|
288
293
|
Requires-Dist: sglang[litellm]; extra == "all-hpu"
|
294
|
+
Provides-Extra: all-cpu
|
295
|
+
Requires-Dist: sglang[srt_cpu]; extra == "all-cpu"
|
296
|
+
Requires-Dist: sglang[openai]; extra == "all-cpu"
|
297
|
+
Requires-Dist: sglang[anthropic]; extra == "all-cpu"
|
298
|
+
Requires-Dist: sglang[litellm]; extra == "all-cpu"
|
289
299
|
Provides-Extra: dev
|
290
300
|
Requires-Dist: sglang[all]; extra == "dev"
|
291
301
|
Requires-Dist: sglang[test]; extra == "dev"
|
@@ -298,6 +308,9 @@ Requires-Dist: sglang[test]; extra == "dev-xpu"
|
|
298
308
|
Provides-Extra: dev-hpu
|
299
309
|
Requires-Dist: sglang[all_hpu]; extra == "dev-hpu"
|
300
310
|
Requires-Dist: sglang[test]; extra == "dev-hpu"
|
311
|
+
Provides-Extra: dev-cpu
|
312
|
+
Requires-Dist: sglang[all_cpu]; extra == "dev-cpu"
|
313
|
+
Requires-Dist: sglang[test]; extra == "dev-cpu"
|
301
314
|
|
302
315
|
<div align="center" id="sglangtop">
|
303
316
|
<img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400" margin="10px"></img>
|
@@ -314,9 +327,9 @@ Requires-Dist: sglang[test]; extra == "dev-hpu"
|
|
314
327
|
--------------------------------------------------------------------------------
|
315
328
|
|
316
329
|
| [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/)
|
317
|
-
| [**Documentation**](https://
|
318
|
-
| [**Join Slack**](https://
|
319
|
-
| [**Join Bi-Weekly Development Meeting**](https://
|
330
|
+
| [**Documentation**](https://docs.sglang.ai/)
|
331
|
+
| [**Join Slack**](https://slack.sglang.ai/)
|
332
|
+
| [**Join Bi-Weekly Development Meeting**](https://meeting.sglang.ai/)
|
320
333
|
| [**Slides**](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#slides) |
|
321
334
|
|
322
335
|
## News
|
@@ -346,11 +359,11 @@ The core features include:
|
|
346
359
|
- **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
|
347
360
|
|
348
361
|
## Getting Started
|
349
|
-
- [Install SGLang](https://
|
350
|
-
- [Quick Start](https://
|
351
|
-
- [Backend Tutorial](https://
|
352
|
-
- [Frontend Tutorial](https://
|
353
|
-
- [Contribution Guide](https://
|
362
|
+
- [Install SGLang](https://docs.sglang.ai/start/install.html)
|
363
|
+
- [Quick Start](https://docs.sglang.ai/start/send_request.html)
|
364
|
+
- [Backend Tutorial](https://docs.sglang.ai/backend/openai_api_completions.html)
|
365
|
+
- [Frontend Tutorial](https://docs.sglang.ai/frontend/frontend.html)
|
366
|
+
- [Contribution Guide](https://docs.sglang.ai/references/contribution_guide.html)
|
354
367
|
|
355
368
|
## Benchmark and Performance
|
356
369
|
Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/), [v0.4 blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)
|
@@ -359,7 +372,7 @@ Learn more in the release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
|
|
359
372
|
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
|
360
373
|
|
361
374
|
## Adoption and Sponsorship
|
362
|
-
The project is supported by (alphabetically): AMD, Baseten, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
|
375
|
+
The project is supported by (alphabetically): AMD, Baseten, Cursor, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
|
363
376
|
|
364
377
|
## Acknowledgment and Citation
|
365
378
|
We learned the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql). Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
@@ -1,19 +1,18 @@
|
|
1
|
-
sglang/__init__.py,sha256=
|
2
|
-
sglang/api.py,sha256=
|
1
|
+
sglang/__init__.py,sha256=njc4c2IBYklSqVMiT70GL630Uddg5D_IU_6dthApPxc,1587
|
2
|
+
sglang/api.py,sha256=PuJTtrKJ50ddFNOuT22ChCSd7xJISkbi3pnGcbDJ9QQ,6882
|
3
3
|
sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
|
4
|
-
sglang/bench_offline_throughput.py,sha256=
|
5
|
-
sglang/bench_one_batch.py,sha256=
|
6
|
-
sglang/bench_one_batch_server.py,sha256
|
7
|
-
sglang/bench_serving.py,sha256=
|
4
|
+
sglang/bench_offline_throughput.py,sha256=XXBLRJZtOuCs8hO5tRso8s7RUMEKfooGdYgwoFClwm0,12926
|
5
|
+
sglang/bench_one_batch.py,sha256=OrGu64y5xzjpU3tXhjlOR3nzWSpREVs2ZCkjSC18FOo,17699
|
6
|
+
sglang/bench_one_batch_server.py,sha256=iu73SsvYwnuRktYZDz1P6psMiRx8MbEbF5sbsYJdzYg,5962
|
7
|
+
sglang/bench_serving.py,sha256=G3XJt29FfRHKtMw1sI9A1FX_oGsx2-Ehw6KCE7xa7to,54538
|
8
8
|
sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
|
9
9
|
sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
|
10
|
-
sglang/launch_server.py,sha256=
|
11
|
-
sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
|
10
|
+
sglang/launch_server.py,sha256=mDXfwha8LHpWQJekcCosR98QhCQsbmilsBlI5jAIgg0,420
|
12
11
|
sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
|
13
|
-
sglang/utils.py,sha256=
|
14
|
-
sglang/version.py,sha256=
|
12
|
+
sglang/utils.py,sha256=wvLVVC8U2qIhCSCrtzvV3wXapvJweir1XDNdpfoPFRM,11934
|
13
|
+
sglang/version.py,sha256=U15rrN4uKNhqBdlIfapKU6UjYHsYIkv659f2ebUKgKU,28
|
15
14
|
sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
sglang/lang/chat_template.py,sha256=
|
15
|
+
sglang/lang/chat_template.py,sha256=_fh2e2fnyMvVgl2TY50fpAoFb4Dw3OfifxUjC5qDOlA,16236
|
17
16
|
sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
|
18
17
|
sglang/lang/compiler.py,sha256=o1C6G3TzhjSlsH-doTPy5oiVehr57dxNTa5oZw5TTAI,7639
|
19
18
|
sglang/lang/interpreter.py,sha256=r7x5mBxAOaEwmxjaMBMcn7N8HDFv6V6K9eINtffDygQ,33074
|
@@ -24,72 +23,75 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
|
|
24
23
|
sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
|
25
24
|
sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
|
26
25
|
sglang/lang/backend/openai.py,sha256=ha9a2P6T80TmSgYlyIwB1qYawWkjcOgiOptkktkqa1U,15436
|
27
|
-
sglang/lang/backend/runtime_endpoint.py,sha256=
|
26
|
+
sglang/lang/backend/runtime_endpoint.py,sha256=gM97bi8Kv8sLzCDJnH5ZZTQ9I6t31CeVUve7qdTsopo,16755
|
28
27
|
sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
|
29
|
-
sglang/srt/_custom_ops.py,sha256=
|
28
|
+
sglang/srt/_custom_ops.py,sha256=7jL5BTcoS8PmR56y2Qsa3q8emI-tmrJuV4hLTwLVFBE,5040
|
30
29
|
sglang/srt/aio_rwlock.py,sha256=6LYtOdeTUY3hkfa1dmYkgsaF2ttrwIF3hUWz2AZ2fqw,2970
|
31
|
-
sglang/srt/conversation.py,sha256=
|
30
|
+
sglang/srt/conversation.py,sha256=USUoYiJf5DdHz7Ouclu30k3QSxMiem4WgZrA148MpSA,21695
|
32
31
|
sglang/srt/hf_transformers_utils.py,sha256=_24uqCkZ4dvS9Uc5p2cCzX0Q8ShUzrh_Hp6mvg7hxHY,7729
|
33
32
|
sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
|
34
33
|
sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
|
35
|
-
sglang/srt/server.py,sha256=
|
36
|
-
sglang/srt/server_args.py,sha256=
|
37
|
-
sglang/srt/
|
34
|
+
sglang/srt/server.py,sha256=UJjHw17N3YVvhyIe1KPDPVgXr6gwyDj0hyCMtizJuPg,854
|
35
|
+
sglang/srt/server_args.py,sha256=akyBB4uY8m-RlQEzUK6gLj1cxh7wcLO7N4ovDPUZuAo,38990
|
36
|
+
sglang/srt/torch_memory_saver_adapter.py,sha256=--FgbrcvJxTcRe856plD9ktqgrHGPTE18eZCJlE50hY,1255
|
37
|
+
sglang/srt/utils.py,sha256=Mo4Kr1iqoTFCgdnCg-GQA3mXuDpdnzM6OhGOFOTGR28,47142
|
38
38
|
sglang/srt/configs/__init__.py,sha256=Nvwtif0X9IYUtj0aL9XvAo_RRZcxTshsaliwc8djooU,347
|
39
39
|
sglang/srt/configs/chatglm.py,sha256=j-b0YkdYUmQm2y1kNmMJtKeACxWKmBbvNNkDWbs6kbI,2907
|
40
40
|
sglang/srt/configs/dbrx.py,sha256=tdhIkXAQl1yr0MxqFmsDG1E0e2puRTTKm6UTyANBLac,11005
|
41
|
-
sglang/srt/configs/device_config.py,sha256=
|
41
|
+
sglang/srt/configs/device_config.py,sha256=kfmpPOECqYxcRoY-ko0QZRhyiBWUGP2CMF51DMUN5nU,435
|
42
42
|
sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
|
43
43
|
sglang/srt/configs/load_config.py,sha256=TcPi_HY6xu5SiVZsxPOoB5pGeDUNebOk7muoUH9VBDg,3083
|
44
|
-
sglang/srt/configs/model_config.py,sha256=
|
44
|
+
sglang/srt/configs/model_config.py,sha256=sQIOfslBRzhOjucZdd8zE8nO9PEOc7zc6cZMbguQgoY,16876
|
45
45
|
sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
|
46
|
-
sglang/srt/constrained/
|
47
|
-
sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
|
46
|
+
sglang/srt/constrained/base_grammar_backend.py,sha256=JFQFiAZLSqV6vck-ewIEzEEyncWLbRz_gkvkqpC282k,3185
|
48
47
|
sglang/srt/constrained/outlines_backend.py,sha256=CipNHNNXs8xtnJNVNe6FCwZUlSbIXbGmWVlZz3hUpFQ,6820
|
49
48
|
sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
|
50
|
-
sglang/srt/constrained/xgrammar_backend.py,sha256=
|
51
|
-
sglang/srt/distributed/__init__.py,sha256=
|
52
|
-
sglang/srt/distributed/communication_op.py,sha256=
|
53
|
-
sglang/srt/distributed/parallel_state.py,sha256=
|
54
|
-
sglang/srt/distributed/utils.py,sha256=
|
55
|
-
sglang/srt/distributed/device_communicators/
|
56
|
-
sglang/srt/distributed/device_communicators/
|
57
|
-
sglang/srt/distributed/device_communicators/
|
58
|
-
sglang/srt/distributed/device_communicators/
|
59
|
-
sglang/srt/distributed/device_communicators/
|
60
|
-
sglang/srt/distributed/device_communicators/
|
61
|
-
sglang/srt/distributed/device_communicators/
|
62
|
-
sglang/srt/distributed/device_communicators/
|
63
|
-
sglang/srt/
|
64
|
-
sglang/srt/
|
49
|
+
sglang/srt/constrained/xgrammar_backend.py,sha256=l-37tdrPsp7xnxZpY8_0W1DnZSiBAH9e-BcwiAO8b0g,5048
|
50
|
+
sglang/srt/distributed/__init__.py,sha256=jFOcyt-wFAPMBUAf9zkZalNQlt-4rqmT6pCKBz1E4qo,149
|
51
|
+
sglang/srt/distributed/communication_op.py,sha256=IBnFUdMftK_VSTMMMitGveonorFUUVNL4guqO31cMSc,1130
|
52
|
+
sglang/srt/distributed/parallel_state.py,sha256=rTqUtbm6eNNYzlMP8NQC55E842Agtf-g3cGPzqlfbh8,47527
|
53
|
+
sglang/srt/distributed/utils.py,sha256=U-BSaXYjWwnfG8g-tUfBhjKt5Ug097nyHtu3g3aea_Y,8473
|
54
|
+
sglang/srt/distributed/device_communicators/cuda_wrapper.py,sha256=3jvPG-Ow5UBLiXhfx8T8snR7crSZbPpARAggsDPWq7k,7038
|
55
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce.py,sha256=EL4RwwZBnnfigg3MRfc5MFXH4zA17TA6OAwQNxtidEs,15765
|
56
|
+
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py,sha256=q2q1A_Sqvrvkrgf7Tjg5XhXR1JWzzUUPHSicAKK2SjE,11022
|
57
|
+
sglang/srt/distributed/device_communicators/hpu_communicator.py,sha256=gPjEH1-izoby5uDrfUlzNf21luPT0Ow7pJjhCRKnHy8,1728
|
58
|
+
sglang/srt/distributed/device_communicators/pynccl.py,sha256=G-Dut_QJHOUG0j7--ZqapHtvm70Lgl7obtE6ZfgeAiU,10064
|
59
|
+
sglang/srt/distributed/device_communicators/pynccl_wrapper.py,sha256=LblisImY9d6EMz-oPS9J16WHo2Q_SRL1DtlJKK63Hfg,15349
|
60
|
+
sglang/srt/distributed/device_communicators/shm_broadcast.py,sha256=bbruDIM1GgKIdB6gi71_I0mpB179I-qyvwKuSj1Kaic,20816
|
61
|
+
sglang/srt/distributed/device_communicators/xpu_communicator.py,sha256=ajW6132BvA6jkeipEIgN27TFycI0U06Ih2Z8WNjlA4s,1593
|
62
|
+
sglang/srt/entrypoints/engine.py,sha256=pRp35nKjjf4mC7t1ZO3d6jaErfTL2l8nYXf73V2plHo,16702
|
63
|
+
sglang/srt/entrypoints/http_server.py,sha256=XDNPxVuK8EdQWfqqkouNDcsC1dEG5guyFdWxf_xavlY,18714
|
64
|
+
sglang/srt/layers/activation.py,sha256=ckrXVTK60bTPgekE1EYYxflb1XuPSEm6Sidw-z9NuXg,5477
|
65
65
|
sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
|
66
|
+
sglang/srt/layers/dp_attention.py,sha256=ffbupZIhswW0Dl5MW8e6-W4oVmUqAbTnhOVwn6k_8N0,1823
|
66
67
|
sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
|
67
|
-
sglang/srt/layers/linear.py,sha256=
|
68
|
-
sglang/srt/layers/logits_processor.py,sha256=
|
69
|
-
sglang/srt/layers/parameter.py,sha256=
|
68
|
+
sglang/srt/layers/linear.py,sha256=CXi1cA1jZS1bIDd8LIhpKpk2e3zyJG998fl3-P9w5H0,49987
|
69
|
+
sglang/srt/layers/logits_processor.py,sha256=MP3Q09WZS9RgQTp6krOJJa4VxJ2ykXh2KAYolq1SliE,12177
|
70
|
+
sglang/srt/layers/parameter.py,sha256=s3Zhc4ti6tprGxIV9ONyYYTVuXdVnZH4qACpnHUFT0A,14691
|
70
71
|
sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
|
71
|
-
sglang/srt/layers/radix_attention.py,sha256=
|
72
|
-
sglang/srt/layers/rotary_embedding.py,sha256=
|
73
|
-
sglang/srt/layers/sampler.py,sha256=
|
72
|
+
sglang/srt/layers/radix_attention.py,sha256=tPjJA3P9kuFBk2QWFTgOI8UbVUFLVDZgFaQWuokx894,2234
|
73
|
+
sglang/srt/layers/rotary_embedding.py,sha256=vx80KV7txYb73LRzf6wLyhx3YRPlFv07leFwTHGX9Zw,43243
|
74
|
+
sglang/srt/layers/sampler.py,sha256=MeZ28CLtuSRyAZQK7Wt1IHUUS1NV5r2fcvqvy9AgHUI,8553
|
74
75
|
sglang/srt/layers/torchao_utils.py,sha256=8c2vzt106iP_QKbJtfN1GuABW8nCuP5dElQLUeci6qg,3934
|
75
|
-
sglang/srt/layers/vocab_parallel_embedding.py,sha256=
|
76
|
+
sglang/srt/layers/vocab_parallel_embedding.py,sha256=txcjkuSDa6gZwESKj8X-HSLhAnMmDXL0FmFWY9SKqik,22155
|
76
77
|
sglang/srt/layers/attention/__init__.py,sha256=KlQ0fl-o9v_NxBDhNZ4dPW2uQ2HeJjLm-0MTMWgaa28,2980
|
77
78
|
sglang/srt/layers/attention/double_sparsity_backend.py,sha256=QEDF8tQKMkh-nbt4jHKHZhhgHuV0Fla_BPzzoo9JfT4,9231
|
78
|
-
sglang/srt/layers/attention/flashinfer_backend.py,sha256=
|
79
|
+
sglang/srt/layers/attention/flashinfer_backend.py,sha256=XUyR97-WSyE6esq4r4XOcvXRtEJm8JOZ6MrXE-YfsYM,33949
|
79
80
|
sglang/srt/layers/attention/torch_native_backend.py,sha256=KrcAqTLVZLtwgOmB0xhwUUsX32M-5LYZpNxaRNT4VuA,9252
|
80
|
-
sglang/srt/layers/attention/triton_backend.py,sha256=
|
81
|
+
sglang/srt/layers/attention/triton_backend.py,sha256=P329qd6i7XfgB2UH7KXNid67v-kziV1sgcAuh3RWna8,6654
|
82
|
+
sglang/srt/layers/attention/vision.py,sha256=mn8fruFob-Cif0_6V5P6W-2lCqJOiIsvsmYtH-bEvcU,6643
|
81
83
|
sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=ltWcZ00ugpglSYvszpGb-UCpGIixdG25cWtSrOOOMik,17943
|
82
84
|
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
|
83
85
|
sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
|
84
86
|
sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
|
85
|
-
sglang/srt/layers/moe/fused_moe_native.py,sha256=
|
86
|
-
sglang/srt/layers/moe/topk.py,sha256=
|
87
|
+
sglang/srt/layers/moe/fused_moe_native.py,sha256=87YD7KhITKVy13WsQAuMRsYV0UlvJgfE085MX9bO7nQ,3849
|
88
|
+
sglang/srt/layers/moe/topk.py,sha256=qcWDUVvEV6TIO_idymStylkpPp6dMk-wbYj2Zq4ZYJ0,7057
|
87
89
|
sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
88
90
|
sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
|
89
|
-
sglang/srt/layers/moe/ep_moe/layer.py,sha256=
|
91
|
+
sglang/srt/layers/moe/ep_moe/layer.py,sha256=13nATwnOfCJKvnIUPcI7QFEDAblTg7vlWu9tNMwSlVY,22300
|
90
92
|
sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
|
91
|
-
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=
|
92
|
-
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=
|
93
|
+
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=yIi04FbQFBj01_pl02riB4mPfQ82zSowOgkQv3GMJCM,36774
|
94
|
+
sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=feYhe2V6mu6J7bqG3AYr3fdFrxdzJhGBbN99MRlheZA,22486
|
93
95
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
94
96
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JJN0hryyLr5Zv3dSS7C8cPFhAwTT6XxUVnBGMZvV6JA,2752
|
95
97
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ouRyZ5PEMPP2njPftCNhs-1g1y6wueWLmhI7G1SjV1k,4131
|
@@ -181,12 +183,14 @@ sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=LwEoCt1lUc0uvCvRhBAy6Gkx1
|
|
181
183
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=aMP7oZmh8BZnPOrl0MFibcdhTn3VmOSjqoKoK2rMSbU,4323
|
182
184
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
183
185
|
"sglang/srt/layers/moe/fused_moe_triton/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Uz5X80VcNBOaxshwVNUEittHk2zqB4HQCfTJ4TPG5aM,3274
|
184
|
-
sglang/srt/layers/quantization/__init__.py,sha256=
|
186
|
+
sglang/srt/layers/quantization/__init__.py,sha256=_Sba1KQnmZNKGDKM1MfBs2T3uDqOHfeW6IHO2mTUvfs,4471
|
185
187
|
sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
|
186
|
-
sglang/srt/layers/quantization/fp8.py,sha256=
|
188
|
+
sglang/srt/layers/quantization/fp8.py,sha256=_dRakGjHn_ou8ZHNL6bIr4-g24w1x_TpLqywtLG6xaw,34784
|
187
189
|
sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
|
188
|
-
sglang/srt/layers/quantization/fp8_utils.py,sha256=
|
189
|
-
sglang/srt/layers/quantization/
|
190
|
+
sglang/srt/layers/quantization/fp8_utils.py,sha256=7v-RNwuYXa-gPO3msRDB0Z3uajOQMYd2Cj0NMoq1hg4,4148
|
191
|
+
sglang/srt/layers/quantization/int8_kernel.py,sha256=t_BLVf8XjOyn7S3Lu3B4hXvw8DvTg4Anco7TNadL58U,1436
|
192
|
+
sglang/srt/layers/quantization/modelopt_quant.py,sha256=_VdVz77dTP-IczPeFrdH6Ttro2D26BZvMlZkCKWj_5o,6200
|
193
|
+
sglang/srt/layers/quantization/w8a8_int8.py,sha256=PBapNo27rkUdX95E0ihUZeHTXP2Id3CwlNlMU8GIuZc,3344
|
190
194
|
"sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
191
195
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
192
196
|
"sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
@@ -225,96 +229,101 @@ sglang/srt/layers/quantization/modelopt_quant.py,sha256=07WU6ej0nvAvmZdySwo8l4TH
|
|
225
229
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
|
226
230
|
"sglang/srt/layers/quantization/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
|
227
231
|
"sglang/srt/layers/quantization/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
228
|
-
sglang/srt/lora/lora.py,sha256
|
232
|
+
sglang/srt/lora/lora.py,sha256=xS0YCrlEQb_LlU85TllesoMw0Td88voMU6DSZ9w80cs,14845
|
229
233
|
sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
|
230
234
|
sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
|
231
235
|
sglang/srt/managers/cache_controller.py,sha256=DXnIunJgtTws1WF2vZOYVQe56vacV7Mn4wL9zoG8Xz8,10909
|
232
|
-
sglang/srt/managers/
|
233
|
-
sglang/srt/managers/
|
234
|
-
sglang/srt/managers/
|
235
|
-
sglang/srt/managers/
|
236
|
-
sglang/srt/managers/
|
237
|
-
sglang/srt/managers/
|
238
|
-
sglang/srt/managers/
|
239
|
-
sglang/srt/managers/
|
240
|
-
sglang/srt/managers/
|
241
|
-
sglang/srt/managers/
|
242
|
-
sglang/srt/managers/
|
236
|
+
sglang/srt/managers/configure_logging.py,sha256=aY9xExurz7t_IdItd-9GuVuM7kEGB8_bRryhZxKdu9o,1542
|
237
|
+
sglang/srt/managers/data_parallel_controller.py,sha256=b64aC6iLr5RolJyNQnT-yTQ_TSI9DDLtuABf_TPTUrM,9421
|
238
|
+
sglang/srt/managers/detokenizer_manager.py,sha256=OuaynGaVk9rqCRHl9qrAkGc03FEy0lWOmdpexprNWuM,9518
|
239
|
+
sglang/srt/managers/image_processor.py,sha256=dEjEWzrmJyEXhr5sKBw4BEUEjla8CNdkzFGfogPGmFY,19103
|
240
|
+
sglang/srt/managers/io_struct.py,sha256=m60xntysIGTjj6DCmsmI4NA4IUQY26UH1okHHpDVdQ0,17273
|
241
|
+
sglang/srt/managers/schedule_batch.py,sha256=wj_blBaSLAVIjnj4gE2eO8bVSgimXsxkf-qWtW1tJK0,48282
|
242
|
+
sglang/srt/managers/schedule_policy.py,sha256=Qero_lwPEb7bM87qjWtYijGyRhtY0mMwjWP6SbjvaUE,18260
|
243
|
+
sglang/srt/managers/scheduler.py,sha256=3ID9i4xluP_g07vhjt9g2qQu5h3GXJEaDG92OtVxY1U,69101
|
244
|
+
sglang/srt/managers/session_controller.py,sha256=WXRbtninVEVM0rQYiXFzOwsDph0TNj1L2sRCWQF0dSg,5571
|
245
|
+
sglang/srt/managers/tokenizer_manager.py,sha256=AuX2uMXdXQ-h_i9M_bFB_u_IsdnFPCgPjj97o19ivOw,38539
|
246
|
+
sglang/srt/managers/tp_worker.py,sha256=OiHpFR9Hy1GpgLEkTDsykBiFuv1VKmkjQS58gQVPQIs,8126
|
247
|
+
sglang/srt/managers/tp_worker_overlap_thread.py,sha256=7p6zREndc4a9fmYfqW4iY9IYANxdoAioaf0hU92-8Ow,8893
|
248
|
+
sglang/srt/managers/utils.py,sha256=5i75uLlQOF_5CaT02CrWtwozMTtwTg2_nLP8Dtr-JZQ,1536
|
243
249
|
sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
|
244
250
|
sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
|
245
251
|
sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
|
246
|
-
sglang/srt/mem_cache/memory_pool.py,sha256=
|
252
|
+
sglang/srt/mem_cache/memory_pool.py,sha256=9ud97u1cXnN6O0qlR8tv8woN_20gqisTV6aBgHqhinc,19682
|
247
253
|
sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fWWJHcq4,10945
|
248
|
-
sglang/srt/metrics/collector.py,sha256=
|
254
|
+
sglang/srt/metrics/collector.py,sha256=_yl0_paSARxS1ypZgd-pLJ29tMizolHuwROX21dOXTk,7326
|
249
255
|
sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
|
250
|
-
sglang/srt/model_executor/cuda_graph_runner.py,sha256=
|
256
|
+
sglang/srt/model_executor/cuda_graph_runner.py,sha256=riMehP1dsKUQBb4xOCQmR22gBkoVsPdWknaG7KmmGy0,18461
|
251
257
|
sglang/srt/model_executor/forward_batch_info.py,sha256=Vu6qlbfm6dMUfvGaSmmLIroi8hBqfDpNVLxl7oECzIs,15001
|
252
|
-
sglang/srt/model_executor/model_runner.py,sha256=
|
258
|
+
sglang/srt/model_executor/model_runner.py,sha256=6EVc4YbBCVqEKG5QajGfU8yrezCNtJ_e6XeTa-2vU8s,32899
|
253
259
|
sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
|
254
|
-
sglang/srt/model_loader/loader.py,sha256=
|
260
|
+
sglang/srt/model_loader/loader.py,sha256=LGss7TwRdn_iqKEtxUzL64Crkb0pHCK2Eg-N9knZ3JQ,43896
|
255
261
|
sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
|
256
|
-
sglang/srt/model_loader/weight_utils.py,sha256=
|
257
|
-
sglang/srt/models/baichuan.py,sha256=
|
258
|
-
sglang/srt/models/chatglm.py,sha256=
|
259
|
-
sglang/srt/models/commandr.py,sha256=
|
260
|
-
sglang/srt/models/dbrx.py,sha256=
|
261
|
-
sglang/srt/models/deepseek.py,sha256=
|
262
|
-
sglang/srt/models/deepseek_v2.py,sha256=
|
263
|
-
sglang/srt/models/exaone.py,sha256=
|
264
|
-
sglang/srt/models/gemma.py,sha256=
|
265
|
-
sglang/srt/models/gemma2.py,sha256
|
262
|
+
sglang/srt/model_loader/weight_utils.py,sha256=uoWVerKIcycsWhq8yZO6PlZxDuHaR_MlgbdiO1q-1wk,25957
|
263
|
+
sglang/srt/models/baichuan.py,sha256=I7o7DPny8sWG7dFtr0GTovXFwvncEUleaCVcNNiYnko,14907
|
264
|
+
sglang/srt/models/chatglm.py,sha256=n8uZpx9iHw6V-XCns9mtTf99Iqh35ZjPC5bFDYtkoes,12897
|
265
|
+
sglang/srt/models/commandr.py,sha256=oevnEqmttuC4ANXkn_9z4_9yrD7wtscLFklNCW_hf_k,14220
|
266
|
+
sglang/srt/models/dbrx.py,sha256=qVccAJmNKrdqPbisqPbg45QZOU7IQrIhb6AeOjV9slw,14564
|
267
|
+
sglang/srt/models/deepseek.py,sha256=KfcQ54BqlS73XQmtcG0sfnmm3VXOGwUIkd34WS6Gp0Y,15694
|
268
|
+
sglang/srt/models/deepseek_v2.py,sha256=7-HwkdEcrluFOK2hYKWXtMSSD130FiiBQkbX_ldzj_c,38511
|
269
|
+
sglang/srt/models/exaone.py,sha256=Wvr6XofnH2feJ-TzAm5aD1YTyfcum6JdnKMG1S7Xy4g,13035
|
270
|
+
sglang/srt/models/gemma.py,sha256=4Jvt9F-BNhPFiBi5H8aPqcYqKeJLI9KZKy2WpR96RpM,12123
|
271
|
+
sglang/srt/models/gemma2.py,sha256=pHGvr1PPOWgT8JO-eE8OOFGArU8Ka3eoHAxUXICpgVA,15722
|
266
272
|
sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb-_Hq8,2494
|
267
|
-
sglang/srt/models/gpt2.py,sha256=
|
268
|
-
sglang/srt/models/gpt_bigcode.py,sha256=
|
269
|
-
sglang/srt/models/granite.py,sha256=
|
270
|
-
sglang/srt/models/grok.py,sha256=
|
271
|
-
sglang/srt/models/internlm2.py,sha256=
|
273
|
+
sglang/srt/models/gpt2.py,sha256=chg-5MfAl_gRYXMVrnKkWxY6zA09LEEEIdnWnnZn7N4,9367
|
274
|
+
sglang/srt/models/gpt_bigcode.py,sha256=DH8p76sPPhsxmNz2Dh5Vi5xQmLP_gEZDilIOTlHsEuw,9534
|
275
|
+
sglang/srt/models/granite.py,sha256=3HqQXJlfoKd11w1NCpTYmiPO9HlkA1jJqoAmuTzHuU0,20406
|
276
|
+
sglang/srt/models/grok.py,sha256=NGFAYfpk3LHRfORZd8GmqsHg9L5y-p3YlmarP0X9BzI,17999
|
277
|
+
sglang/srt/models/internlm2.py,sha256=INGGwSCYKoZRAokXJC78RKKde2fgHn9P4JG-N37Pfn0,12124
|
272
278
|
sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
|
273
|
-
sglang/srt/models/llama.py,sha256
|
279
|
+
sglang/srt/models/llama.py,sha256=YKtLpxgk_OmPRpBJSr1BCTWC6IILFzZtakKnWYYHKw0,22040
|
274
280
|
sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
|
275
281
|
sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
|
276
282
|
sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
|
277
283
|
sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
|
278
284
|
sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
|
279
285
|
sglang/srt/models/llavavid.py,sha256=dYUkKfHoE15vF_VXA_s_ICCTUMSmSgvP181fk8dUi0g,12185
|
280
|
-
sglang/srt/models/minicpm.py,sha256=
|
281
|
-
sglang/srt/models/minicpm3.py,sha256=
|
286
|
+
sglang/srt/models/minicpm.py,sha256=hVWri0-3sAiuGOMcIhGL2GphQZ13qBcLXuLTsQVALGY,13720
|
287
|
+
sglang/srt/models/minicpm3.py,sha256=r48jFCi2SXycZgcrlHiuIRYsV3xITJrax913V2yYxw0,24695
|
288
|
+
sglang/srt/models/minicpmv.py,sha256=lgWqj1bWMDvPHPE5POVEjhnY-_qMSidkbsBLMYBtDgM,43181
|
282
289
|
sglang/srt/models/mistral.py,sha256=EYifJUUzN2Z2-iL37eJiNZF_DB0H4pa0mKlgYRIxM70,838
|
283
|
-
sglang/srt/models/mixtral.py,sha256=
|
284
|
-
sglang/srt/models/mixtral_quant.py,sha256=
|
285
|
-
sglang/srt/models/mllama.py,sha256=
|
286
|
-
sglang/srt/models/olmo.py,sha256
|
287
|
-
sglang/srt/models/olmo2.py,sha256=
|
288
|
-
sglang/srt/models/olmoe.py,sha256=
|
289
|
-
sglang/srt/models/phi3_small.py,sha256=
|
290
|
-
sglang/srt/models/qwen.py,sha256=
|
291
|
-
sglang/srt/models/qwen2.py,sha256=
|
292
|
-
sglang/srt/models/
|
293
|
-
sglang/srt/models/
|
290
|
+
sglang/srt/models/mixtral.py,sha256=ybArp6vx7VTrjQ3kqH1FHJ1gQzsFPI5vv1C-Pnix6ws,14520
|
291
|
+
sglang/srt/models/mixtral_quant.py,sha256=_gy4gKwFX6BNlU6xE-n0N3vVNhftxgZjWEDKTCKV_2M,14019
|
292
|
+
sglang/srt/models/mllama.py,sha256=vK80nRlSH7hFjBeqVRGVJ4XepfxkpY4HJPpvZ15CzMA,37751
|
293
|
+
sglang/srt/models/olmo.py,sha256=-t5s3DI-CxiMqRAvKS73NTMNrRpQRD8eh2VabCNYDnE,11699
|
294
|
+
sglang/srt/models/olmo2.py,sha256=Wg4mo53c3OIAWmAMZ-TR9VRzSfKqhBZixqvrF8AbIJg,13430
|
295
|
+
sglang/srt/models/olmoe.py,sha256=luqgdyCYJTFyhaRfZElWSFV17ee6FjfU0CpemMmsTS8,15147
|
296
|
+
sglang/srt/models/phi3_small.py,sha256=jVKH2twKfELtqyjMWjH8CnyXlCKEkYtiUUnx18k9OLQ,14799
|
297
|
+
sglang/srt/models/qwen.py,sha256=dg_sVrh7I58Q_LevvO2d5dFZi1T19V2czNh8-9nPUaE,9901
|
298
|
+
sglang/srt/models/qwen2.py,sha256=caVvsTu0QteR9Q65p81JEgESSV9_nP3yPaShRMu6uDY,14936
|
299
|
+
sglang/srt/models/qwen2_eagle.py,sha256=KTtejEezdLfd_odg3Na1i5kBk7W-YFg9hImfWyrMgVc,4288
|
300
|
+
sglang/srt/models/qwen2_moe.py,sha256=GWi5nuaQWifPmyC3ld2G1wZJS5Xva6-1yjCUrNcGhkY,16539
|
301
|
+
sglang/srt/models/qwen2_vl.py,sha256=r0OmFH8OcsIZ96fKqXaAWGLUe6oTVW_w6Gt5PChYUXE,23139
|
294
302
|
sglang/srt/models/registry.py,sha256=inKh9iwOp3LFYm3nqujg-OtABClOP-ifc1stA9cZegA,3434
|
295
|
-
sglang/srt/models/stablelm.py,sha256=
|
296
|
-
sglang/srt/models/torch_native_llama.py,sha256=
|
297
|
-
sglang/srt/models/xverse.py,sha256=
|
298
|
-
sglang/srt/models/xverse_moe.py,sha256=
|
303
|
+
sglang/srt/models/stablelm.py,sha256=dO6EwFFiBWn-8yxV9tb3OtjNe9D0dF57Z298g7SmrhU,11308
|
304
|
+
sglang/srt/models/torch_native_llama.py,sha256=EIoG22kJtk6qAZxzpe2efB4diyohOhDFlq4RKj7Wbbs,18755
|
305
|
+
sglang/srt/models/xverse.py,sha256=sYSSbwB_VC6uGzxkzNHluaJzvSfQXCxQG_OsrIWLWvU,13549
|
306
|
+
sglang/srt/models/xverse_moe.py,sha256=vN486GkRHvgyRgSW2e_zTOQHDkWx86lthahtKxl6M10,15511
|
299
307
|
sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
|
300
308
|
sglang/srt/openai_api/adapter.py,sha256=Yv-rEA0Jd54iFlnkVy-OZM4EnPqkW_NLtDPGCiPWVWo,56386
|
301
|
-
sglang/srt/openai_api/protocol.py,sha256=
|
302
|
-
sglang/srt/sampling/
|
303
|
-
sglang/srt/sampling/
|
309
|
+
sglang/srt/openai_api/protocol.py,sha256=_vSaNQVueM3FZJcFZgbvwQPeFA_nN5lcW3NBJdYj_C0,11697
|
310
|
+
sglang/srt/sampling/custom_logit_processor.py,sha256=tDvoLgLqn-sy1qcY6vSrpbnHCeqbdk0uhMOO-uy4p4E,1099
|
311
|
+
sglang/srt/sampling/sampling_batch_info.py,sha256=4H9y4qoWsyxg4B99DwD1avJNudgzsgPkA0pj5YoDKXk,15426
|
312
|
+
sglang/srt/sampling/sampling_params.py,sha256=NCw0zLAnu8u_vQ8QD1RbEw1F9gc7sDWnBCrXn2DEp6o,5788
|
304
313
|
sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
|
305
314
|
sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
|
306
315
|
sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
|
307
316
|
sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
|
308
317
|
sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
|
309
|
-
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=
|
318
|
+
sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=vmE5muVz_ztRA6glgYOiQnKas_zTvQZ3nxcUEQao-L8,3070
|
310
319
|
sglang/srt/speculative/build_eagle_tree.py,sha256=SIKuOFUOIzMLyanL5vViPmFBEiUHm_ezwiGuIyLmauE,9886
|
311
320
|
sglang/srt/speculative/eagle_utils.py,sha256=Z51xGuvn-ZIMp0OXENZUhpDOz8kTDkujhHZA-Z2MKbA,23422
|
312
|
-
sglang/srt/speculative/eagle_worker.py,sha256=
|
321
|
+
sglang/srt/speculative/eagle_worker.py,sha256=P__BMJ0eKLaPzCS8jEWylk2POstue5u3RIVZeFtj84I,7843
|
313
322
|
sglang/srt/speculative/spec_info.py,sha256=D7A27UU1iOwIBEjXTgAxZ7jdftbTiVlMCvK8GmYr2zg,488
|
314
323
|
sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
|
315
324
|
sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
|
316
325
|
sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
|
317
|
-
sglang/test/runners.py,sha256=
|
326
|
+
sglang/test/runners.py,sha256=4TaXHFrpOCzXUNI-D8IEFWrPdA-EsitLiKi3_pBwKTo,14940
|
318
327
|
sglang/test/simple_eval_common.py,sha256=joqrGysuLnJFtzDRIgFkMsRyKUSyjVPFWp0_PHAL3Ik,12378
|
319
328
|
sglang/test/simple_eval_gpqa.py,sha256=8Xt9Bw05c7SZTYrCZgB68OZUqUbLo69ywiyx0bTvSUk,3220
|
320
329
|
sglang/test/simple_eval_humaneval.py,sha256=zmV3xWYc2OrpiT9Dy55RTKZL5DEROD1cJ0NA_-cU5zI,5685
|
@@ -324,11 +333,11 @@ sglang/test/simple_eval_mmlu.py,sha256=FkwamjGMjueTixymkedF-YiPloSLiy4ftILFUrKZ9
|
|
324
333
|
sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxoE,1442
|
325
334
|
sglang/test/test_block_fp8.py,sha256=rhrIun8aW5zq2qvuGRlo7F7aZ_upjVxtQMVlyc2Th_E,11771
|
326
335
|
sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
|
327
|
-
sglang/test/test_programs.py,sha256=
|
328
|
-
sglang/test/test_utils.py,sha256=
|
336
|
+
sglang/test/test_programs.py,sha256=b8FMo_5gz9T4wPyUe8SmcE1sCISYlo_NvBHFxzUcF5I,18835
|
337
|
+
sglang/test/test_utils.py,sha256=fQykvQgdt-m9YFVnyX0A52f_fbPcuqweqYr7CJnI-t0,24694
|
329
338
|
sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
|
330
|
-
sglang-0.4.1.
|
331
|
-
sglang-0.4.1.
|
332
|
-
sglang-0.4.1.
|
333
|
-
sglang-0.4.1.
|
334
|
-
sglang-0.4.1.
|
339
|
+
sglang-0.4.1.post7.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
|
340
|
+
sglang-0.4.1.post7.dist-info/METADATA,sha256=sqwbzcMMlW-UrNVOnYIUexx9SFkg0QU8DdZl96CV0Ts,22997
|
341
|
+
sglang-0.4.1.post7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
342
|
+
sglang-0.4.1.post7.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
|
343
|
+
sglang-0.4.1.post7.dist-info/RECORD,,
|
sglang/launch_server_llavavid.py
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
"""Launch the inference server for Llava-video model."""
|
2
|
-
|
3
|
-
import json
|
4
|
-
import sys
|
5
|
-
|
6
|
-
from sglang.srt.server import launch_server, prepare_server_args
|
7
|
-
|
8
|
-
if __name__ == "__main__":
|
9
|
-
server_args = prepare_server_args(sys.argv[1:])
|
10
|
-
|
11
|
-
model_override_args = {}
|
12
|
-
model_override_args["mm_spatial_pool_stride"] = 2
|
13
|
-
model_override_args["architectures"] = ["LlavaVidForCausalLM"]
|
14
|
-
model_override_args["num_frames"] = 16
|
15
|
-
model_override_args["model_type"] = "llavavid"
|
16
|
-
if model_override_args["num_frames"] == 32:
|
17
|
-
model_override_args["rope_scaling"] = {"factor": 2.0, "rope_type": "linear"}
|
18
|
-
model_override_args["max_sequence_length"] = 4096 * 2
|
19
|
-
model_override_args["tokenizer_model_max_length"] = 4096 * 2
|
20
|
-
model_override_args["model_max_length"] = 4096 * 2
|
21
|
-
if "34b" in server_args.model_path.lower():
|
22
|
-
model_override_args["image_token_index"] = 64002
|
23
|
-
server_args.json_model_override_args = json.dumps(model_override_args)
|
24
|
-
|
25
|
-
launch_server(server_args)
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# Copyright 2023-2024 SGLang Team
|
2
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
3
|
-
# you may not use this file except in compliance with the License.
|
4
|
-
# You may obtain a copy of the License at
|
5
|
-
#
|
6
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
-
#
|
8
|
-
# Unless required by applicable law or agreed to in writing, software
|
9
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
10
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
11
|
-
# See the License for the specific language governing permissions and
|
12
|
-
# limitations under the License.
|
13
|
-
# ==============================================================================
|
14
|
-
|
15
|
-
# TODO(lmzheng): make this an optional dependency
|
16
|
-
from sglang.srt.constrained.outlines_backend import build_regex_from_object
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|